diff --git a/loopy/check.py b/loopy/check.py index c4d2d7f71d0f728d9674b066dedad37ba0ed9338..4ed4abd428c2d7757f66f31e41d1208ff0e86141 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -154,22 +154,13 @@ def _is_racing_iname_tag(tv, tag): def check_for_write_races(kernel): - from loopy.symbolic import DependencyMapper from loopy.kernel.data import ParallelTag - depmap = DependencyMapper(composite_leaves=False) iname_to_tag = kernel.iname_to_tag.get for insn in kernel.instructions: - for assignee_name, assignee_indices in insn.assignees_and_indices(): - assignee_indices = depmap(assignee_indices) - - def strip_var(expr): - from pymbolic.primitives import Variable - assert isinstance(expr, Variable) - return expr.name - - assignee_indices = set(strip_var(index) for index in assignee_indices) - + for assignee_name, assignee_indices in zip( + insn.assignee_var_names(), + insn.assignee_subscript_deps()): assignee_inames = assignee_indices & kernel.all_inames() if not assignee_inames <= kernel.insn_inames(insn): raise LoopyError( @@ -332,7 +323,7 @@ def check_bounds(kernel): def check_write_destinations(kernel): for insn in kernel.instructions: - for wvar, _ in insn.assignees_and_indices(): + for wvar in insn.assignee_var_names(): if wvar in kernel.all_inames(): raise LoopyError("iname '%s' may not be written" % wvar) diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index db3d15184a94aa13e6a4b449a2036869e34ca566..8531b97a947f3c46b984cf7405b794bd68e2f638 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -122,7 +122,25 @@ def generate_assignment_instruction_code(codegen_state, insn): # }}} - (assignee_var_name, assignee_indices), = insn.assignees_and_indices() + from pymbolic.primitives import Variable, Subscript + from loopy.symbolic import LinearSubscript + + lhs = insn.assignee + if isinstance(lhs, Variable): + assignee_var_name = lhs.name + assignee_indices = () + + elif isinstance(lhs, Subscript): + 
assignee_var_name = lhs.aggregate.name + assignee_indices = lhs.index_tuple + + elif isinstance(lhs, LinearSubscript): + assignee_var_name = lhs.aggregate.name + assignee_indices = (lhs.index,) + + else: + raise RuntimeError("invalid lvalue '%s'" % lhs) + lhs_var = kernel.get_var_descriptor(assignee_var_name) lhs_dtype = lhs_var.dtype diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index af4694b20ab7884048fb634fcea33449520c8eb8..f9049ee43b2c73d731dc88f3b9e18bce27273b80 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -790,7 +790,7 @@ class LoopKernel(RecordWithoutPickling): result = {} for insn in self.instructions: - for var_name, _ in insn.assignees_and_indices(): + for var_name in insn.assignee_var_names(): result.setdefault(var_name, set()).add(insn.id) return result @@ -807,7 +807,7 @@ class LoopKernel(RecordWithoutPickling): return frozenset( var_name for insn in self.instructions - for var_name, _ in insn.assignees_and_indices()) + for var_name in insn.assignee_var_names()) @memoize_method def get_temporary_to_base_storage_map(self): diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index bcc387494e84c10dfd4087c74835ac64c3bd2705..d829b6d3b759fd3218153a99223800ec4b99d9fb 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -244,9 +244,10 @@ def parse_insn(insn): else: temp_var_types.append(None) + from loopy.symbolic import LinearSubscript if isinstance(lhs_i, Variable): assignee_names.append(lhs_i.name) - elif isinstance(lhs_i, Subscript): + elif isinstance(lhs_i, (Subscript, LinearSubscript)): assignee_names.append(lhs_i.aggregate.name) else: raise LoopyError("left hand side of assignment '%s' must " @@ -551,7 +552,7 @@ class ArgumentGuesser: from loopy.symbolic import get_dependencies for insn in instructions: if isinstance(insn, MultiAssignmentBase): - for assignee_var_name, _ in insn.assignees_and_indices(): + for assignee_var_name in insn.assignee_var_names(): 
self.all_written_names.add(assignee_var_name) self.all_names.update(get_dependencies( self.submap(insn.assignees))) @@ -619,8 +620,8 @@ class ArgumentGuesser: for insn in self.instructions: if isinstance(insn, MultiAssignmentBase): - for (assignee_var_name, _), temp_var_type in zip( - insn.assignees_and_indices(), + for assignee_var_name, temp_var_type in zip( + insn.assignee_var_names(), insn.temp_var_types): if temp_var_type is not None: temp_var_names.add(assignee_var_name) @@ -713,7 +714,7 @@ def check_written_variable_names(knl): | set(six.iterkeys(knl.temporary_variables))) for insn in knl.instructions: - for var_name, _ in insn.assignees_and_indices(): + for var_name in insn.assignee_var_names(): if var_name not in admissible_vars: raise RuntimeError("variable '%s' not declared or not " "allowed for writing" % var_name) @@ -810,8 +811,8 @@ def create_temporaries(knl, default_order): for insn in knl.instructions: if isinstance(insn, MultiAssignmentBase): - for (assignee_name, _), temp_var_type in zip( - insn.assignees_and_indices(), + for assignee_name, temp_var_type in zip( + insn.assignee_var_names(), insn.temp_var_types): if temp_var_type is None: @@ -855,7 +856,6 @@ def determine_shapes_of_temporaries(knl): new_temp_vars = knl.temporary_variables.copy() from loopy.symbolic import AccessRangeMapper - from pymbolic import var import loopy as lp new_temp_vars = {} @@ -863,10 +863,8 @@ def determine_shapes_of_temporaries(knl): if tv.shape is lp.auto or tv.base_indices is lp.auto: armap = AccessRangeMapper(knl, tv.name) for insn in knl.instructions: - for assignee_name, assignee_index in insn.assignees_and_indices(): - if assignee_index: - armap(var(assignee_name).index(assignee_index), - knl.insn_inames(insn)) + for assignee in insn.assignees: + armap(assignee) if armap.access_range is not None: base_indices, shape = list(zip(*[ @@ -949,7 +947,6 @@ def guess_arg_shape_if_requested(kernel, default_order): from traceback import print_exc print_exc() - from 
loopy.diagnostic import LoopyError raise LoopyError( "Failed to (automatically, as requested) find " "shape/strides for argument '%s'. " @@ -959,6 +956,14 @@ def guess_arg_shape_if_requested(kernel, default_order): if armap.access_range is None: if armap.bad_subscripts: + from loopy.symbolic import LinearSubscript + if any(isinstance(sub, LinearSubscript) + for sub in armap.bad_subscripts): + raise LoopyError("cannot determine access range for '%s': " + "linear subscript(s) in '%s'" + % (arg.name, ", ".join( + str(i) for i in armap.bad_subscripts))) + n_axes_in_subscripts = set( len(sub.index_tuple) for sub in armap.bad_subscripts) @@ -972,7 +977,7 @@ def guess_arg_shape_if_requested(kernel, default_order): # Leave shape undetermined--we can live with that for 1D. shape = (None,) else: - raise RuntimeError("cannot determine access range for '%s': " + raise LoopyError("cannot determine access range for '%s': " "undetermined index in subscript(s) '%s'" % (arg.name, ", ".join( str(i) for i in armap.bad_subscripts))) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 0d4564aacad9e7abc5dd89bca46e7c628d4fce4d..e692bb36584dfd393e7b154bf68ddcfcf621f6d3 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -548,12 +548,11 @@ class InstructionBase(Record): of instructions. .. automethod:: __init__ - .. automethod:: assignees_and_indices - .. automethod:: assignee_name + .. automethod:: assignee_var_names + .. automethod:: assignee_subscript_deps .. automethod:: with_transformed_expressions .. automethod:: write_dependency_names .. automethod:: dependency_names - .. automethod:: assignee_var_names .. automethod:: copy """ @@ -661,10 +660,15 @@ class InstructionBase(Record): def reduction_inames(self): raise NotImplementedError - def assignees_and_indices(self): - """Return a list of tuples *(assignee_var_name, subscript)* - where assignee_var_name is a string representing an assigned - variable name and subscript is a :class:`tuple`. 
+ def assignee_var_names(self): + """Return a tuple of assignee variable names, one + for each quantity being assigned to. + """ + raise NotImplementedError + + def assignee_subscript_deps(self): + """Return a tuple of sets of variable names referred to in the subscripts + of the quantities being assigned to, one for each assignee. """ raise NotImplementedError @@ -679,20 +683,20 @@ class InstructionBase(Record): @property def assignee_name(self): - """A convenience wrapper around :meth:`assignees_and_indices` + """A convenience wrapper around :meth:`assignee_var_names` that returns the the name of the variable being assigned. If more than one variable is being modified in the instruction, :raise:`ValueError` is raised. """ - aai = self.assignees_and_indices() + names = self.assignee_names() - if len(aai) != 1: + if len(names) != 1: raise ValueError("expected exactly one assignment in instruction " "on which assignee_name is being called, found %d" - % len(aai)) + % len(names)) - (name, _), = aai + name, = names return name @memoize_method @@ -703,19 +707,15 @@ class InstructionBase(Record): """ result = set() - for assignee, indices in self.assignees_and_indices(): - result.add(assignee) + for assignee in self.assignees: from loopy.symbolic import get_dependencies - result.update(get_dependencies(indices)) + result.update(get_dependencies(assignee)) return frozenset(result) def dependency_names(self): return self.read_dependency_names() | self.write_dependency_names() - def assignee_var_names(self): - return (var_name for var_name, _ in self.assignees_and_indices()) - def get_str_options(self): result = [] @@ -813,15 +813,38 @@ class InstructionBase(Record): # }}} -def _get_assignee_and_index(expr): +def _get_assignee_var_name(expr): from pymbolic.primitives import Variable, Subscript + from loopy.symbolic import LinearSubscript + if isinstance(expr, Variable): - return (expr.name, ()) + return expr.name + elif isinstance(expr, Subscript): agg = 
expr.aggregate assert isinstance(agg, Variable) - return (agg.name, expr.index_tuple) + return agg.name + + elif isinstance(expr, LinearSubscript): + agg = expr.aggregate + assert isinstance(agg, Variable) + + return agg.name + else: + raise RuntimeError("invalid lvalue '%s'" % expr) + + +def _get_assignee_subscript_deps(expr): + from pymbolic.primitives import Variable, Subscript + from loopy.symbolic import LinearSubscript, get_dependencies + + if isinstance(expr, Variable): + return frozenset() + elif isinstance(expr, Subscript): + return get_dependencies(expr.index) + elif isinstance(expr, LinearSubscript): + return get_dependencies(expr.index) else: raise RuntimeError("invalid lvalue '%s'" % expr) @@ -983,8 +1006,8 @@ class MultiAssignmentBase(InstructionBase): def read_dependency_names(self): from loopy.symbolic import get_dependencies result = get_dependencies(self.expression) - for _, subscript in self.assignees_and_indices(): - result = result | get_dependencies(subscript) + for subscript_deps in self.assignee_subscript_deps(): + result = result | subscript_deps processed_predicates = frozenset( pred.lstrip("!") for pred in self.predicates) @@ -1123,8 +1146,11 @@ class Assignment(MultiAssignmentBase): # {{{ implement InstructionBase interface @memoize_method - def assignees_and_indices(self): - return [_get_assignee_and_index(self.assignee)] + def assignee_var_names(self): + return (_get_assignee_var_name(self.assignee),) + + def assignee_subscript_deps(self): + return (_get_assignee_subscript_deps(self.assignee),) def with_transformed_expressions(self, f, *args): return self.copy( @@ -1269,8 +1295,13 @@ class CallInstruction(MultiAssignmentBase): # {{{ implement InstructionBase interface @memoize_method - def assignees_and_indices(self): - return [_get_assignee_and_index(a) for a in self.assignees] + def assignee_var_names(self): + return tuple(_get_assignee_var_name(a) for a in self.assignees) + + def assignee_subscript_deps(self): + return tuple( + 
_get_assignee_subscript_deps(a) + for a in self.assignees) def with_transformed_expressions(self, f, *args): return self.copy( @@ -1464,17 +1495,21 @@ class CInstruction(InstructionBase): for name, iname_expr in self.iname_exprs: result.update(get_dependencies(iname_expr)) - for _, subscript in self.assignees_and_indices(): - result.update(get_dependencies(subscript)) + for subscript_deps in self.assignee_subscript_deps(): + result.update(subscript_deps) return frozenset(result) | self.predicates def reduction_inames(self): return set() - def assignees_and_indices(self): - return [_get_assignee_and_index(expr) - for expr in self.assignees] + def assignee_var_names(self): + return tuple(_get_assignee_var_name(expr) for expr in self.assignees) + + def assignee_subscript_deps(self): + return tuple( + _get_assignee_subscript_deps(a) + for a in self.assignees) def with_transformed_expressions(self, f, *args): return self.copy( diff --git a/loopy/maxima.py b/loopy/maxima.py index 29f974ff9d1f6f0a6e6d4311eca88201559e6854..738df86c4b94988b57d6dc01bcc22bb0ca62ac21 100644 --- a/loopy/maxima.py +++ b/loopy/maxima.py @@ -60,7 +60,7 @@ def get_loopy_instructions_as_maxima(kernel, prefix): my_variable_names = ( avn for insn in kernel.instructions - for avn, _ in insn.assignees_and_indices() + for avn in insn.assignee_var_names() ) from pymbolic import var @@ -89,7 +89,7 @@ def get_loopy_instructions_as_maxima(kernel, prefix): if dep not in written_insn_ids: write_insn(dep) - (aname, _), = insn.assignees_and_indices() + aname, = insn.assignee_var_names() result.append("%s%s : %s;" % ( prefix, aname, mstr(substitute(insn.expression)))) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 7d2404f5596dee5c5744880c6efab6129f867a6a..5690218e60ce0eb73a214917e5bbdc54ae3b10b9 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -140,8 +140,8 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): result_dtypes = type_inf_mapper(expr, 
multiple_types_ok=True) result = None - for (assignee, _), comp_dtype in zip( - writer_insn.assignees_and_indices(), result_dtypes): + for assignee, comp_dtype in zip( + writer_insn.assignee_var_names(), result_dtypes): if assignee == var_name: result = comp_dtype break @@ -305,12 +305,11 @@ def _get_compute_inames_tagged(kernel, insn, tag_base): def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_name): - from loopy.symbolic import get_dependencies - return set(iname - for aname, aindices in insn.assignees_and_indices() - for iname in get_dependencies(aindices) - & kernel.all_inames() + for aname, adeps in zip( + insn.assignee_var_names(), + insn.assignee_subscript_deps()) + for iname in adeps & kernel.all_inames() if aname == tv_name if isinstance(kernel.iname_to_tag.get(iname), tag_base)) @@ -963,7 +962,7 @@ def find_idempotence(kernel): if not boostable: non_idempotently_updated_vars.update( - var_name for var_name, _ in insn.assignees_and_indices()) + insn.assignee_var_names()) insn = insn.copy(boostable=boostable) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index cade5e5e060b26a8c6d44965e61500baa31e5c08..a83db407fcc616918339610d6a1d368ce1c74dea 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1286,6 +1286,9 @@ class AccessRangeMapper(WalkMapper): self.access_range = self.access_range | access_range + def map_linear_subscript(self, expr, inames): + self.bad_subscripts.append(expr) + def map_reduction(self, expr, inames): return WalkMapper.map_reduction(self, expr, inames | set(expr.inames)) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 6aca830d99c5637fc92e96f361c3c8bef5d65229..0b55054374fa461f85b6ba98ee58e8145af1e3bb 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -399,8 +399,9 @@ class CASTBuilder(ASTBuilderBase): if isinstance(func_id, Variable): func_id = func_id.name - assignee_var_descriptors = [codegen_state.kernel.get_var_descriptor(a) - for a, _ in 
insn.assignees_and_indices()] + assignee_var_descriptors = [ + codegen_state.kernel.get_var_descriptor(a) + for a in insn.assignee_var_names()] par_dtypes = tuple(ecm.infer_type(par) for par in parameters) diff --git a/loopy/transform/arithmetic.py b/loopy/transform/arithmetic.py index 2c50b3f11a34ced81d6a93eb467482110f495f2f..2939a74cfc921d27396974ce74aaeaab220cad43 100644 --- a/loopy/transform/arithmetic.py +++ b/loopy/transform/arithmetic.py @@ -136,9 +136,7 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): raise ValueError("unexpected type of access_expr") def is_assignee(insn): - return any( - lhs == var_name - for lhs, sbscript in insn.assignees_and_indices()) + return var_name in insn.assignee_var_names() def iterate_as(cls, expr): if isinstance(expr, cls): @@ -237,7 +235,7 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): new_insns.append(insn) continue - (_, index_key), = insn.assignees_and_indices() + index_key = insn.assignee.subscript lhs = insn.assignee rhs = insn.expression diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py index a7d22b2d0f7e376086b3a42ac2f186e2805fda26..fb32b3ce819686b556e5f3aafdbc8522ef01e937 100644 --- a/loopy/transform/buffer.py +++ b/loopy/transform/buffer.py @@ -239,8 +239,27 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, if not within(kernel, insn.id, ()): continue - for assignee, index in insn.assignees_and_indices(): - if assignee == var_name: + from pymbolic.primitives import Variable, Subscript + from loopy.symbolic import LinearSubscript + + for assignee in insn.assignees: + if isinstance(assignee, Variable): + assignee_name = assignee.name + index = () + + elif isinstance(assignee, Subscript): + assignee_name = assignee.aggregate.name + index = assignee.index_tuple + + elif isinstance(assignee, LinearSubscript): + if assignee.aggregate.name == var_name: + raise LoopyError("buffer_array may not be applied in the " + 
"presence of linear write indexing into '%s'" % var_name) + + else: + raise LoopyError("invalid lvalue '%s'" % assignee) + + if assignee_name == var_name: within_inames.update( (get_dependencies(index) & kernel.all_inames()) - buffer_inames_set) @@ -396,8 +415,7 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, did_write = False for insn_id in aar.modified_insn_ids: insn = kernel.id_to_insn[insn_id] - if any(assignee_name == buf_var_name - for assignee_name, _ in insn.assignees_and_indices()): + if buf_var_name in insn.assignee_var_names(): did_write = True # {{{ add init_insn_id to depends_on diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py index 6e9eb45bb58b7750c075ee7b79fb024e4cf4f24a..0d80ee99ccbe5611fc7169884e240342f3065525 100644 --- a/loopy/transform/diff.py +++ b/loopy/transform/diff.py @@ -295,7 +295,17 @@ class DifferentiationContext(object): if not diff_expr: return None - (_, lhs_ind), = orig_writer_insn.assignees_and_indices() + assert isinstance(orig_writer_insn, lp.Assignment) + from pymbolic import Variable, Subscript + if isinstance(orig_writer_insn.assignee, Subscript): + lhs_ind = orig_writer_insn.assignee.index_tuple + elif isinstance(orig_writer_insn.assignee, Variable): + lhs_ind = () + else: + raise LoopyError( + "Unrecognized LHS type in differentiation: %s" + % type(orig_writer_insn.assignee).__name__) + new_insn_id = self.generate_instruction_id() insn = lp.Assignment( id=new_insn_id, diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 24fc0d40a7d30085ea14bf8f5440a46f80100db6..6cbf43da3fb4a3484ba028c96a2c0151e0582658 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -391,11 +391,21 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, for def_id, subst_name in six.iteritems(tts.definition_insn_id_to_subst_name): def_insn = kernel.id_to_insn[def_id] - (_, indices), = def_insn.assignees_and_indices() + from loopy.kernel.data import 
Assignment + assert isinstance(def_insn, Assignment) + + from pymbolic import Variable, Subscript + if isinstance(def_insn.assignee, Subscript): + indices = def_insn.assignee.index_tuple + elif isinstance(def_insn.assignee, Variable): + indices = () + else: + raise LoopyError( + "Unrecognized LHS type: %s" + % type(def_insn.assignee).__name__) arguments = [] - from pymbolic.primitives import Variable for i in indices: if not isinstance(i, Variable): raise LoopyError("In defining instruction '%s': " diff --git a/test/test_loopy.py b/test/test_loopy.py index 3cf953ce41caa75b856c0c54fe23ed9e10afcd2a..063c9bee69eefe54b710b8576186b1e83b5810b3 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1124,6 +1124,26 @@ def test_global_temporary(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5)) +def test_assign_to_linear_subscript(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + knl1 = lp.make_kernel( + "{ [i]: 0<=i 1: exec(sys.argv[1]) diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py index 3eacbaa2850b12ab1130a0f4b02ac5698bc9fab9..389151f173cc835c0e01f2cbc3c640952cf3c575 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -93,7 +93,7 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # turn the first reads into subst rules local_prep_var_names = set() for insn in lp.find_instructions(hsv, "tag:local_prep"): - (assignee, _), = insn.assignees_and_indices() + assignee, = insn.assignee_var_names() local_prep_var_names.add(assignee) hsv = lp.assignment_to_subst(hsv, assignee) @@ -122,7 +122,7 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): ("rknl", rflux_insn, ("j", "n",), rtmps, ("jj", "ii",)), ("sknl", sflux_insn, ("i", "n",), stmps, ("ii", "jj",)), ]: - (flux_var, _), = insn.assignees_and_indices() + flux_var, = insn.assignee_var_names() print(insn) reader, = lp.find_instructions(hsv,