diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 6837b99a026debf32b12aceef00ed3863c620639..ca70c8489238ee6f1fd95f52b02dbe451ddf13ef 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -534,7 +534,7 @@ def auto_test_vs_ref( from loopy.target.pyopencl import PyOpenCLTarget if test_knl.state not in [ KernelState.PREPROCESSED, - KernelState.SCHEDULED]: + KernelState.LINEARIZED]: if isinstance(test_knl.target, PyOpenCLTarget): test_knl = test_knl.copy(target=PyOpenCLTarget(ctx.devices[0])) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 11f874e1bd90bcfc1fe4595345c1b1efb2e6a35f..b4811dc9966921fa612aabef9a726d6b53fd4052 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -388,7 +388,7 @@ def generate_code_v2(kernel): from loopy.schedule import get_one_scheduled_kernel kernel = get_one_scheduled_kernel(kernel) - if kernel.state != KernelState.SCHEDULED: + if kernel.state != KernelState.LINEARIZED: raise LoopyError("cannot generate code for a kernel that has not been " "scheduled") diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index c3cd1738d7160c7feb6ef5d1042e3d41e19cdfdb..2d926aad4faa511aa2919630c9b0e96b7f253ad9 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -349,7 +349,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): if state not in [ KernelState.INITIAL, KernelState.PREPROCESSED, - KernelState.SCHEDULED, + KernelState.LINEARIZED, ]: raise ValueError("invalid value for 'state'") diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index bcb6faa3fabaebfd89fa46fff44d137915b1e4bb..61127232a9f494fe2fdc536dd50d8fdf41b8f17c 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -345,10 +345,13 @@ class InstructionBase(ImmutableRecord): """ raise NotImplementedError - def with_transformed_expressions(self, f, *args): + def with_transformed_expressions(self, f, assignee_f=None): """Return a new copy of *self* where *f* has been applied to every expression occurring in *self*. *args* will be passed as extra arguments (in addition to the expression) to *f*. + + If *assignee_f* is passed, then left-hand sides of assignments are + passed to it. If it is not given, it defaults to the same as *f*. """ raise NotImplementedError @@ -960,12 +963,15 @@ class Assignment(MultiAssignmentBase): def assignee_subscript_deps(self): return (_get_assignee_subscript_deps(self.assignee),) - def with_transformed_expressions(self, f, *args): + def with_transformed_expressions(self, f, assignee_f=None): + if assignee_f is None: + assignee_f = f + return self.copy( - assignee=f(self.assignee, *args), - expression=f(self.expression, *args), + assignee=assignee_f(self.assignee), + expression=f(self.expression), predicates=frozenset( - f(pred, *args) for pred in self.predicates)) + f(pred) for pred in self.predicates)) # }}} @@ -1115,12 +1121,15 @@ class CallInstruction(MultiAssignmentBase): _get_assignee_subscript_deps(a) for a in self.assignees) - def with_transformed_expressions(self, f, *args): + def with_transformed_expressions(self, f, assignee_f=None): + if assignee_f is None: + assignee_f = f + return self.copy( - assignees=f(self.assignees, *args), - expression=f(self.expression, *args), + assignees=assignee_f(self.assignees), + expression=f(self.expression), predicates=frozenset( - f(pred, *args) for pred in self.predicates)) + f(pred) for pred in self.predicates)) # }}} @@ -1316,14 +1325,17 @@ class CInstruction(InstructionBase): _get_assignee_subscript_deps(a) for a in self.assignees) - def with_transformed_expressions(self, f, *args): + def with_transformed_expressions(self, f, assignee_f=None): + if assignee_f is None: + assignee_f = f + return self.copy( iname_exprs=[ - (name, f(expr, *args)) + (name, f(expr)) for name, expr in self.iname_exprs], - assignees=[f(a, *args) for a in self.assignees], + assignees=[assignee_f(a) for a in self.assignees], predicates=frozenset( - f(pred, *args) for pred in self.predicates)) + f(pred) for pred in self.predicates)) # }}} @@ -1358,7 +1370,7 @@ class _DataObliviousInstruction(InstructionBase): def assignee_subscript_deps(self): return frozenset() - def with_transformed_expressions(self, f, *args): + def with_transformed_expressions(self, f, assignee_f=None): return self.copy( predicates=frozenset( f(pred) for pred in self.predicates)) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index bb6ae44c9bf8daefef5f6564fccbec58ba72a708..9e54bc25d09d031e5907f68f8b8fb34dfadad94a 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1726,8 +1726,8 @@ def get_subkernels(kernel): See also :class:`loopy.schedule.CallKernel`. """ from loopy.kernel import KernelState - if kernel.state != KernelState.SCHEDULED: - raise LoopyError("Kernel must be scheduled") + if kernel.state != KernelState.LINEARIZED: + raise LoopyError("Kernel must be linearized") from loopy.schedule import CallKernel @@ -1743,7 +1743,7 @@ def get_subkernel_to_insn_id_map(kernel): kernel must be scheduled. """ from loopy.kernel import KernelState - if kernel.state != KernelState.SCHEDULED: + if kernel.state != KernelState.LINEARIZED: raise LoopyError("Kernel must be scheduled") from loopy.schedule import ( diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 0983c5e0d513d51a04a3f6cf3033904435ef1412..032cdc2760597f1fa6f701a8a88252312deac797 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1841,7 +1841,7 @@ def generate_loop_schedules(kernel, debug_args={}): def generate_loop_schedules_inner(kernel, debug_args={}): from loopy.kernel import KernelState - if kernel.state not in (KernelState.PREPROCESSED, KernelState.SCHEDULED): + if kernel.state not in (KernelState.PREPROCESSED, KernelState.LINEARIZED): raise LoopyError("cannot schedule a kernel that has not been " "preprocessed") @@ -1852,7 +1852,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): debug = ScheduleDebugger(**debug_args) - preschedule = kernel.schedule if kernel.state == KernelState.SCHEDULED else () + preschedule = kernel.schedule if kernel.state == KernelState.LINEARIZED else () prescheduled_inames = set( insn.iname @@ -1904,7 +1904,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): unscheduled_insn_ids=set(insn.id for insn in kernel.instructions), scheduled_insn_ids=frozenset(), - within_subkernel=kernel.state != KernelState.SCHEDULED, + within_subkernel=kernel.state != KernelState.LINEARIZED, may_schedule_global_barriers=True, preschedule=preschedule, @@ -1973,11 +1973,11 @@ def generate_loop_schedules_inner(kernel, debug_args={}): new_kernel = kernel.copy( schedule=gen_sched, - state=KernelState.SCHEDULED) + state=KernelState.LINEARIZED) from loopy.schedule.device_mapping import \ map_schedule_onto_host_or_device - if kernel.state != KernelState.SCHEDULED: + if kernel.state != KernelState.LINEARIZED: # Device mapper only gets run once. new_kernel = map_schedule_onto_host_or_device(new_kernel) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 59afb07d2e9b7713dbe86c2c5aef7356decbbcff..d45c1ecbdc7ea091ce7d1a3899e82c14bb6fef2b 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -31,7 +31,7 @@ from loopy.schedule.tools import get_block_boundaries def map_schedule_onto_host_or_device(kernel): # FIXME: Should be idempotent. from loopy.kernel import KernelState - assert kernel.state == KernelState.SCHEDULED + assert kernel.state == KernelState.LINEARIZED from functools import partial device_prog_name_gen = partial( diff --git a/loopy/symbolic.py b/loopy/symbolic.py index ccac5e199d2b53e202dd735ffd8dfe20a7dc29a2..d3261b110eef73eb34769e8702af272875613c2c 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -971,7 +971,8 @@ class RuleAwareIdentityMapper(IdentityMapper): # may perform tasks entirely unrelated to subst rules, so # we must map assignees, too. self.map_instruction(kernel, - insn.with_transformed_expressions(self, kernel, insn)) + insn.with_transformed_expressions( + lambda expr: self(expr, kernel, insn))) for insn in kernel.instructions] return kernel.copy(instructions=new_insns) diff --git a/loopy/transform/data.py b/loopy/transform/data.py index a6a2d7b4fe4ba94caa8cbe112a5cf90719ceb643..1f0161c06868da4a7c71ba1ebf9eab8ef02eeb3d 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -285,15 +285,15 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, if temporary_name is None: temporary_name = var_name_gen("%s_fetch" % c_name) - arg = kernel.arg_dict[var_name] + var_descr = kernel.get_var_descriptor(var_name) # {{{ make parameter names and unification template parameters = [] - for i in range(arg.num_user_axes()): + for i in range(var_descr.num_user_axes()): based_on = "%s_dim_%d" % (c_name, i) - if arg.dim_names is not None: - based_on = "%s_dim_%s" % (c_name, arg.dim_names[i]) + if var_descr.dim_names is not None: + based_on = "%s_dim_%s" % (c_name, var_descr.dim_names[i]) if dim_arg_names is not None and i < len(dim_arg_names): based_on = dim_arg_names[i] @@ -322,7 +322,7 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, kernel, subst_use, sweep_inames, inames_to_be_removed = \ _process_footprint_subscripts( kernel, rule_name, sweep_inames, - footprint_subscripts, arg) + footprint_subscripts, var_descr) # Our _not_provided is actually a different object from the one in the # precompute module, but precompute acutally uses that to adjust its @@ -331,7 +331,7 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, from loopy.transform.precompute import precompute new_kernel = precompute(kernel, subst_use, sweep_inames, precompute_inames=dim_arg_names, - default_tag=default_tag, dtype=arg.dtype, + default_tag=default_tag, dtype=var_descr.dtype, fetch_bounding_box=fetch_bounding_box, temporary_name=temporary_name, temporary_address_space=temporary_address_space, diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 9f426f76bc6902fd09bd7685c73f187df935be1e..b308836c7727564dbfa9625ad39f378e8034c68c 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -229,7 +229,8 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): for insn in kernel.instructions: self.replaced_something = False - insn = insn.with_transformed_expressions(self, kernel, insn) + insn = insn.with_transformed_expressions( + lambda expr: self(expr, kernel, insn)) if self.replaced_something: insn = insn.copy( diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 3eee3d8f3093ce68670ab2c119f41bc385afde01..fa145fbbf3e51670f7ac42307e4ffde75df07618 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -148,8 +148,30 @@ def extract_subst(kernel, subst_name, template, parameters=()): new_insns = [] + def transform_assignee(expr): + # Assignment LHS's cannot be subst rules. Treat them + # specially. + + import pymbolic.primitives as prim + if isinstance(expr, tuple): + return tuple( + transform_assignee(expr_i) + for expr_i in expr) + + elif isinstance(expr, prim.Subscript): + return type(expr)( + expr.aggregate, + cbmapper(expr.index)) + + elif isinstance(expr, prim.Variable): + return expr + else: + raise ValueError("assignment LHS not understood") + for insn in kernel.instructions: - new_insns.append(insn.with_transformed_expressions(cbmapper)) + new_insns.append( + insn.with_transformed_expressions( + cbmapper, assignee_f=transform_assignee)) from loopy.kernel.data import SubstitutionRule new_substs = { diff --git a/test/test_transform.py b/test/test_transform.py index 6eb6697b5c192911864000781381244dfcbef631..d21d2c9da7ac28f119d3c5b475116dcc4b19e871 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -585,6 +585,40 @@ def test_extract_subst_with_iname_deps_in_templ(ctx_factory): lp.auto_test_vs_ref(knl, ctx_factory(), knl) +def test_prefetch_local_into_private(): + # https://gitlab.tiker.net/inducer/loopy/-/issues/210 + n = 32 + m = 32 + n_vecs = 32 + + knl = lp.make_kernel( + """{[k,i,j]: + 0<=k 1: exec(sys.argv[1])