From 4c89a95753cbd34902fab0b02d71a8845ac487c8 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Wed, 8 Apr 2020 16:30:42 -0500 Subject: [PATCH 1/7] add_prefetch: use get_var_descriptor, to also work on temporaries --- loopy/transform/data.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/loopy/transform/data.py b/loopy/transform/data.py index a6a2d7b4f..1f0161c06 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -285,15 +285,15 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, if temporary_name is None: temporary_name = var_name_gen("%s_fetch" % c_name) - arg = kernel.arg_dict[var_name] + var_descr = kernel.get_var_descriptor(var_name) # {{{ make parameter names and unification template parameters = [] - for i in range(arg.num_user_axes()): + for i in range(var_descr.num_user_axes()): based_on = "%s_dim_%d" % (c_name, i) - if arg.dim_names is not None: - based_on = "%s_dim_%s" % (c_name, arg.dim_names[i]) + if var_descr.dim_names is not None: + based_on = "%s_dim_%s" % (c_name, var_descr.dim_names[i]) if dim_arg_names is not None and i < len(dim_arg_names): based_on = dim_arg_names[i] @@ -322,7 +322,7 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, kernel, subst_use, sweep_inames, inames_to_be_removed = \ _process_footprint_subscripts( kernel, rule_name, sweep_inames, - footprint_subscripts, arg) + footprint_subscripts, var_descr) # Our _not_provided is actually a different object from the one in the # precompute module, but precompute acutally uses that to adjust its @@ -331,7 +331,7 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, from loopy.transform.precompute import precompute new_kernel = precompute(kernel, subst_use, sweep_inames, precompute_inames=dim_arg_names, - default_tag=default_tag, dtype=arg.dtype, + default_tag=default_tag, dtype=var_descr.dtype, fetch_bounding_box=fetch_bounding_box, temporary_name=temporary_name, temporary_address_space=temporary_address_space, -- GitLab From 5c8a68cd01a8052d1b56828db2d4cb6ca0b789a6 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Wed, 8 Apr 2020 16:33:24 -0500 Subject: [PATCH 2/7] Drop extra user args from 'Instruction.with_transformed_expressions --- loopy/kernel/instruction.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index bcb6faa3f..13b236afd 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -345,7 +345,7 @@ class InstructionBase(ImmutableRecord): """ raise NotImplementedError - def with_transformed_expressions(self, f, *args): + def with_transformed_expressions(self, f): """Return a new copy of *self* where *f* has been applied to every expression occurring in *self*. *args* will be passed as extra arguments (in addition to the expression) to *f*. @@ -960,10 +960,10 @@ class Assignment(MultiAssignmentBase): def assignee_subscript_deps(self): return (_get_assignee_subscript_deps(self.assignee),) - def with_transformed_expressions(self, f, *args): + def with_transformed_expressions(self, f): return self.copy( - assignee=f(self.assignee, *args), - expression=f(self.expression, *args), + assignee=f(self.assignee), + expression=f(self.expression), predicates=frozenset( f(pred, *args) for pred in self.predicates)) @@ -1358,7 +1358,7 @@ class _DataObliviousInstruction(InstructionBase): def assignee_subscript_deps(self): return frozenset() - def with_transformed_expressions(self, f, *args): + def with_transformed_expressions(self, f): return self.copy( predicates=frozenset( f(pred) for pred in self.predicates)) -- GitLab From aa1df4b11ab1ccaa2a4ba0a0beffaa60d4e23b35 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Thu, 9 Apr 2020 13:21:30 -0500 Subject: [PATCH 3/7] Instruction.with_transformed_expressions: add assignee_f, refactor some uses to avoid extra args --- loopy/kernel/instruction.py | 35 ++++++++++++++++++++++------------- loopy/symbolic.py | 3 ++- loopy/transform/precompute.py | 3 ++- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 13b236afd..73760978c 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -345,10 +345,13 @@ class InstructionBase(ImmutableRecord): """ raise NotImplementedError - def with_transformed_expressions(self, f): + def with_transformed_expressions(self, f, assignee_f=None): """Return a new copy of *self* where *f* has been applied to every expression occurring in *self*. *args* will be passed as extra arguments (in addition to the expression) to *f*. + + If *assignee_f* is passed, then left-hand sides of assignments are + passed to it. If it is not given, it defaults to the same as *f*. """ raise NotImplementedError @@ -960,12 +963,15 @@ class Assignment(MultiAssignmentBase): def assignee_subscript_deps(self): return (_get_assignee_subscript_deps(self.assignee),) - def with_transformed_expressions(self, f): + def with_transformed_expressions(self, f, assignee_f=None): + if assignee_f is None: + assignee_f = f + return self.copy( - assignee=f(self.assignee), + assignee=assignee_f(self.assignee), expression=f(self.expression), predicates=frozenset( - f(pred, *args) for pred in self.predicates)) + f(pred) for pred in self.predicates)) # }}} @@ -1115,12 +1121,15 @@ class CallInstruction(MultiAssignmentBase): _get_assignee_subscript_deps(a) for a in self.assignees) - def with_transformed_expressions(self, f, *args): + def with_transformed_expressions(self, f, assignee_f=None): + if assignee_f is None: + assignee_f = f + return self.copy( - assignees=f(self.assignees, *args), - expression=f(self.expression, *args), + assignees=assignee_f(self.assignees), + expression=f(self.expression), predicates=frozenset( - f(pred, *args) for pred in self.predicates)) + f(pred) for pred in self.predicates)) # }}} @@ -1316,14 +1325,14 @@ class CInstruction(InstructionBase): _get_assignee_subscript_deps(a) for a in self.assignees) - def with_transformed_expressions(self, f, *args): + def with_transformed_expressions(self, f, assignee_f=None): return self.copy( iname_exprs=[ - (name, f(expr, *args)) + (name, f(expr)) for name, expr in self.iname_exprs], - assignees=[f(a, *args) for a in self.assignees], + assignees=[assignee_f(a) for a in self.assignees], predicates=frozenset( - f(pred, *args) for pred in self.predicates)) + f(pred) for pred in self.predicates)) # }}} @@ -1358,7 +1367,7 @@ class _DataObliviousInstruction(InstructionBase): def assignee_subscript_deps(self): return frozenset() - def with_transformed_expressions(self, f): + def with_transformed_expressions(self, f, assignee_f=None): return self.copy( predicates=frozenset( f(pred) for pred in self.predicates)) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index ccac5e199..d3261b110 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -971,7 +971,8 @@ class RuleAwareIdentityMapper(IdentityMapper): # may perform tasks entirely unrelated to subst rules, so # we must map assignees, too. self.map_instruction(kernel, - insn.with_transformed_expressions(self, kernel, insn)) + insn.with_transformed_expressions( + lambda expr: self(expr, kernel, insn))) for insn in kernel.instructions] return kernel.copy(instructions=new_insns) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 9f426f76b..b308836c7 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -229,7 +229,8 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): for insn in kernel.instructions: self.replaced_something = False - insn = insn.with_transformed_expressions(self, kernel, insn) + insn = insn.with_transformed_expressions( + lambda expr: self(expr, kernel, insn)) if self.replaced_something: insn = insn.copy( -- GitLab From dbd4476b56e48e7a9efeba41d546a80b539385da Mon Sep 17 00:00:00 2001 From: "[6~" Date: Thu, 9 Apr 2020 13:31:27 -0500 Subject: [PATCH 4/7] extract_subst: avoid transforming assignment LHS --- loopy/transform/subst.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 3eee3d8f3..fa145fbbf 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -148,8 +148,30 @@ def extract_subst(kernel, subst_name, template, parameters=()): new_insns = [] + def transform_assignee(expr): + # Assignment LHS's cannot be subst rules. Treat them + # specially. + + import pymbolic.primitives as prim + if isinstance(expr, tuple): + return tuple( + transform_assignee(expr_i) + for expr_i in expr) + + elif isinstance(expr, prim.Subscript): + return type(expr)( + expr.aggregate, + cbmapper(expr.index)) + + elif isinstance(expr, prim.Variable): + return expr + else: + raise ValueError("assignment LHS not understood") + for insn in kernel.instructions: - new_insns.append(insn.with_transformed_expressions(cbmapper)) + new_insns.append( + insn.with_transformed_expressions( + cbmapper, assignee_f=transform_assignee)) from loopy.kernel.data import SubstitutionRule new_substs = { -- GitLab From 20b1e1c0dc1efeca99dd94f72c7e09a0a31afae2 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Thu, 9 Apr 2020 13:34:46 -0500 Subject: [PATCH 5/7] Fix references to KernelState.SCHEDULED to minimize warning avalanche --- loopy/auto_test.py | 2 +- loopy/codegen/__init__.py | 2 +- loopy/kernel/__init__.py | 2 +- loopy/kernel/tools.py | 6 +++--- loopy/schedule/__init__.py | 10 +++++----- loopy/schedule/device_mapping.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 6837b99a0..ca70c8489 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -534,7 +534,7 @@ def auto_test_vs_ref( from loopy.target.pyopencl import PyOpenCLTarget if test_knl.state not in [ KernelState.PREPROCESSED, - KernelState.SCHEDULED]: + KernelState.LINEARIZED]: if isinstance(test_knl.target, PyOpenCLTarget): test_knl = test_knl.copy(target=PyOpenCLTarget(ctx.devices[0])) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 11f874e1b..b4811dc99 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -388,7 +388,7 @@ def generate_code_v2(kernel): from loopy.schedule import get_one_scheduled_kernel kernel = get_one_scheduled_kernel(kernel) - if kernel.state != KernelState.SCHEDULED: + if kernel.state != KernelState.LINEARIZED: raise LoopyError("cannot generate code for a kernel that has not been " "scheduled") diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index c3cd1738d..2d926aad4 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -349,7 +349,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): if state not in [ KernelState.INITIAL, KernelState.PREPROCESSED, - KernelState.SCHEDULED, + KernelState.LINEARIZED, ]: raise ValueError("invalid value for 'state'") diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index bb6ae44c9..9e54bc25d 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1726,8 +1726,8 @@ def get_subkernels(kernel): See also :class:`loopy.schedule.CallKernel`. """ from loopy.kernel import KernelState - if kernel.state != KernelState.SCHEDULED: - raise LoopyError("Kernel must be scheduled") + if kernel.state != KernelState.LINEARIZED: + raise LoopyError("Kernel must be linearized") from loopy.schedule import CallKernel @@ -1743,7 +1743,7 @@ def get_subkernel_to_insn_id_map(kernel): kernel must be scheduled. """ from loopy.kernel import KernelState - if kernel.state != KernelState.SCHEDULED: + if kernel.state != KernelState.LINEARIZED: raise LoopyError("Kernel must be scheduled") from loopy.schedule import ( diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 0983c5e0d..032cdc276 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1841,7 +1841,7 @@ def generate_loop_schedules(kernel, debug_args={}): def generate_loop_schedules_inner(kernel, debug_args={}): from loopy.kernel import KernelState - if kernel.state not in (KernelState.PREPROCESSED, KernelState.SCHEDULED): + if kernel.state not in (KernelState.PREPROCESSED, KernelState.LINEARIZED): raise LoopyError("cannot schedule a kernel that has not been " "preprocessed") @@ -1852,7 +1852,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): debug = ScheduleDebugger(**debug_args) - preschedule = kernel.schedule if kernel.state == KernelState.SCHEDULED else () + preschedule = kernel.schedule if kernel.state == KernelState.LINEARIZED else () prescheduled_inames = set( insn.iname @@ -1904,7 +1904,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): unscheduled_insn_ids=set(insn.id for insn in kernel.instructions), scheduled_insn_ids=frozenset(), - within_subkernel=kernel.state != KernelState.SCHEDULED, + within_subkernel=kernel.state != KernelState.LINEARIZED, may_schedule_global_barriers=True, preschedule=preschedule, @@ -1973,11 +1973,11 @@ def generate_loop_schedules_inner(kernel, debug_args={}): new_kernel = kernel.copy( schedule=gen_sched, - state=KernelState.SCHEDULED) + state=KernelState.LINEARIZED) from loopy.schedule.device_mapping import \ map_schedule_onto_host_or_device - if kernel.state != KernelState.SCHEDULED: + if kernel.state != KernelState.LINEARIZED: # Device mapper only gets run once. new_kernel = map_schedule_onto_host_or_device(new_kernel) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 59afb07d2..d45c1ecbd 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -31,7 +31,7 @@ from loopy.schedule.tools import get_block_boundaries def map_schedule_onto_host_or_device(kernel): # FIXME: Should be idempotent. from loopy.kernel import KernelState - assert kernel.state == KernelState.SCHEDULED + assert kernel.state == KernelState.LINEARIZED from functools import partial device_prog_name_gen = partial( -- GitLab From bc39e9fd09b76d3e61e002916d2d04d43e120914 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Thu, 9 Apr 2020 13:40:01 -0500 Subject: [PATCH 6/7] Add test for local->private prefetch --- test/test_transform.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/test/test_transform.py b/test/test_transform.py index 6eb6697b5..d21d2c9da 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -585,6 +585,40 @@ def test_extract_subst_with_iname_deps_in_templ(ctx_factory): lp.auto_test_vs_ref(knl, ctx_factory(), knl) +def test_prefetch_local_into_private(): + # https://gitlab.tiker.net/inducer/loopy/-/issues/210 + n = 32 + m = 32 + n_vecs = 32 + + knl = lp.make_kernel( + """{[k,i,j]: + 0<=k 1: exec(sys.argv[1]) -- GitLab From 4c23218e03c319331aa13a79aff8f45b1e57a007 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Thu, 9 Apr 2020 14:02:05 -0500 Subject: [PATCH 7/7] Add missing treatment of unspecified assignee_f in CInstruction.with_transformed_expressions --- loopy/kernel/instruction.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 73760978c..61127232a 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1326,6 +1326,9 @@ class CInstruction(InstructionBase): for a in self.assignees) def with_transformed_expressions(self, f, assignee_f=None): + if assignee_f is None: + assignee_f = f + return self.copy( iname_exprs=[ (name, f(expr)) -- GitLab