From 7ce132e7a8305da3bef12f75872baf0b51180b27 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 26 Feb 2016 19:57:56 -0600 Subject: [PATCH] Add data_flow argument to fuser --- loopy/transform/fusion.py | 62 ++++++++++++++++++++++++++++++++++----- test/test_loopy.py | 5 +++- 2 files changed, 59 insertions(+), 8 deletions(-) diff --git a/loopy/transform/fusion.py b/loopy/transform/fusion.py index bf435d3fe..1ad108b41 100644 --- a/loopy/transform/fusion.py +++ b/loopy/transform/fusion.py @@ -210,8 +210,9 @@ def _fuse_two_kernels(knla, knlb): from pymbolic.imperative.transform import \ fuse_instruction_streams_with_unique_ids - new_instructions, _ = fuse_instruction_streams_with_unique_ids( - knla.instructions, knlb.instructions) + new_instructions, old_b_id_to_new_b_id = \ + fuse_instruction_streams_with_unique_ids( + knla.instructions, knlb.instructions) # {{{ fuse assumptions @@ -283,12 +284,12 @@ def _fuse_two_kernels(knla, knlb): "target", knla.target, knlb.target), - options=knla.options) + options=knla.options), old_b_id_to_new_b_id # }}} -def fuse_kernels(kernels, suffixes=None): +def fuse_kernels(kernels, suffixes=None, data_flow=None): """Return a kernel that performs all the operations in all entries of *kernels*. @@ -296,6 +297,11 @@ def fuse_kernels(kernels, suffixes=None): :arg suffixes: If given, must be a list of strings of a length matching that of *kernels*. This will be used to disambiguate the names of temporaries, as described below. + :arg data_flow: A list of data dependencies + ``[(var_name, from_kernel, to_kernel), ...]``. + Based on this, the fuser will make every reader of *var_name* in + ``kernels[to_kernel]`` depend on every writer of *var_name* in + ``kernels[from_kernel]``. The components of the kernels are fused as follows: @@ -321,9 +327,16 @@ def fuse_kernels(kernels, suffixes=None): * The resulting kernel will contain all instructions from each entry of *kernels*. 
Clashing instruction IDs will be renamed to ensure uniqueness. + + .. versionchanged:: 2016.2 + + *data_flow* was added in version 2016.2 """ kernels = list(kernels) + if data_flow is None: + data_flow = [] + if suffixes: suffixes = list(suffixes) if len(suffixes) != len(kernels): @@ -356,9 +369,44 @@ def fuse_kernels(kernels, suffixes=None): # }}} - result = kernels.pop(0) - while kernels: - result = _fuse_two_kernels(result, kernels.pop(0)) + kernel_insn_ids = [] + result = None + + for knlb in kernels: + if result is None: + result = knlb + kernel_insn_ids.append([ + insn.id for insn in knlb.instructions]) + else: + result, old_b_id_to_new_b_id = _fuse_two_kernels( + knla=result, + knlb=knlb) + + kernel_insn_ids.append([ + old_b_id_to_new_b_id[insn.id] + for insn in knlb.instructions]) + + # {{{ realize data_flow dependencies + + id_to_insn = result.id_to_insn.copy() + + for var_name, from_kernel, to_kernel in data_flow: + from_writer_ids = frozenset( + insn_id + for insn_id in kernel_insn_ids[from_kernel] + if var_name in id_to_insn[insn_id].assignee_var_names()) + + for insn_id in kernel_insn_ids[to_kernel]: + insn = id_to_insn[insn_id] + if var_name in insn.read_dependency_names(): + insn = insn.copy(depends_on=insn.depends_on | from_writer_ids) + + id_to_insn[insn_id] = insn + + result = result.copy( + instructions=list(six.itervalues(id_to_insn))) + + # }}} return result diff --git a/test/test_loopy.py b/test/test_loopy.py index 1fed3289a..48ed2e2e7 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2269,7 +2269,10 @@ def test_finite_difference_expr_subst(ctx_factory): lp.GlobalArg("u", shape="n+2"), ]) - fused_knl = lp.fuse_kernels([fin_diff_knl, flux_knl]) + fused_knl = lp.fuse_kernels([fin_diff_knl, flux_knl], + data_flow=[ + ("f", 1, 0) + ]) fused_knl = lp.set_options(fused_knl, write_cl=True) evt, _ = fused_knl(queue, u=u, h=np.float32(1e-1)) -- GitLab