# {{{ add_sequential_dependencies

def add_sequential_dependencies(knl):
    """Return a copy of *knl* whose instructions are chained in order.

    Every instruction except the first gets the ``id`` of the instruction
    immediately before it (in ``knl.instructions`` order) added to its
    ``depends_on`` set. A ``depends_on`` of *None* is treated as an empty
    set. The first instruction is passed through unchanged.

    :arg knl: an object exposing ``instructions`` (an iterable of
        instructions with ``id``, ``depends_on`` and ``copy``) and ``copy``.
    :returns: ``knl.copy(instructions=...)`` carrying the chained
        instructions as a list.
    """
    chained = []
    predecessor = None

    for current in knl.instructions:
        if predecessor is not None:
            existing = current.depends_on
            existing = frozenset() if existing is None else existing
            # Link to the (possibly already rewritten) previous instruction.
            current = current.copy(
                    depends_on=existing | frozenset((predecessor.id,)))

        chained.append(current)
        predecessor = current

    return knl.copy(instructions=chained)

# }}}
""" defines = kwargs.pop("defines", {}) @@ -1536,6 +1564,7 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): options = kwargs.pop("options", None) flags = kwargs.pop("flags", None) target = kwargs.pop("target", None) + seq_dependencies = kwargs.pop("seq_dependencies", False) if defines: from warnings import warn @@ -1636,6 +1665,9 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): target=target, **kwargs) + if seq_dependencies: + knl = add_sequential_dependencies(knl) + assert len(knl.instructions) == len(inames_to_dup) from loopy import duplicate_inames diff --git a/test/test_loopy.py b/test/test_loopy.py index a204c666cfa830d54f8380c5a077854460efd726..501b14881821a7494db35e179f860e908cbae6c7 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1356,6 +1356,26 @@ def test_regression_persistent_hash(): assert lkb(knl1) != lkb(knl2) +def test_sequential_dependencies(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel( + "{[i]: 0<=i aa = 5jf + <> bb = 5j + a[i] = imag(aa) + b[i] = imag(bb) + c[i] = 5f + end + """, seq_dependencies=True) + + print(knl.stringify(with_dependencies=True)) + + lp.auto_test_vs_ref(knl, ctx, knl, parameters=dict(n=5)) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1])