From 43b1c178fa3b10b7662d4379597ddb891905da99 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 13:12:16 -0500 Subject: [PATCH 1/6] replace_instruction_ids: Respect no_sync_with (closes #51). --- loopy/transform/instruction.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 7c9c96886..6d7a676b0 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -34,7 +34,6 @@ def find_instructions(kernel, insn_match): match = parse_match(insn_match) return [insn for insn in kernel.instructions if match(kernel, insn)] - # }}} @@ -171,6 +170,7 @@ def replace_instruction_ids(kernel, replacements): for insn in kernel.instructions: changed = False new_depends_on = [] + new_no_sync_with = [] for dep in insn.depends_on: if dep in replacements: @@ -179,8 +179,18 @@ def replace_instruction_ids(kernel, replacements): else: new_depends_on.append(dep) + for insn_id, scope in insn.no_sync_with: + if insn_id in replacements: + new_no_sync_with.extend( + (repl, scope) for repl in replacements[insn_id]) + changed = True + else: + new_no_sync_with.append((insn_id, scope)) + new_insns.append( - insn.copy(depends_on=frozenset(new_depends_on)) + insn.copy( + depends_on=frozenset(new_depends_on), + no_sync_with=frozenset(new_no_sync_with)) if changed else insn) return kernel.copy(instructions=new_insns) -- GitLab From 808eccf46985a3180c5059ceea20e89d7c225bda Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 13:14:46 -0500 Subject: [PATCH 2/6] Implement loopy.add_nosync() as a transformation. --- loopy/__init__.py | 4 +- loopy/transform/instruction.py | 73 ++++++++++++++++++++++++++++++++++ test/test_transform.py | 36 +++++++++++++++++ 3 files changed, 112 insertions(+), 1 deletion(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 6cbb3362e..53dd9c8ee 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -75,7 +75,8 @@ from loopy.transform.instruction import ( set_instruction_priority, add_dependency, remove_instructions, replace_instruction_ids, - tag_instructions) + tag_instructions, + add_nosync) from loopy.transform.data import ( add_prefetch, change_arg_to_image, @@ -189,6 +190,7 @@ __all__ = [ "remove_instructions", "replace_instruction_ids", "tag_instructions", + "add_nosync", "extract_subst", "expand_subst", "assignment_to_subst", "find_rules_matching", "find_one_rule_matching", diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 6d7a676b0..410274f90 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -217,4 +217,77 @@ def tag_instructions(kernel, new_tag, within=None): # }}} +# {{{ add nosync + +def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): + """Add a *no_sync_with* directive between *source* and *sink*. + *no_sync_with* is only added if a (syntactic) dependency edge + is present or if the instruction pair is in a conflicting group + (this does not check for memory dependencies). + + :arg kernel: + :arg source: Either a single instruction id, or any instruction id + match understood by :func:`loopy.match.parse_match`. + :arg sink: Either a single instruction id, or any instruction id + match understood by :func:`loopy.match.parse_match`. + :arg scope: A string which is a valid *no_sync_with* scope. + :arg bidirectional: A :class:`bool`. If *True*, add a *no_sync_with* + to both the source and sink instructions, otherwise the directive + is only added to the sink instructions. + :arg force: A :class:`bool`. If *True*, will add a *no_sync_with* + even without the presence of a syntactic dependency edge/ + conflicting instruction group. + + :return: The updated kernel + """ + + if isinstance(source, str) and source in kernel.id_to_insn: + sources = frozenset([source]) + else: + sources = frozenset( + source.id for source in find_instructions(kernel, source)) + + if isinstance(sink, str) and sink in kernel.id_to_insn: + sinks = frozenset([sink]) + else: + sinks = frozenset( + sink.id for sink in find_instructions(kernel, sink)) + + def insns_in_conflicting_groups(insn1_id, insn2_id): + insn1 = kernel.id_to_insn[insn1_id] + insn2 = kernel.id_to_insn[insn2_id] + return ( + bool(insn1.groups & insn2.conflicts_with_groups) + or + bool(insn2.groups & insn1.conflicts_with_groups)) + + from collections import defaultdict + nosync_to_add = defaultdict(set) + + for sink in sinks: + for source in sources: + + needs_nosync = force or ( + source in kernel.recursive_insn_dep_map()[sink] + or insns_in_conflicting_groups(source, sink)) + + if not needs_nosync: + continue + + nosync_to_add[sink].add((source, scope)) + if bidirectional: + nosync_to_add[source].add((sink, scope)) + + new_instructions = list(kernel.instructions) + + for i, insn in enumerate(new_instructions): + if insn.id in nosync_to_add: + new_instructions[i] = insn.copy(no_sync_with=insn.no_sync_with + | frozenset(nosync_to_add[insn.id])) + + return kernel.copy(instructions=new_instructions) + +# }}} + + # vim: foldmethod=marker diff --git a/test/test_transform.py b/test/test_transform.py index ac5a26f6a..b5fcdf04c 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -402,6 +402,42 @@ def test_precompute_with_preexisting_inames_fail(): precompute_inames="ii,jj") +def test_add_nosync(): + orig_knl = lp.make_kernel("{[i]: 0<=i<10}", + """ + <>tmp[i] = 10 {id=insn1} + <>tmp2[i] = 10 {id=insn2} + + <>tmp3[2*i] = 0 {id=insn3} + <>tmp4 = 1 + tmp3[2*i] {id=insn4} + + <>tmp5[i] = 0 {id=insn5,groups=g1} + tmp5[i] = 1 {id=insn6,conflicts=g1} + """) + + orig_knl = lp.set_temporary_scope(orig_knl, "tmp3", "local") + orig_knl = lp.set_temporary_scope(orig_knl, "tmp5", "local") + + # No dependency present - don't add nosync + knl = lp.add_nosync(orig_knl, "any", "writes:tmp", "writes:tmp2") + assert frozenset() == knl.id_to_insn["insn2"].no_sync_with + + # Dependency present + knl = lp.add_nosync(orig_knl, "local", "writes:tmp3", "reads:tmp3") + assert frozenset() == knl.id_to_insn["insn3"].no_sync_with + assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with + + # Bidirectional + knl = lp.add_nosync( + orig_knl, "local", "writes:tmp3", "reads:tmp3", bidirectional=True) + assert frozenset([("insn4", "local")]) == knl.id_to_insn["insn3"].no_sync_with + assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with + + # Groups + knl = lp.add_nosync(orig_knl, "local", "insn5", "insn6") + assert frozenset([("insn5", "local")]) == knl.id_to_insn["insn6"].no_sync_with + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From 8ef9b265cc64cc55935fe6d464aef8253020bc28 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 13:22:37 -0500 Subject: [PATCH 3/6] Remove_instructions: Update no_sync_with (closes #60). --- loopy/transform/instruction.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 410274f90..381a3b7c9 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -154,7 +154,14 @@ def remove_instructions(kernel, insn_ids): for dep_id in depends_on & insn_ids: new_deps = new_deps | id_to_insn[dep_id].depends_on - new_insns.append(insn.copy(depends_on=frozenset(new_deps))) + # update no_sync_with + + new_no_sync_with = frozenset((insn_id, scope) + for insn_id, scope in insn.no_sync_with + if insn_id not in insn_ids) + + new_insns.append( + insn.copy(depends_on=new_deps, no_sync_with=new_no_sync_with)) return kernel.copy( instructions=new_insns) -- GitLab From 4f83a824faffd49850bc1faaeb0696d29d4e6a3a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 13:28:15 -0500 Subject: [PATCH 4/6] Update/clarify documentation. --- loopy/transform/instruction.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 381a3b7c9..6f5182e56 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -129,6 +129,8 @@ def remove_instructions(kernel, insn_ids): Dependencies across (one, for now) deleted isntructions are propagated. Behavior is undefined for now for chains of dependencies within the set of deleted instructions. + + This also updated *no_sync_with* for all instructions. """ if not insn_ids: @@ -228,7 +230,7 @@ def tag_instructions(kernel, new_tag, within=None): def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): """Add a *no_sync_with* directive between *source* and *sink*. - *no_sync_with* is only added if a (syntactic) dependency edge + *no_sync_with* is only added if an (execution) dependency is present or if the instruction pair is in a conflicting group (this does not check for memory dependencies). -- GitLab From e785c228970e64ef24c6425698c6b07afc980138 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 13:30:26 -0500 Subject: [PATCH 5/6] Typo fix. --- loopy/transform/instruction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 6f5182e56..67eebc3e0 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -130,7 +130,7 @@ def remove_instructions(kernel, insn_ids): Behavior is undefined for now for chains of dependencies within the set of deleted instructions. - This also updated *no_sync_with* for all instructions. + This also updates *no_sync_with* for all instructions. """ if not insn_ids: -- GitLab From efdf0d5e135b50e17075e92d1093b0eaac259975 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 13 Apr 2017 10:07:44 -0500 Subject: [PATCH 6/6] * Tweak *add_nosync* documentation. * Make *add_nosync* visible in the loopy docs. --- doc/ref_transform.rst | 2 ++ doc/tutorial.rst | 2 ++ loopy/transform/instruction.py | 18 ++++++++++-------- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst index 4a07b6333..d293e3ebe 100644 --- a/doc/ref_transform.rst +++ b/doc/ref_transform.rst @@ -72,6 +72,8 @@ Manipulating Instructions .. autofunction:: tag_instructions +.. autofunction:: add_nosync + Registering Library Routines ---------------------------- diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 942c7d56e..5eaa12b81 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1479,6 +1479,8 @@ Barriers :mod:`loopy` may infer the need for a barrier when it is not necessary. The ``no_sync_with`` instruction attribute can be used to resolve this. +See also :func:`loopy.add_nosync`. + TODO .. }}} diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 67eebc3e0..2be78f8e5 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -230,22 +230,24 @@ def tag_instructions(kernel, new_tag, within=None): def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): """Add a *no_sync_with* directive between *source* and *sink*. - *no_sync_with* is only added if an (execution) dependency - is present or if the instruction pair is in a conflicting group - (this does not check for memory dependencies). + *no_sync_with* is only added if *sink* depends on *source* or + if the instruction pair is in a conflicting group. - :arg kernel: + This function does not check for the presence of a memory dependency. + + :arg kernel: The kernel :arg source: Either a single instruction id, or any instruction id match understood by :func:`loopy.match.parse_match`. :arg sink: Either a single instruction id, or any instruction id match understood by :func:`loopy.match.parse_match`. - :arg scope: A string which is a valid *no_sync_with* scope. + :arg scope: A valid *no_sync_with* scope. See + :attr:`loopy.InstructionBase.no_sync_with` for allowable scopes. :arg bidirectional: A :class:`bool`. If *True*, add a *no_sync_with* to both the source and sink instructions, otherwise the directive is only added to the sink instructions. - :arg force: A :class:`bool`. If *True*, will add a *no_sync_with* - even without the presence of a syntactic dependency edge/ - conflicting instruction group. + :arg force: A :class:`bool`. If *True*, add a *no_sync_with* directive + even without the presence of a dependency edge or conflicting + instruction group. :return: The updated kernel """ -- GitLab