diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index 2d754dec23762b289d3bf30ed6a7740326b11817..8674434084077ba1f791c46123a083346715209e 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -249,16 +249,32 @@ These are usually key-value pairs. The following attributes are recognized: dependencies. * ``nosync=id1:id2`` prescribes that no barrier synchronization is necessary - the instructions with identifiers ``id1`` and ``id2`` to the, even if - a dependency chain exists and variables are accessed in an apparently - racy way. + for the instructions with identifiers ``id1`` and ``id2``, even if a + dependency chain exists and variables are accessed in an apparently racy + way. Identifiers here are allowed to be wildcards as defined by the Python function :func:`fnmatch.fnmatchcase`. This is helpful in conjunction with ``id_prefix``. + Identifiers (including wildcards) accept an optional ``@scope`` suffix, + which prescribes that no synchronization at level ``scope`` is needed. + This does not preclude barriers at levels different from ``scope``. + Allowable ``scope`` values are: + + * ``local`` + * ``global`` + * ``any`` + + As an example, ``nosync=id1@local:id2@global`` prescribes that no local + synchronization is needed with instruction ``id1`` and no global + synchronization is needed with instruction ``id2``. + + ``nosync=id1@any`` has the same effect as ``nosync=id1``. + * ``nosync_query=...`` provides an alternative way of specifying ``nosync``, - just like ``dep_query`` and ``dep``. + just like ``dep_query`` and ``dep``. As with ``nosync``, ``nosync_query`` + accepts an optional ``@scope`` suffix. * ``priority=integer`` sets the instructions priority to the value ``integer``. 
Instructions with higher priority will be scheduled sooner, diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 71b112775095155af455abe200d94dbff5ac0c94..6b57ccf6e162d3725ee07fc4f97263c511eaf3b8 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -1230,9 +1230,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): options.append( "conflicts=%s" % ":".join(insn.conflicts_with_groups)) if insn.no_sync_with: - # FIXME: Find a syntax to express scopes. - options.append("no_sync_with=%s" % ":".join(id for id, _ in - insn.no_sync_with)) + options.append("no_sync_with=%s" % ":".join( + "%s@%s" % entry for entry in sorted(insn.no_sync_with))) if lhs: core = "%s <- %s" % ( diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 0a030b1088bfcd6cad15146ae583e8a134b5a185..9a05408bb3c945f9ba9a5b11d28b02363b39b610 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -167,6 +167,11 @@ def get_default_insn_options_dict(): } +from collections import namedtuple + +_NosyncParseResult = namedtuple("_NosyncParseResult", "expr, scope") + + def parse_insn_options(opt_dict, options_str, assignee_names=None): if options_str is None: return opt_dict @@ -175,6 +180,20 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): result = opt_dict.copy() + def parse_nosync_option(opt_value): + if "@" in opt_value: + expr, scope = opt_value.split("@") + else: + expr = opt_value + scope = "any" + allowable_scopes = ("local", "global", "any") + if scope not in allowable_scopes: + raise ValueError( + "unknown scope for nosync option: '%s' " + "(allowable scopes are %s)" % + (scope, ', '.join("'%s'" % s for s in allowable_scopes))) + return _NosyncParseResult(expr, scope) + for option in options_str.split(","): option = option.strip() if not option: @@ -242,23 +261,24 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): raise LoopyError("'nosync' option may not be specified " "in a 'with' block") - 
# TODO: Come up with a syntax that allows the user to express - # different synchronization scopes. result["no_sync_with"] = result["no_sync_with"].union(frozenset( - (intern(dep.strip()), "any") - for dep in opt_value.split(":") if dep.strip())) + (option.expr.strip(), option.scope) + for option in ( + parse_nosync_option(entry) + for entry in opt_value.split(":")) + if option.expr.strip())) elif opt_key == "nosync_query" and opt_value is not None: if is_with_block: raise LoopyError("'nosync' option may not be specified " "in a 'with' block") + match_expr, scope = parse_nosync_option(opt_value) + from loopy.match import parse_match - match = parse_match(opt_value) - # TODO: Come up with a syntax that allows the user to express - # different synchronization scopes. + match = parse_match(match_expr) result["no_sync_with"] = result["no_sync_with"].union( - frozenset([(match, "any")])) + frozenset([(match, scope)])) elif opt_key == "groups" and opt_value is not None: result["groups"] = frozenset( @@ -1627,7 +1647,7 @@ def resolve_dependencies(knl): (resolved_insn_id, nosync_scope) for nosync_dep, nosync_scope in insn.no_sync_with for resolved_insn_id in - _resolve_dependencies(knl, insn, nosync_dep)), + _resolve_dependencies(knl, insn, (nosync_dep,))), )) return knl.copy(instructions=new_insns) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 4477f5bafc7dab867af63d5152f9cbdec12a0dda..2e81c2e382561bafd18b49c81fae31905eb10e8e 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -388,10 +388,8 @@ class InstructionBase(ImmutableRecord): if self.depends_on: result.append("dep="+":".join(self.depends_on)) if self.no_sync_with: - # TODO: Come up with a syntax to express different kinds of - # synchronization scopes. 
result.append("nosync="+":".join( - insn_id for insn_id, _ in self.no_sync_with)) + "%s@%s" % entry for entry in self.no_sync_with)) if self.groups: result.append("groups=%s" % ":".join(self.groups)) if self.conflicts_with_groups: diff --git a/test/test_loopy.py b/test/test_loopy.py index db4a382046cc1aaf1465e81cf493415ace57e64d..ba20b5866786cfb579567dc23d61d734868c8f14 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1995,6 +1995,25 @@ def test_integer_reduction(ctx_factory): assert function(out) +def test_nosync_option_parsing(): + knl = lp.make_kernel( + "{[i]: 0 <= i < 10}", + """ + <>t = 1 {id=insn1,nosync=insn1} + t = 2 {id=insn2,nosync=insn1:insn2} + t = 3 {id=insn3,nosync=insn1@local:insn2@global:insn3@any} + t = 4 {id=insn4,nosync_query=id:insn*@local} + t = 5 {id=insn5,nosync_query=id:insn1} + """, + options=lp.Options(allow_terminal_colors=False)) + kernel_str = str(knl) + assert "# insn1,no_sync_with=insn1@any" in kernel_str + assert "# insn2,no_sync_with=insn1@any:insn2@any" in kernel_str + assert "# insn3,no_sync_with=insn1@local:insn2@global:insn3@any" in kernel_str + assert "# insn4,no_sync_with=insn1@local:insn2@local:insn3@local:insn5@local" in kernel_str # noqa + assert "# insn5,no_sync_with=insn1@any" in kernel_str + + def assert_barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): from loopy.schedule import (RunInstruction, Barrier, EnterLoop, LeaveLoop) watch_for_barrier = False