diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index 3a15e3a585afc9a9b181ae21b202e20a104ad2a1..97d71f3e04051d45a2f911eb0f7b2eca7147b96b 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -242,6 +242,12 @@ These are usually key-value pairs. The following attributes are recognized: heuristic and indicate that the specified list of dependencies is exhaustive. +* ``dep_query=...`` provides an alternative way of specifying instruction + dependencies. The given string is parsed as a match expression object by + :func:`loopy.match.parse_match`. Upon kernel generation, this match + expression is used to match instructions in the kernel and add them as + dependencies. + * ``nosync=id1:id2`` prescribes that no barrier synchronization is necessary the instructions with identifiers ``id1`` and ``id2`` to the, even if a dependency chain exists and variables are accessed in an apparently @@ -251,6 +257,9 @@ These are usually key-value pairs. The following attributes are recognized: function :func:`fnmatch.fnmatchcase`. This is helpful in conjunction with ``id_prefix``. +* ``nosync_query=...`` provides an alternative way of specifying ``nosync``, + just like ``dep_query`` and ``dep``. + * ``priority=integer`` sets the instructions priority to the value ``integer``. Instructions with higher priority will be scheduled sooner, if possible. 
Note that the scheduler may still schedule a lower-priority diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index e224bd0442233f9321293b4f53f39047507150b3..ab3035be0388877af12d71dd15b7dbb522c7b84e 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -149,9 +149,9 @@ def expand_defines_in_expr(expr, defines): def get_default_insn_options_dict(): return { - "depends_on": None, + "depends_on": frozenset(), "depends_on_is_final": False, - "no_sync_with": None, + "no_sync_with": frozenset(), "groups": frozenset(), "conflicts_with_groups": frozenset(), "insn_id": None, @@ -221,18 +221,33 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): result["depends_on_is_final"] = True opt_value = (opt_value[1:]).strip() - result["depends_on"] = frozenset( + result["depends_on"] = result["depends_on"].union(frozenset( intern(dep.strip()) for dep in opt_value.split(":") - if dep.strip()) + if dep.strip())) + + elif opt_key == "dep_query" and opt_value is not None: + from loopy.match import parse_match + match = parse_match(opt_value) + result["depends_on"] = result["depends_on"].union(frozenset([match])) elif opt_key == "nosync" and opt_value is not None: if is_with_block: raise LoopyError("'nosync' option may not be specified " "in a 'with' block") - result["no_sync_with"] = frozenset( + result["no_sync_with"] = result["no_sync_with"].union(frozenset( intern(dep.strip()) for dep in opt_value.split(":") - if dep.strip()) + if dep.strip())) + + elif opt_key == "nosync_query" and opt_value is not None: + if is_with_block: + raise LoopyError("'nosync' option may not be specified " + "in a 'with' block") + + from loopy.match import parse_match + match = parse_match(opt_value) + result["no_sync_with"] = result["no_sync_with"].union( + frozenset([match])) elif opt_key == "groups" and opt_value is not None: result["groups"] = frozenset( @@ -555,10 +570,16 @@ def parse_instructions(instructions, defines): continue elif isinstance(insn, 
InstructionBase): + def intern_if_str(s): + if isinstance(s, str): + return intern(s) + else: + return s + new_instructions.append( insn.copy( id=intern(insn.id) if isinstance(insn.id, str) else insn.id, - depends_on=frozenset(intern(dep) for dep in insn.depends_on), + depends_on=frozenset(intern_if_str(dep) for dep in insn.depends_on), groups=frozenset(intern(grp) for grp in insn.groups), conflicts_with_groups=frozenset( intern(grp) for grp in insn.conflicts_with_groups), @@ -1413,43 +1434,37 @@ def apply_default_order_to_args(kernel, default_order): # }}} -# {{{ resolve wildcard insn dependencies - -def find_matching_insn_ids(knl, dep): - from fnmatch import fnmatchcase +# {{{ resolve instruction dependencies - return [ - other_insn.id - for other_insn in knl.instructions - if fnmatchcase(other_insn.id, dep)] +def _resolve_dependencies(knl, insn, deps): + from loopy import find_instructions + from loopy.match import MatchExpressionBase - -def resove_wildcard_insn_ids(knl, deps): new_deps = [] - for dep in deps: - matches = find_matching_insn_ids(knl, dep) - if matches: - new_deps.extend(matches) + for dep in deps: + if isinstance(dep, MatchExpressionBase): + for new_dep in find_instructions(knl, dep): + if new_dep.id != insn.id: + new_deps.append(new_dep.id) else: - # Uh, best we can do - new_deps.append(dep) + from fnmatch import fnmatchcase + for other_insn in knl.instructions: + if fnmatchcase(other_insn.id, dep): + new_deps.append(other_insn.id) return frozenset(new_deps) -def resolve_wildcard_deps(knl): +def resolve_dependencies(knl): new_insns = [] for insn in knl.instructions: - if insn.depends_on is not None: - insn = insn.copy( - depends_on=resove_wildcard_insn_ids(knl, insn.depends_on), - no_sync_with=resove_wildcard_insn_ids( - knl, insn.no_sync_with), - ) - - new_insns.append(insn) + new_insns.append(insn.copy( + depends_on=_resolve_dependencies(knl, insn, insn.depends_on), + no_sync_with=_resolve_dependencies( + knl, insn, insn.no_sync_with), + )) 
return knl.copy(instructions=new_insns) @@ -1786,7 +1801,7 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): knl = expand_defines_in_shapes(knl, defines) knl = guess_arg_shape_if_requested(knl, default_order) knl = apply_default_order_to_args(knl, default_order) - knl = resolve_wildcard_deps(knl) + knl = resolve_dependencies(knl) knl = apply_single_writer_depencency_heuristic(knl, warn_if_used=False) # ------------------------------------------------------------------------- diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index c54d1fc329a3a8797b17458dc40e489044e9374a..52891624547934843a5b953415602ac0398956ac 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -51,6 +51,17 @@ class InstructionBase(Record): May be *None* to invoke the default. + There are two extensions to this: + + - You may use `*` as a wildcard in the given IDs. This will be expanded + to all matching instruction IDs during :func:`loopy.make_kernel`. + - Instead of an instruction ID, you may pass an instance of + :class:`loopy.match.MatchExpressionBase` into the :attr:`depends_on` + :class:`frozenset`. The given expression will be used to add any + matching instructions in the kernel to :attr:`depends_on` during + :func:`loopy.make_kernel`. Note that this is not meant as a user-facing + interface. + .. attribute:: depends_on_is_final A :class:`bool` determining whether :attr:`depends_on` constitutes @@ -82,7 +93,10 @@ class InstructionBase(Record): a :class:`frozenset` of :attr:`id` values of :class:`Instruction` instances with which no barrier synchronization is necessary, even given the existence - of a dependency chain and apparently conflicting access + of a dependency chain and apparently conflicting access. + + Note that :attr:`no_sync_with` allows instruction matching through wildcards + and match expressions, just like :attr:`depends_on`. .. rubric:: Conditionals