From 577db396135869838cc4b699d72609b3a2156471 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Sun, 8 May 2016 21:00:41 -0500 Subject: [PATCH] Add Instruction.no_sync_with --- doc/ref_kernel.rst | 15 +++++++++--- loopy/kernel/creation.py | 49 ++++++++++++++++++++++++++++------------ loopy/kernel/data.py | 36 +++++++++++++++++++++++++---- loopy/preprocess.py | 4 +++- loopy/schedule.py | 3 +++ loopy/version.py | 2 +- 6 files changed, 86 insertions(+), 23 deletions(-) diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index d151a2128..a323fff52 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -217,9 +217,9 @@ These are usually key-value pairs. The following attributes are recognized: dependency is that the code generated for this instruction is required to appear textually after all of these dependees' generated code. - Identifiers here are allowed to be wildcards as defined by - the Python module :mod:`fnmatchcase`. This is helpful in conjunction - with ``id_prefix``. + Identifiers here are allowed to be wildcards as defined by the Python + function :func:`fnmatch.fnmatchcase`. This is helpful in conjunction with + ``id_prefix``. .. note:: @@ -242,6 +242,15 @@ These are usually key-value pairs. The following attributes are recognized: heuristic and indicate that the specified list of dependencies is exhaustive. +* ``nosync=id1:id2`` prescribes that no barrier synchronization is necessary + between this instruction and the instructions with identifiers ``id1`` and + ``id2``, even if a dependency chain exists and variables are accessed in an + apparently racy way. + + Identifiers here are allowed to be wildcards as defined by the Python + function :func:`fnmatch.fnmatchcase`. This is helpful in conjunction with + ``id_prefix``. + * ``priority=integer`` sets the instructions priority to the value ``integer``. Instructions with higher priority will be scheduled sooner, if possible. 
Note that the scheduler may still schedule a lower-priority diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index aedc1edc6..034c9dd82 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -207,6 +207,7 @@ def parse_insn(insn): if insn_match is not None: depends_on = None depends_on_is_final = False + no_sync_with = None insn_groups = None conflicts_with_groups = None insn_id = None @@ -256,6 +257,11 @@ def parse_insn(insn): intern(dep.strip()) for dep in opt_value.split(":") if dep.strip()) + elif opt_key == "nosync" and opt_value is not None: + no_sync_with = frozenset( + intern(dep.strip()) for dep in opt_value.split(":") + if dep.strip()) + elif opt_key == "groups" and opt_value is not None: insn_groups = frozenset( intern(grp.strip()) for grp in opt_value.split(":") @@ -319,6 +325,7 @@ def parse_insn(insn): else insn_id), depends_on=depends_on, depends_on_is_final=depends_on_is_final, + no_sync_with=no_sync_with, groups=insn_groups, conflicts_with_groups=conflicts_with_groups, forced_iname_deps_is_final=forced_iname_deps_is_final, @@ -1021,25 +1028,39 @@ def apply_default_order_to_args(kernel, default_order): # {{{ resolve wildcard insn dependencies +def find_matching_insn_ids(knl, dep): + from fnmatch import fnmatchcase + + return [ + other_insn.id + for other_insn in knl.instructions + if fnmatchcase(other_insn.id, dep)] + + +def resove_wildcard_insn_ids(knl, deps): + new_deps = [] + for dep in deps: + matches = find_matching_insn_ids(knl, dep) + + if matches: + new_deps.extend(matches) + else: + # Uh, best we can do + new_deps.append(dep) + + return frozenset(new_deps) + + def resolve_wildcard_deps(knl): new_insns = [] - from fnmatch import fnmatchcase for insn in knl.instructions: if insn.depends_on is not None: - new_deps = set() - for dep in insn.depends_on: - match_count = 0 - for other_insn in knl.instructions: - if fnmatchcase(other_insn.id, dep): - new_deps.add(other_insn.id) - match_count += 1 - - if match_count == 
0: - # Uh, best we can do - new_deps.add(dep) - - insn = insn.copy(depends_on=frozenset(new_deps)) + insn = insn.copy( + depends_on=resove_wildcard_insn_ids(knl, insn.depends_on), + no_sync_with=resove_wildcard_insn_ids( + knl, insn.no_sync_with), + ) new_insns.append(insn) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 0e0638491..c4433d5e3 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -430,6 +430,8 @@ class InstructionBase(Record): An (otherwise meaningless) identifier that is unique within a :class:`loopy.kernel.LoopKernel`. + .. rubric:: Instruction ordering + .. attribute:: depends_on a :class:`frozenset` of :attr:`id` values of :class:`Instruction` instances @@ -460,6 +462,21 @@ class InstructionBase(Record): (see :class:`InstructionBase.groups`) may not be active when this instruction is scheduled. + .. attribute:: priority + + Scheduling priority, an integer. Higher means 'execute sooner'. + Default 0. + + .. rubric :: Synchronization + + .. attribute:: no_sync_with + + a :class:`frozenset` of :attr:`id` values of :class:`Instruction` instances + with which no barrier synchronization is necessary, even given the existence + of a dependency chain and apparently conflicting writes + + .. rubric:: Conditionals + .. attribute:: predicates a :class:`frozenset` of variable names the conjunction (logical and) of @@ -467,6 +484,8 @@ class InstructionBase(Record): should be run. Each variable name may, optionally, be preceded by an exclamation point, indicating negation. + .. rubric:: Iname dependencies + .. attribute:: forced_iname_deps_is_final A :class:`bool` determining whether :attr:`forced_iname_deps` constitutes @@ -478,10 +497,7 @@ class InstructionBase(Record): dependencies *or* constitute the entire list of iname dependencies, depending on the value of :attr:`forced_iname_deps_is_final`. - .. attribute:: priority - - Scheduling priority, an integer. Higher means 'execute sooner'. - Default 0. + .. 
rubric:: Iname dependencies .. attribute:: boostable @@ -495,6 +511,8 @@ class InstructionBase(Record): may need to be boosted, as a heuristic help for the scheduler. Also allowed to be *None*. + .. rubric:: Tagging + .. attribute:: tags A tuple of string identifiers that can be used to identify groups @@ -512,12 +530,14 @@ class InstructionBase(Record): fields = set("id depends_on depends_on_is_final " "groups conflicts_with_groups " + "no_sync_with " "predicates " "forced_iname_deps_is_final forced_iname_deps " "priority boostable boostable_into".split()) def __init__(self, id, depends_on, depends_on_is_final, groups, conflicts_with_groups, + no_sync_with, forced_iname_deps_is_final, forced_iname_deps, priority, boostable, boostable_into, predicates, tags, insn_deps=None, insn_deps_is_final=None): @@ -541,6 +561,9 @@ class InstructionBase(Record): if conflicts_with_groups is None: conflicts_with_groups = frozenset() + if no_sync_with is None: + no_sync_with = frozenset() + if forced_iname_deps_is_final is None: forced_iname_deps_is_final = False @@ -574,6 +597,7 @@ class InstructionBase(Record): id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, + no_sync_with=no_sync_with, groups=groups, conflicts_with_groups=conflicts_with_groups, forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, @@ -967,6 +991,7 @@ class Assignment(InstructionBase): depends_on_is_final=None, groups=None, conflicts_with_groups=None, + no_sync_with=None, forced_iname_deps_is_final=None, forced_iname_deps=frozenset(), boostable=None, boostable_into=None, tags=None, @@ -980,6 +1005,7 @@ class Assignment(InstructionBase): depends_on_is_final=depends_on_is_final, groups=groups, conflicts_with_groups=conflicts_with_groups, + no_sync_with=no_sync_with, forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, boostable=boostable, @@ -1134,6 +1160,7 @@ class CInstruction(InstructionBase): 
read_variables=frozenset(), assignees=frozenset(), id=None, depends_on=None, depends_on_is_final=None, groups=None, conflicts_with_groups=None, + no_sync_with=None, forced_iname_deps_is_final=None, forced_iname_deps=frozenset(), priority=0, boostable=None, boostable_into=None, predicates=frozenset(), tags=None, @@ -1153,6 +1180,7 @@ class CInstruction(InstructionBase): depends_on=depends_on, depends_on_is_final=depends_on_is_final, groups=groups, conflicts_with_groups=conflicts_with_groups, + no_sync_with=no_sync_with, forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, boostable=boostable, diff --git a/loopy/preprocess.py b/loopy/preprocess.py index e30d3bcb3..93e898068 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -400,7 +400,9 @@ def add_default_dependencies(kernel): % var) if len(var_writers) == 1: - auto_deps.update(var_writers - set([insn.id])) + auto_deps.update( + var_writers + - set([insn.id])) # }}} diff --git a/loopy/schedule.py b/loopy/schedule.py index 4bacc43b9..b606ba360 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -1043,6 +1043,9 @@ def get_barrier_needing_dependency(kernel, target, source, reverse, var_kind): if reverse: source, target = target, source + if source.id in target.no_sync_with: + return None + # {{{ check that a dependency exists dep_descr = None diff --git a/loopy/version.py b/loopy/version.py index 9fa881d02..cd9f45ac3 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v24-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v25-islpy%s" % _islpy_version -- GitLab