diff --git a/doc/reference.rst b/doc/reference.rst index ac443cf624c0f94871e1c481c54b852fe2f6fa14..a1cb0efab031476bd469cbacea8b7112603e37d9 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -176,7 +176,18 @@ These are usually key-value pairs. The following attributes are recognized: (often numbers) to the given ``id_prefix``. * ``inames=i:j:k`` forces the instruction to reside within the loops over - :ref:`inames` ``i``, ``j`` and ``k``. + :ref:`inames` ``i``, ``j`` and ``k`` (and only those). + + .. note:: + + The default for the inames that the instruction depends on is + the inames used in the instruction itself plus the common + subset of inames shared by writers of all variables read by the + instruction. + + You can add a plus sign ("``+``") to the front of this option + value to indicate that you would like the inames you specify here + to be in addition to the ones found by the heuristic described above. * ``dep=id1:id2`` creates a dependency of this instruction on the instructions with identifiers ``id1`` and ``id2``. This requires that the @@ -184,7 +195,7 @@ These are usually key-value pairs. The following attributes are recognized: instructions' generated code. Identifiers here are allowed to be wildcards as defined by - :mod:`fnmatchcase`. + the Python function :func:`fnmatch.fnmatchcase`. .. 
note:: diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index acf361dfdf0c5bc8a7d5d3fe78b28442f4a5c8bf..252a588b2528545b5d94178b937ecf621146060b 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -137,6 +137,7 @@ def expand_defines_in_expr(expr, defines): # }}} + # {{{ parse instructions INSN_RE = re.compile( @@ -181,6 +182,7 @@ def parse_insn(insn): insn_deps = None insn_id = None priority = 0 + forced_iname_deps_is_final = False forced_iname_deps = frozenset() predicates = frozenset() @@ -208,6 +210,12 @@ def parse_insn(insn): insn_deps = frozenset(dep.strip() for dep in opt_value.split(":") if dep.strip()) elif opt_key == "inames": + if opt_value.startswith("+"): + forced_iname_deps_is_final = False + opt_value = (opt_value[1:]).strip() + else: + forced_iname_deps_is_final = True + forced_iname_deps = frozenset(opt_value.split(":")) elif opt_key == "if": predicates = frozenset(opt_value.split(":")) @@ -232,6 +240,7 @@ def parse_insn(insn): return ExpressionInstruction( id=insn_id, insn_deps=insn_deps, + forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, assignee=lhs, expression=rhs, temp_var_type=temp_var_type, @@ -288,6 +297,7 @@ def parse_if_necessary(insn, defines): # }}} + # {{{ domain parsing EMPTY_SET_DIMS_RE = re.compile(r"^\s*\{\s*\:") diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index df9b7f3ad7d871eac301ac296034045278df287c..9e9538b2ee47e155309a5bdb6e88b68931262c8c 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -356,10 +356,16 @@ class InstructionBase(Record): a :class:`frozenset` of variable names whose truth values (as defined by C) determine whether this instruction should be run + .. attribute:: forced_iname_deps_is_final + + A :class:`bool` determining whether :attr:`forced_iname_deps` constitutes + the *entire* list of iname dependencies. + .. 
attribute:: forced_iname_deps A :class:`frozenset` of inames that are added to the list of iname - dependencies. + dependencies *or* constitute the entire list of iname dependencies, + depending on the value of :attr:`forced_iname_deps_is_final`. .. attribute:: priority @@ -379,18 +385,24 @@ class InstructionBase(Record): Also allowed to be *None*. """ - fields = set("id insn_deps predicates forced_iname_deps " + fields = set("id insn_deps predicates " + "forced_iname_deps_is_final forced_iname_deps " "priority boostable boostable_into".split()) - def __init__(self, id, insn_deps, forced_iname_deps, priority, + def __init__(self, id, insn_deps, + forced_iname_deps_is_final, forced_iname_deps, priority, boostable, boostable_into, predicates): + if forced_iname_deps_is_final is None: + forced_iname_deps_is_final = False + assert isinstance(forced_iname_deps, frozenset) assert isinstance(insn_deps, frozenset) or insn_deps is None Record.__init__(self, id=id, insn_deps=insn_deps, + forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, priority=priority, boostable=boostable, @@ -502,12 +514,16 @@ class ExpressionInstruction(InstructionBase): def __init__(self, assignee, expression, - id=None, forced_iname_deps=frozenset(), insn_deps=None, + id=None, + forced_iname_deps_is_final=None, + forced_iname_deps=frozenset(), + insn_deps=None, boostable=None, boostable_into=None, temp_var_type=None, priority=0, predicates=frozenset()): InstructionBase.__init__(self, id=id, + forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, insn_deps=insn_deps, boostable=boostable, boostable_into=boostable_into, @@ -642,8 +658,10 @@ class CInstruction(InstructionBase): def __init__(self, iname_exprs, code, read_variables=frozenset(), assignees=frozenset(), - id=None, insn_deps=None, forced_iname_deps=frozenset(), priority=0, - boostable=None, boostable_into=None, predicates=frozenset()): + id=None, insn_deps=None, + 
forced_iname_deps_is_final=None, forced_iname_deps=frozenset(), + priority=0, boostable=None, boostable_into=None, + predicates=frozenset()): """ :arg iname_exprs: Like :attr:`iname_exprs`, but instead of tuples, simple strings pepresenting inames are also allowed. A single @@ -656,6 +674,7 @@ class CInstruction(InstructionBase): InstructionBase.__init__(self, id=id, + forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, insn_deps=insn_deps, boostable=boostable, boostable_into=boostable_into, diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index b7793143923286941940afe3e70cd2794ce7006a..57ca8890c29a3736da6db49905ebc6894a333504 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -115,9 +115,12 @@ def find_all_insn_inames(kernel): all_write_deps[insn.id] = write_deps = insn.write_dependency_names() deps = read_deps | write_deps - iname_deps = ( - deps & kernel.all_inames() - | insn.forced_iname_deps) + if insn.forced_iname_deps_is_final: + iname_deps = insn.forced_iname_deps + else: + iname_deps = ( + deps & kernel.all_inames() + | insn.forced_iname_deps) assert isinstance(read_deps, frozenset), type(insn) assert isinstance(write_deps, frozenset), type(insn) @@ -151,6 +154,9 @@ def find_all_insn_inames(kernel): did_something = False for insn in kernel.instructions: + if insn.forced_iname_deps_is_final: + continue + # {{{ depdency-based propagation # For all variables that insn depends on, find the intersection diff --git a/test/test_loopy.py b/test/test_loopy.py index 8092e9c6f87a0c40228a427dcbc20511b3531d10..c6b5e6f3eb176ad61add44ff899cfd7fe2470f02 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1400,21 +1400,21 @@ def test_rob_stroud_bernstein(ctx_factory): 0 <= i2 < nqp1d and \ 0 <= alpha1 <= deg and 0 <= alpha2 <= deg-alpha1 }", """ - <> xi = qpts[1, i2] {inames=el} + <> xi = qpts[1, i2] {inames=+el} <> s = 1-xi <> r = xi/s - <> aind = 0 {id=aind_init,inames=i2:el} + <> aind = 0 
{id=aind_init,inames=+i2:el} <> w = s**(deg-alpha1) {id=init_w} tmp[el,alpha1,i2] = tmp[el,alpha1,i2] + w * coeffs[aind] \ - {id=write_tmp,inames=alpha2} + {id=write_tmp,inames=+alpha2} w = w * r * ( deg - alpha1 - alpha2 ) / (1 + alpha2) \ {id=update_w,dep=init_w:write_tmp} aind = aind + 1 \ {id=aind_incr,\ dep=aind_init:write_tmp:update_w, \ - inames=el:i2:alpha1:alpha2} + inames=+el:i2:alpha1:alpha2} """, [ # Must declare coeffs to have "no" shape, to keep loopy