diff --git a/MEMO b/MEMO index 281e17845371a4c6c666824914bc768187818c6b..d7526fd0da6cb2e42cb43d0980f21b808263732d 100644 --- a/MEMO +++ b/MEMO @@ -40,11 +40,15 @@ Things to consider - implemented_domain may end up being smaller than requested in cse evaluations--check that! -- Auto tag assignment depends on known work group size - - Depedencies are pointwise for shared loop dimensions and global over non-shared ones (between dependent and ancestor) +- multiple insns could fight over which iname gets local axis 0 + -> complicated optimization problem + +TODO +^^^^ + - Parallel dimension splitting/merging via tags - FIXME: Deal with insns losing a seq iname dep in a CSE realization @@ -53,19 +57,13 @@ Things to consider - Every loop in loopy is opened at most once. -- Syntax to declare insn deps - - reimplement add_prefetch - user interface for dim length prescription -- make syntax for explicit loop dependencies - -- multiple insns could fight over which iname gets local axis 0 - -> complicated optimization problem - - How to determine which variables need to be duplicated for ILP? - -> Only reduction + -> Reduction + -> CSEs? - Slab decomposition for parallel dimensions - implement at the outermost nesting level regardless @@ -79,6 +77,8 @@ Things to consider Dealt with ^^^^^^^^^^ +- make syntax for explicit loop dependencies + - Implement get_problems() - CSE iname duplication might be unnecessary? 
diff --git a/loopy/__init__.py b/loopy/__init__.py index 6f1c9b930e51177ccde0f892a577ae3efcb7eace..ce40f0d2b98dd139a41ac27a50e40d289173c83d 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -30,7 +30,6 @@ from loopy.kernel import ScalarArg, ArrayArg, ImageArg from loopy.kernel import LoopKernel from loopy.schedule import generate_loop_schedules -from loopy.prefetch import insert_register_prefetches from loopy.compiled import CompiledKernel, drive_timing_run # }}} diff --git a/loopy/kernel.py b/loopy/kernel.py index 762a4c4b6777e0e40f40d64552fdb1b9a72b779a..59a9846b15c9c817c255a4c188d19e477b32379f 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -402,27 +402,39 @@ class LoopKernel(Record): :arg domain: a :class:`islpy.BasicSet`, or a string parseable to a basic set by the isl. Example: "{[i,j]: 0<=i < 10 and 0<= j < 9}" """ + import re + LABEL_DEP_RE = re.compile( + r"^(?:\{(?P<label>\w+)\})?" + r"\s*(?P<lhs>.+?)\s*=\s*(?P<rhs>.+?)\s*" + r"(?:\:\s*(?P<deps>[\s\w,]+))?$" + ) def parse_if_necessary(insn): from pymbolic import parse + deps = [] + label = "insn" + if isinstance(insn, Instruction): return insn if isinstance(insn, str): - lhs, rhs = insn.split("=") - elif isinstance(insn, tuple): - lhs, rhs = insn + label_dep_match = LABEL_DEP_RE.match(insn) + if label_dep_match is None: + raise RuntimeError("insn parse error") - if isinstance(lhs, str): - lhs = parse(lhs) + groups = label_dep_match.groupdict() + if groups["label"] is not None: + label = groups["label"] + if groups["deps"] is not None: + deps = [dep.strip() for dep in groups["deps"].split(",")] - if isinstance(rhs, str): + lhs = parse(groups["lhs"]) from loopy.symbolic import FunctionToPrimitiveMapper - rhs = parse(rhs) - rhs = FunctionToPrimitiveMapper()(rhs) + rhs = FunctionToPrimitiveMapper()(parse(groups["rhs"])) return Instruction( - id=self.make_unique_instruction_id(insns), + id=self.make_unique_instruction_id(insns, based_on=label), + insn_deps=deps, assignee=lhs, expression=rhs) if 
isinstance(domain, str): @@ -522,12 +534,6 @@ class LoopKernel(Record): if var_name not in used_vars: return var_name - @property - @memoize_method - def dim_to_name(self): - from pytools import reverse_dict - return reverse_dict(self.iname_to_dim) - @property @memoize_method def id_to_insn(self): @@ -559,10 +565,6 @@ class LoopKernel(Record): from islpy import dim_type return set(self.space.get_var_dict(dim_type.set).iterkeys()) - def inames_by_tag_type(self, tag_type): - return [iname for iname in self.all_inames() - if isinstance(self.iname_to_tag.get(iname), tag_type)] - @memoize_method def get_iname_bounds(self, iname): lower_bound_pw_aff = (self.domain diff --git a/test/test_matmul.py b/test/test_matmul.py index e342289be9e4d678bfd7794fd8d22cf7dea07603..b928b57c820524f4d93e4e68eade2dee930861ab 100644 --- a/test/test_matmul.py +++ b/test/test_matmul.py @@ -203,7 +203,7 @@ def test_plain_matrix_mul_new_ui(ctx_factory): knl = lp.LoopKernel(ctx.devices[0], "[n] -> {[i,j,k]: 0<=i,j,k<n}", [ - "c[i, j] = reduce(sum_float32, k, cse(a[i, k], lhsmat)*cse(b[k, j], rhsmat))" + "{yo} c[i, j] = reduce(sum_float32, k, cse(a[i, k], lhsmat)*cse(b[k, j], rhsmat))" ], [ lp.ArrayArg("a", dtype, shape=(n, n), order=order),