From fa54e76385649cb87d0f6a5ad0f9bb084421c2b8 Mon Sep 17 00:00:00 2001 From: Tim Warburton <timwar@caam.rice.edu> Date: Tue, 25 Oct 2011 23:36:00 -0500 Subject: [PATCH] Add iname duplication from parsed instructions. --- loopy/__init__.py | 6 +- loopy/kernel.py | 149 +++++++++++++++++++++++++++++++++++++++----- test/test_matmul.py | 22 +++---- 3 files changed, 148 insertions(+), 29 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index aa2947e47..4832d00fb 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -22,7 +22,8 @@ class LoopyAdvisory(UserWarning): from loopy.kernel import ScalarArg, ArrayArg, ImageArg -from loopy.kernel import LoopKernel, AutoFitLocalIndexTag +from loopy.kernel import make_kernel, AutoFitLocalIndexTag +from loopy.preprocess import preprocess_kernel from loopy.schedule import generate_loop_schedules from loopy.compiled import CompiledKernel, drive_timing_run @@ -233,7 +234,8 @@ def realize_cse(kernel, cse_tag, dtype, duplicate_inames=[], parallel_inames=Non dependencies = IndexVariableFinder( include_reduction_inames=False)(expr.child) - assert dependencies <= parent_inames + # FIXME: can happen with + # assert dependencies <= parent_inames for iname in parent_inames: if iname in duplicate_inames: diff --git a/loopy/kernel.py b/loopy/kernel.py index c349c1712..17c0b820b 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -225,15 +225,25 @@ class Instruction(Record): by adding dependencies on any writes to temporaries read by this instruction. :ivar idempotent: Whether the instruction may be executed repeatedly (while obeying dependencies) without changing the meaning of the program. + + The following two instance variables are only used until :func:`loopy.kernel.make_kernel` is + finished: + + :ivar temp_var_type: if not None, a type that will be assigned to the new temporary variable + created from the assignee + :ivar duplicate_inames_and_tags: a list of inames used in the instruction that will be duplicated onto + different inames. """ def __init__(self, id, assignee, expression, - forced_iname_deps=[], insn_deps=[], idempotent=None): + forced_iname_deps=[], insn_deps=[], idempotent=None, + temp_var_type=None, duplicate_inames_and_tags=[]): Record.__init__(self, id=id, assignee=assignee, expression=expression, forced_iname_deps=forced_iname_deps, - insn_deps=insn_deps, idempotent=idempotent) + insn_deps=insn_deps, idempotent=idempotent, + temp_var_type=temp_var_type, duplicate_inames_and_tags=duplicate_inames_and_tags) @memoize_method def all_inames(self): @@ -422,19 +432,27 @@ class LoopKernel(Record): Example: "{[i,j]: 0<=i < 10 and 0<= j < 9}" """ import re + + if isinstance(domain, str): + ctx = isl.Context() + domain = isl.Set.read_from_str(ctx, domain) + + DUP_ENTRY_RE = re.compile( + r"^\s*(?P<iname>\w+)\s*(?:\:\s*(?P<tag>[\w.]+))?\s*$") LABEL_DEP_RE = re.compile( r"^\s*(?:(?P<label>\w+):)?" - "\s*(?:\[(?P<iname_deps>[\s\w,]+)\])?" - "\s*(?P<lhs>.+)\s*=\s*(?P<rhs>.+?)\s*?" - "(?:\:\s*(?P<insn_deps>[\s\w,]+))?$" + "\s*(?:\[" + "(?P<iname_deps>[\s\w,]*)" + "(?:\|(?P<duplicate_inames_and_tags>[\s\w,:.]*))?" + "\])?" + "\s*(?:\<(?P<temp_var_type>.+)\>)?" + "\s*(?P<lhs>.+)\s*=\s*(?P<rhs>.+?)" + "\s*?(?:\:\s*(?P<insn_deps>[\s\w,]+))?$" ) def parse_if_necessary(insn): from pymbolic import parse - insn_deps = [] - forced_iname_deps = [] - label = "insn" if isinstance(insn, Instruction): return insn @@ -446,10 +464,44 @@ class LoopKernel(Record): groups = label_dep_match.groupdict() if groups["label"] is not None: label = groups["label"] + else: + label = "insn" if groups["insn_deps"] is not None: insn_deps = [dep.strip() for dep in groups["insn_deps"].split(",")] + else: + insn_deps = [] + if groups["iname_deps"] is not None: forced_iname_deps = [dep.strip() for dep in groups["iname_deps"].split(",")] + else: + forced_iname_deps = [] + + if groups["duplicate_inames_and_tags"] is not None: + dup_entries = [ + dep.strip() for dep in groups["duplicate_inames_and_tags"].split(",")] + duplicate_inames_and_tags = [] + for dup_entry in dup_entries: + dup_entry_match = DUP_ENTRY_RE.match(dup_entry) + if dup_entry_match is None: + raise RuntimeError( + "could not parse iname duplication entry '%s'" + % dup_entry) + + dup_groups = dup_entry_match.groupdict() + dup_iname = dup_groups["iname"] + assert dup_iname + dup_tag = AutoFitLocalIndexTag() + if dup_groups["tag"] is not None: + dup_tag = parse_tag(dup_groups["tag"]) + + duplicate_inames_and_tags.append((dup_iname, dup_tag)) + else: + duplicate_inames_and_tags = [] + + if groups["temp_var_type"] is not None: + temp_var_type = groups["temp_var_type"] + else: + temp_var_type = None lhs = parse(groups["lhs"]) from loopy.symbolic import FunctionToPrimitiveMapper @@ -459,14 +511,9 @@ class LoopKernel(Record): id=self.make_unique_instruction_id(insns, based_on=label), insn_deps=insn_deps, forced_iname_deps=forced_iname_deps, - assignee=lhs, expression=rhs) - - if isinstance(domain, str): - ctx = isl.Context() - domain = isl.Set.read_from_str(ctx, domain) - - if iname_to_dim is None: - iname_to_dim = domain.get_space().get_var_dict() + assignee=lhs, expression=rhs, + temp_var_type=temp_var_type, + duplicate_inames_and_tags=duplicate_inames_and_tags) insns = [] for insn in instructions: @@ -515,6 +562,9 @@ class LoopKernel(Record): for i in range(s.dim(dim_type.param))), assumptions)) + if iname_to_dim is None: + iname_to_dim = domain.get_space().get_var_dict() + Record.__init__(self, device=device, domain=domain, instructions=insns, args=args, @@ -722,4 +772,71 @@ class LoopKernel(Record): # }}} + + +def make_kernel(*args, **kwargs): + """Second pass of kernel creation. Think about requests for iname duplication + and temporary variable declaration received as part of string instructions. + """ + + knl = LoopKernel(*args, **kwargs) + + new_insns = [] + new_domain = knl.domain + new_temp_vars = knl.temporary_variables.copy() + new_tags = {} + + newly_created_vars = set() + + for insn in knl.instructions: + # {{{ iname duplication + + if insn.duplicate_inames_and_tags: + duplicate_inames = [iname + for iname, tag in insn.duplicate_inames_and_tags] + new_iname_tags = [tag for iname, tag in insn.duplicate_inames_and_tags] + + new_inames = [ + knl.make_unique_var_name( + iname, extra_used_vars=newly_created_vars) + for iname in duplicate_inames] + + for iname, tag in zip(new_inames, new_iname_tags): + new_tags[iname] = tag + + newly_created_vars.update(new_inames) + + from loopy.isl_helpers import duplicate_axes + new_domain = duplicate_axes(new_domain, duplicate_inames, new_inames) + + from loopy.symbolic import SubstitutionMapper + from pymbolic.mapper.substitutor import make_subst_func + old_to_new = dict( + (old_iname, var(new_iname)) + for old_iname, new_iname in zip(duplicate_inames, new_inames)) + subst_map = SubstitutionMapper(make_subst_func(old_to_new)) + + insn = insn.copy( + assignee=subst_map(insn.assignee), + expression=subst_map(insn.expression), + forced_iname_deps=[ + old_to_new.get(iname, iname) for iname in insn.forced_iname_deps], + ) + + # }}} + + new_insns.append(insn) + + new_iname_to_tag = knl.iname_to_tag.copy() + new_iname_to_tag.update(new_tags) + + return knl.copy( + instructions=new_insns, + domain=new_domain, + temporary_variables=new_temp_vars, + iname_to_tag=new_iname_to_tag) + + + + # vim: foldmethod=marker diff --git a/test/test_matmul.py b/test/test_matmul.py index bc2b0c65b..4d111ae3c 100644 --- a/test/test_matmul.py +++ b/test/test_matmul.py @@ -96,7 +96,7 @@ def test_axpy(ctx_factory): n = 20*1024**2 - knl = lp.LoopKernel(ctx.devices[0], + knl = lp.make_kernel(ctx.devices[0], "[n] -> {[i]: 0<=i<n}", [ "z[i] = a*x[i]+b*y[i]" @@ -159,7 +159,7 @@ def test_plain_matrix_mul(ctx_factory): n = get_suitable_size(ctx) - knl = lp.LoopKernel(ctx.devices[0], + knl = lp.make_kernel(ctx.devices[0], "{[i,j,k]: 0<=i,j,k<%d}" % n, [ "c[i, j] = sum_float32(k, a[i, k]*b[k, j])" @@ -211,7 +211,7 @@ def test_variable_size_matrix_mul(ctx_factory): n = get_suitable_size(ctx) - knl = lp.LoopKernel(ctx.devices[0], + knl = lp.make_kernel(ctx.devices[0], "[n] -> {[i,j,k]: 0<=i,j,k<n}", [ "label: c[i, j] = sum_float32(k, cse(a[i, k], lhsmat)*cse(b[k, j], rhsmat))" @@ -264,7 +264,7 @@ def test_rank_one(ctx_factory): n = int(get_suitable_size(ctx)**(2.7/2)) - knl = lp.LoopKernel(ctx.devices[0], + knl = lp.make_kernel(ctx.devices[0], "[n] -> {[i,j]: 0<=i,j<n}", [ "label: c[i, j] = a[i]*b[j]" @@ -358,7 +358,7 @@ def test_troublesome_premagma_fermi_matrix_mul(ctx_factory): n = 6*16*2 - knl = lp.LoopKernel(ctx.devices[0], + knl = lp.make_kernel(ctx.devices[0], "{[i,j,k]: 0<=i,j,k<%d}" % n, [ "c[i, j] = sum_float32(k, a[i, k]*b[k, j])" @@ -412,7 +412,7 @@ def test_intel_matrix_mul(ctx_factory): n = 6*16 - knl = lp.LoopKernel(ctx.devices[0], + knl = lp.make_kernel(ctx.devices[0], "{[i,j,k]: 0<=i,j,k<%d}" % n, [ "c[i, j] = sum_float32(k, a[i, k]*b[k, j])" @@ -475,7 +475,7 @@ def test_magma_fermi_matrix_mul(ctx_factory): n = 6*16*16 - knl = lp.LoopKernel(ctx.devices[0], + knl = lp.make_kernel(ctx.devices[0], "{[i,j,k]: 0<=i,j,k<%d}" % n, [ "c[i, j] = a[i, k]*b[k, j]" @@ -537,7 +537,7 @@ def test_image_matrix_mul(ctx_factory): n = get_suitable_size(ctx) - knl = lp.LoopKernel(ctx.devices[0], + knl = lp.make_kernel(ctx.devices[0], "{[i,j,k]: 0<=i,j,k<%d}" % n, [ "c[i, j] = a[i, k]*b[k, j]" @@ -590,7 +590,7 @@ def test_image_matrix_mul_ilp(ctx_factory): n = get_suitable_size(ctx) - knl = lp.LoopKernel(ctx.devices[0], + knl = lp.make_kernel(ctx.devices[0], "{[i,j,k]: 0<=i,j,k<%d}" % n, [ "c[i, j] = a[i, k]*b[k, j]" @@ -649,7 +649,7 @@ def test_fancy_matrix_mul(ctx_factory): n = get_suitable_size(ctx) - knl = lp.LoopKernel(ctx.devices[0], + knl = lp.make_kernel(ctx.devices[0], "[n] -> {[i,j,k]: 0<=i,j,k<n }", [ "c[i, j] = a[i, k]*b[k, j]" @@ -713,7 +713,7 @@ def test_dg_matrix_mul(ctx_factory): fld_strides = (1, Np_padded) - knl = lp.LoopKernel(ctx.devices[0], + knl = lp.make_kernel(ctx.devices[0], "{[i,j,k]: 0<=i,j< %d and 0<=k<%d}" % (Np, K), [ (var(mn+"fld%d" % ifld)[i, k], -- GitLab