diff --git a/MEMO b/MEMO
index 23865403a6a5b91d6b9e85b76f177ecceb38b09b..63851b21f95d44cf5726fa8dbc90e577db0a33b7 100644
--- a/MEMO
+++ b/MEMO
@@ -46,15 +46,13 @@ Things to consider
 To-do
 ^^^^^
 
-- Expose iname-duplicate-and-rename as a primitive.
-
 - Kernel fusion
 
 - ExpandingIdentityMapper
   extract_subst -> needs WalkMapper
-  duplicate_inames
   join_inames
   padding
 
+  duplicate_inames [DONE]
   split_iname [DONE]
   CSE [DONE]
@@ -142,6 +140,8 @@ Future ideas
 Dealt with
 ^^^^^^^^^^
 
+- Expose iname-duplicate-and-rename as a primitive.
+
 - make sure simple side effects work
 
 - Loop bounds currently may not depend on parallel dimensions
diff --git a/doc/reference.rst b/doc/reference.rst
index 0131a14942ff53888468dddcb7728105e6c1aa9b..af2ea1bbeb6e7c1a90432753349b46aaa86e3750 100644
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -161,6 +161,8 @@ Wrangling inames
 
 .. autofunction:: tag_inames
 
+.. autofunction:: duplicate_inames
+
 Dealing with Substitution Rules
 -------------------------------
 
diff --git a/loopy/__init__.py b/loopy/__init__.py
index e06ffdbe3be8253b704c5fdd218d1442f45d690c..80afc44a7055a5daba827c17ca271c7022c26780 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -81,7 +81,7 @@ __all__ = ["ValueArg", "ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "Ima
         "generate_code",
         "CompiledKernel", "auto_test_vs_ref", "check_kernels",
         "make_kernel",
-        "split_iname", "join_inames", "tag_inames",
+        "split_iname", "join_inames", "tag_inames", "duplicate_inames",
         "split_dimension", "join_dimensions", "tag_dimensions",
         "extract_subst", "expand_subst",
         "precompute", "add_prefetch",
@@ -132,6 +132,9 @@ def split_iname(kernel, split_iname, inner_length,
         outer_tag=None, inner_tag=None,
         slabs=(0, 0), do_tagged_check=True,
         within=None):
+    """
+    :arg within: a stack match as understood by :func:`loopy.context_matching.parse_stack_match`.
+ """ from loopy.context_matching import parse_stack_match within = parse_stack_match(within) @@ -197,9 +200,6 @@ def split_iname(kernel, split_iname, inner_length, # {{{ actually modify instructions - ins = _InameSplitter(kernel, within, - split_iname, outer_iname, inner_iname, new_loop_index) - new_insns = [] for insn in kernel.instructions: if split_iname in insn.forced_iname_deps: @@ -211,8 +211,6 @@ def split_iname(kernel, split_iname, inner_length, new_forced_iname_deps = insn.forced_iname_deps insn = insn.copy( - assignee=ins(insn.assignee, insn.id), - expression=ins(insn.expression, insn.id), forced_iname_deps=new_forced_iname_deps) new_insns.append(insn) @@ -222,19 +220,23 @@ def split_iname(kernel, split_iname, inner_length, iname_slab_increments = kernel.iname_slab_increments.copy() iname_slab_increments[outer_iname] = slabs - result = (kernel + kernel = (kernel .copy(domains=new_domains, iname_slab_increments=iname_slab_increments, - substitutions=ins.get_new_substitutions(), instructions=new_insns, applied_iname_rewrites=applied_iname_rewrites, )) + ins = _InameSplitter(kernel, within, + split_iname, outer_iname, inner_iname, new_loop_index) + + kernel = ins.map_kernel(kernel) + if existing_tag is not None: - result = tag_inames(result, + kernel = tag_inames(kernel, {outer_iname: existing_tag, inner_iname: existing_tag}) - return tag_inames(result, {outer_iname: outer_tag, inner_iname: inner_tag}) + return tag_inames(kernel, {outer_iname: outer_tag, inner_iname: inner_tag}) split_dimension = MovedFunctionDeprecationWrapper(split_iname) @@ -394,6 +396,118 @@ tag_dimensions = MovedFunctionDeprecationWrapper(tag_inames) # }}} +# {{{ duplicate inames + +class _InameDuplicator(ExpandingIdentityMapper): + def __init__(self, rules, make_unique_var_name, + old_to_new, within): + ExpandingIdentityMapper.__init__(self, + rules, make_unique_var_name) + + self.old_to_new = old_to_new + self.old_inames_set = set(old_to_new.iterkeys()) + self.within = within + + def map_reduction(self, expr, expn_state): + if set(expr.inames) & self.old_inames_set and self.within(expn_state.stack): + new_inames = tuple( + self.old_to_new.get(iname, iname) + for iname in expr.inames) + + from loopy.symbolic import Reduction + return Reduction(expr.operation, new_inames, + self.rec(expr.expr, expn_state)) + else: + return ExpandingIdentityMapper.map_reduction(self, expr, expn_state) + + def map_variable(self, expr, expn_state): + new_name = self.old_to_new.get(expr.name) + + if new_name is None or not self.within(expn_state.stack): + return ExpandingIdentityMapper.map_variable(self, expr, expn_state) + else: + from pymbolic import var + return var(new_name) + +def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, + tags={}): + """ + :arg within: a stack match as understood by :func:`loopy.context_matching.parse_stack_match`. 
+ """ + + # {{{ normalize arguments, find unique new_inames + + if isinstance(inames, str): + inames = inames.split(",") + if isinstance(new_inames, str): + new_inames = new_inames.split(",") + + from loopy.context_matching import parse_stack_match + within = parse_stack_match(within) + + if new_inames is None: + new_inames = [None] * len(inames) + + if len(new_inames) != len(inames): + raise ValueError("new_inames must have the same number of entries as inames") + + name_gen = knl.get_var_name_generator() + + for i, iname in enumerate(inames): + new_iname = new_inames[i] + + if new_iname is None: + new_iname = iname + + if suffix is not None: + new_iname += suffix + + new_iname = name_gen(new_iname) + + else: + name_gen.add_name(new_iname) + raise ValueError("new iname '%s' conflicts with existing names" + % new_iname) + + new_inames[i] = new_iname + + # }}} + + # {{{ duplicate the inames + + for old_iname, new_iname in zip(inames, new_inames): + from loopy.kernel import DomainChanger + domch = DomainChanger(knl, frozenset([old_iname])) + + from loopy.isl_helpers import duplicate_axes + knl = knl.copy( + domains=domch.get_domains_with( + duplicate_axes(domch.domain, [old_iname], [new_iname]))) + # }}} + + # {{{ change the inames in the code + + indup = _InameDuplicator(knl.substitutions, name_gen, + old_to_new=dict(zip(inames, new_inames)), + within=within) + + knl = indup.map_kernel(knl) + + # }}} + + # {{{ realize tags + + for old_iname, new_iname in zip(inames, new_inames): + new_tag = tags.get(old_iname) + if new_tag is not None: + knl = tag_inames(knl, {new_iname: new_tag}) + + # }}} + + return knl + +# }}} + # {{{ convenience: add_prefetch # {{{ process footprint_subscripts @@ -572,7 +686,6 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, _process_footprint_subscripts( kernel, rule_name, sweep_inames, footprint_subscripts, arg) - new_kernel = precompute(kernel, subst_use, sweep_inames, new_storage_axis_names=dim_arg_names, default_tag=default_tag, dtype=arg.dtype) @@ -663,13 +776,6 @@ def change_arg_to_image(knl, name): # }}} -# {{{ duplicate inames - -def duplicate_inames(knl, inames): - pass - -# }}} - diff --git a/loopy/cse.py b/loopy/cse.py index 48ef6e3566c55837e9a7c4194cdaa3be064adae8..28518cb230372279e829f0fc30da582ceb45303f 100644 --- a/loopy/cse.py +++ b/loopy/cse.py @@ -173,6 +173,9 @@ def build_global_storage_to_sweep_map(kernel, invocation_descriptors, global_stor2sweep = isl.Map.from_basic_map(stor2sweep) global_stor2sweep = global_stor2sweep.intersect_range(domain_dup_sweep) + # space for global_stor2sweep: + # [stor_axes'] -> [domain](dup_sweep_index)[dup_sweep](rn) + # {{{ check if non-footprint-building invocation descriptors fall into footprint # Make all inames except the sweep parameters. (The footprint may depend on those.) 
@@ -506,7 +509,7 @@ class InvocationReplacer(ExpandingIdentityMapper):
         stor_subscript = []
         for sax_name, sax_source, sax_base_idx in zip(
                 self.storage_axis_names,
-                self.storage_axis_sources, 
+                self.storage_axis_sources,
                 self.storage_base_indices):
             if sax_name not in self.non1_storage_axis_names:
                 continue
@@ -640,10 +643,6 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
     from loopy.context_matching import parse_stack_match
     within = parse_stack_match(within)
 
-    from loopy import infer_type
-    if dtype is None:
-        dtype = infer_type
-
     # }}}
 
     # {{{ process invocations in footprint generators, start invocation_descriptors
@@ -664,6 +663,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
         invocation_descriptors.append(
                 InvocationDescriptor(args=args,
                     expands_footprint=True,
+                    expansion_stack=None,
                     ))
 
     # }}}
@@ -858,12 +858,18 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
 
     # {{{ set up temp variable
 
+    from loopy import infer_type
+    if dtype is None:
+        dtype = infer_type
+    else:
+        dtype = np.dtype(dtype)
+
     from loopy.kernel import TemporaryVariable
 
     new_temporary_variables = kernel.temporary_variables.copy()
     temp_var = TemporaryVariable(
             name=target_var_name,
-            dtype=np.dtype(dtype),
+            dtype=dtype,
             base_indices=(0,)*len(non1_storage_shape),
             shape=non1_storage_shape,
             is_local=None)
@@ -872,13 +878,13 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
 
     # }}}
 
-    result = kernel.copy(
+    kernel = kernel.copy(
             domains=domch.get_domains_with(new_domain),
             instructions=[compute_insn] + kernel.instructions,
             temporary_variables=new_temporary_variables)
 
     from loopy import tag_inames
-    return tag_inames(result, new_iname_to_tag)
+    return tag_inames(kernel, new_iname_to_tag)
diff --git a/loopy/kernel.py b/loopy/kernel.py
index 014a4f6e302f7228b7cc7a9e3836ac730a8cf82e..c00c46d3c7c9c0c51621b4f05939c86ccbda28f4 100644
--- a/loopy/kernel.py
+++ b/loopy/kernel.py
@@ -637,12 +637,13 @@ class _UniqueNameGenerator:
         return name in self.existing_names
 
     def add_name(self, name):
-        assert name not in self.existing_names
+        if self.is_name_conflicting(name):
+            raise ValueError("name '%s' conflicts with existing names" % name)
         self.existing_names.add(name)
 
     def add_names(self, names):
-        assert not frozenset(names) & self.existing_names
-        self.existing_names.update(names)
+        for name in names:
+            self.add_name(name)
 
     def __call__(self, based_on="var"):
         for var_name in _generate_unique_possibilities(based_on):
@@ -1288,15 +1289,16 @@ class LoopKernel(Record):
         return self._get_inames_domain_backend(inames)
 
     @memoize_method
-    def get_leaf_domain_index(self, inames):
-        """Find the leaf of the domain tree needed to cover all inames."""
+    def get_leaf_domain_indices(self, inames):
+        """Find the leaves of the domain tree needed to cover all inames."""
 
         hdm = self._get_home_domain_map()
         ppd = self.all_parents_per_domain()
 
         domain_indices = set()
-        leaf_domain_index = None
+        # map root -> leaf
+        root_to_leaf = {}
 
         for iname in inames:
             home_domain_index = hdm[iname]
@@ -1304,27 +1306,39 @@ class LoopKernel(Record):
                 # nothin' new
                 continue
 
-            leaf_domain_index = home_domain_index
-
-            all_parents = set(ppd[home_domain_index])
-            if not domain_indices <= all_parents:
-                raise CannotBranchDomainTree("iname set '%s' requires "
-                        "branch in domain tree (when adding '%s')"
-                        % (", ".join(inames), iname))
+            domain_parents = [home_domain_index] + ppd[home_domain_index]
+            current_root = domain_parents[-1]
+            previous_leaf = root_to_leaf.get(current_root)
+
+            if previous_leaf is not None:
+                # Check that we don't branch the domain tree.
+                #
+                # Branching the domain tree is dangerous/ill-formed because
+                # it can introduce artificial restrictions on variables
+                # further up the tree.
+
+                prev_parents = set(ppd[previous_leaf])
+                if not prev_parents <= set(domain_parents):
+                    raise CannotBranchDomainTree("iname set '%s' requires "
+                            "branch in domain tree (when adding '%s')"
+                            % (", ".join(inames), iname))
+            else:
+                # We're adding a new root. That's fine.
+                pass
 
-            domain_indices.add(home_domain_index)
-            domain_indices.update(all_parents)
+            root_to_leaf[current_root] = home_domain_index
+            domain_indices.update(domain_parents)
 
-        return leaf_domain_index
+        return root_to_leaf.values()
 
     @memoize_method
     def _get_inames_domain_backend(self, inames):
-        leaf_dom_idx = self.get_leaf_domain_index(inames)
+        domain_indices = set()
+        for leaf_dom_idx in self.get_leaf_domain_indices(inames):
+            domain_indices.add(leaf_dom_idx)
+            domain_indices.update(self.all_parents_per_domain()[leaf_dom_idx])
 
-        return self.combine_domains(tuple(sorted(
-            self.all_parents_per_domain()[leaf_dom_idx]
-            + [leaf_dom_idx]
-            )))
+        return self.combine_domains(tuple(sorted(domain_indices)))
 
     # }}}
 
@@ -1824,7 +1838,13 @@ class DomainChanger:
     def __init__(self, kernel, inames):
         self.kernel = kernel
         if inames:
-            self.leaf_domain_index = kernel.get_leaf_domain_index(inames)
+            ldi = kernel.get_leaf_domain_indices(inames)
+            if len(ldi) > 1:
+                raise RuntimeError("Inames '%s' require more than one leaf "
+                        "domain, which makes the domain change that is part "
+                        "of your current operation ambiguous." % ", ".join(inames))
+
+            self.leaf_domain_index, = ldi
             self.domain = kernel.domains[self.leaf_domain_index]
 
         else:
diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 860c9ec18692e6c8f75abac4e60f60e30edd8a06..fe7b458d8469c5c6a1a3c09470e34c43c5cd1e7a 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -269,6 +269,45 @@ class ExpansionState(Record):
     :ivar arg_context: a dict representing current argument values
     """
 
+class SubstitutionRuleRenamer(IdentityMapper):
+    def __init__(self, renames):
+        self.renames = renames
+
+    def map_call(self, expr):
+        if not isinstance(expr.function, Variable):
+            return IdentityMapper.map_call(self, expr)
+
+        name, tag = parse_tagged_name(expr.function)
+
+        new_name = self.renames.get(name)
+        if new_name is None:
+            return IdentityMapper.map_call(self, expr)
+
+        if tag is None:
+            sym = Variable(new_name)
+        else:
+            sym = TaggedVariable(new_name, tag)
+
+        return type(expr)(sym, tuple(self.rec(child) for child in expr.parameters))
+
+    def map_variable(self, expr):
+        name, tag = parse_tagged_name(expr)
+
+        new_name = self.renames.get(name)
+        if new_name is None:
+            return IdentityMapper.map_variable(self, expr)
+
+        if tag is None:
+            return Variable(new_name)
+        else:
+            return TaggedVariable(new_name, tag)
+
+def rename_subst_rules_in_instructions(insns, renames):
+    subst_renamer = SubstitutionRuleRenamer(renames)
+    return [
+            insn.copy(expression=subst_renamer(insn.expression))
+            for insn in insns]
+
 class ExpandingIdentityMapper(IdentityMapper):
     """Note: the third argument dragged around by this mapper is the
     current expansion expansion state.
@@ -278,26 +317,26 @@ class ExpandingIdentityMapper(IdentityMapper):
         self.old_subst_rules = old_subst_rules
         self.make_unique_var_name = make_unique_var_name
 
-        # maps subst rule (args, bodies) to names
+        # maps subst rule (args, bodies) to (names, original_name)
         self.subst_rule_registry = dict(
-                ((rule.arguments, rule.expression), name)
+                ((rule.arguments, rule.expression), (name, name))
                 for name, rule in old_subst_rules.iteritems())
 
         # maps subst rule (args, bodies) to use counts
        self.subst_rule_use_count = {}
 
-    def register_subst_rule(self, name, args, body):
+    def register_subst_rule(self, original_name, args, body):
         """Returns a name (as a string) for a newly created substitution
         rule.
         """
         key = (args, body)
 
-        existing_name = self.subst_rule_registry.get(key)
+        reg_value = self.subst_rule_registry.get(key)
 
-        if existing_name is None:
-            new_name = self.make_unique_var_name(name)
-            self.subst_rule_registry[key] = new_name
+        if reg_value is None:
+            new_name = self.make_unique_var_name(original_name)
+            self.subst_rule_registry[key] = (new_name, original_name)
         else:
-            new_name = existing_name
+            new_name, _ = reg_value
 
         self.subst_rule_use_count[key] = self.subst_rule_use_count.get(key, 0) + 1
         return new_name
@@ -364,32 +403,76 @@ class ExpandingIdentityMapper(IdentityMapper):
         return IdentityMapper.__call__(self, expr, ExpansionState(
             stack=stack, arg_context={}))
 
-    def get_new_substitutions(self):
+    def _get_new_substitutions_and_renames(self):
+        """This makes a new dictionary of substitutions from the ones
+        encountered in mapping all the encountered expressions.
+        It tries hard to keep substitution names the same--i.e.
+        if all derivative versions of a substitution rule ended
+        up with the same mapped version, then this version should
+        retain the name that the substitution rule had previously.
+        Unfortunately, this can't be done in a single pass, and so
+        the routine returns an additional dictionary *subst_renames*
+        of renamings to be performed on the processed expressions.
+
+        The returned substitutions already have the rename applied
+        to them.
+
+        :returns: (new_substitutions, subst_renames)
+        """
+
         from loopy.kernel import SubstitutionRule
 
+        orig_name_histogram = {}
+        for key, (name, orig_name) in self.subst_rule_registry.iteritems():
+            if self.subst_rule_use_count.get(key, 0):
+                orig_name_histogram[orig_name] = \
+                        orig_name_histogram.get(orig_name, 0) + 1
+
         result = {}
-        for key, name in self.subst_rule_registry.iteritems():
+        renames = {}
+
+        for key, (name, orig_name) in self.subst_rule_registry.iteritems():
             args, body = key
 
             if self.subst_rule_use_count.get(key, 0):
+                if orig_name_histogram[orig_name] == 1 and name != orig_name:
+                    renames[name] = orig_name
+                    name = orig_name
+
                 result[name] = SubstitutionRule(
                         name=name,
                         arguments=args,
                         expression=body)
 
-        return result
+        # {{{ perform renames on new substitutions
+
+        subst_renamer = SubstitutionRuleRenamer(renames)
+
+        renamed_result = {}
+        for name, rule in result.iteritems():
+            renamed_result[name] = rule.copy(
+                    expression=subst_renamer(rule.expression))
+
+        # }}}
+
+        return renamed_result, renames
 
     def map_kernel(self, kernel):
         new_insns = [
                 insn.copy(
+                    # While subst rules are not allowed in assignees, the mapper
+                    # may perform tasks entirely unrelated to subst rules, so
+                    # we must map assignees, too.
                     assignee=self(insn.assignee, insn.id),
+
                     expression=self(insn.expression, insn.id))
                 for insn in kernel.instructions]
 
+        new_substs, renames = self._get_new_substitutions_and_renames()
+
         return kernel.copy(
-                substitutions=self.get_new_substitutions(),
-                instructions=new_insns)
-
+                substitutions=new_substs,
+                instructions=rename_subst_rules_in_instructions(new_insns, renames))
 
 # }}}
 
diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py
index b536cc94a86cfba377210fa742830d96e3f69a2f..8ff5bf8acfe1c0197f3177d892b0193155cbb2f4 100644
--- a/test/test_sem_reagan.py
+++ b/test/test_sem_reagan.py
@@ -51,8 +51,8 @@ def test_tim2d(ctx_factory):
     knl = lp.make_kernel(ctx.devices[0],
             "[K] -> {[i,j,e,m,o,gi]: 0<=i,j,m,o<%d and 0<=e<K and 0<=gi<3}" % n,
             [
-            "ur(a,b) := sum(@o, D[a,o]*u[e,o,b])",
-            "us(a,b) := sum(@o, D[b,o]*u[e,a,o])",
+            "ur(a,b) := sum(o, D[a,o]*u[e,o,b])",
+            "us(a,b) := sum(o, D[b,o]*u[e,a,o])",
 
             #"Gu(mat_entry,a,b) := G[mat_entry,e,m,j]*ur(m,j)",
 
@@ -74,6 +74,9 @@ def test_tim2d(ctx_factory):
             ],
             name="semlap2D", assumptions="K>=1")
 
+    knl = lp.duplicate_inames(knl, "o", within="ur")
+    knl = lp.duplicate_inames(knl, "o", within="us")
+
     seq_knl = knl
 
     def variant_orig(knl):
@@ -82,11 +85,11 @@ def test_tim2d(ctx_factory):
         knl = lp.add_prefetch(knl, "D[:,:]")
         knl = lp.add_prefetch(knl, "u[e, :, :]")
 
-        knl = lp.precompute(knl, "ur(m,j)", np.float32, ["m", "j"])
-        knl = lp.precompute(knl, "us(i,m)", np.float32, ["i", "m"])
+        knl = lp.precompute(knl, "ur(m,j)", ["m", "j"])
+        knl = lp.precompute(knl, "us(i,m)", ["i", "m"])
 
-        knl = lp.precompute(knl, "Gux(m,j)", np.float32, ["m", "j"])
-        knl = lp.precompute(knl, "Guy(i,m)", np.float32, ["i", "m"])
+        knl = lp.precompute(knl, "Gux(m,j)", ["m", "j"])
+        knl = lp.precompute(knl, "Guy(i,m)", ["i", "m"])
 
         knl = lp.add_prefetch(knl, "G$x[:,e,:,:]")
         knl = lp.add_prefetch(knl, "G$y[:,e,:,:]")
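
Usage sketch (not part of the patch): the snippet below shows how the new
duplicate_inames transform added in loopy/__init__.py is meant to be driven,
following the pattern of the test_tim2d change above. The toy kernel, the array
names A, B, out and the iname k are invented for illustration only; the
duplicate_inames call itself uses the API introduced by this diff, where
"within" is a stack match (here simply the name of a substitution rule)
selecting which uses of the iname receive the new, private copy.

    import numpy as np
    import pyopencl as cl
    import loopy as lp

    ctx = cl.create_some_context()

    # Hypothetical kernel: two substitution rules both reduce over the iname "k".
    knl = lp.make_kernel(ctx.devices[0],
            "{[i,k]: 0<=i,k<16}",
            [
                "f(j) := sum(k, A[j,k])",
                "g(j) := sum(k, B[j,k])",
                "out[i] = f(i) + g(i)",
                ],
            [
                lp.GlobalArg("A", np.float32, shape=(16, 16)),
                lp.GlobalArg("B", np.float32, shape=(16, 16)),
                lp.GlobalArg("out", np.float32, shape=(16,)),
                ])

    # Give the reduction inside rule "g" its own copy of "k" (suffixed "_g"),
    # so the two reductions no longer share a loop variable; uses of "k"
    # outside "g" keep the original iname.
    knl = lp.duplicate_inames(knl, "k", within="g", suffix="_g")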