diff --git a/loopy/__init__.py b/loopy/__init__.py index eb1e6b31a1907d253a8d97b621818d23d1e9b60a..8b9fca61318af53a3ed6ce835f83e37582647ae1 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -27,7 +27,6 @@ import six from six.moves import range, zip import islpy as isl -from islpy import dim_type from loopy.symbolic import (RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext, @@ -54,1378 +53,143 @@ from loopy.kernel.tools import ( add_and_infer_dtypes) from loopy.kernel.creation import make_kernel, UniqueName from loopy.library.reduction import register_reduction_parser -from loopy.subst import extract_subst, expand_subst, assignment_to_subst -from loopy.precompute import precompute -from loopy.buffer import buffer_array -from loopy.fusion import fuse_kernels -from loopy.padding import (split_array_dim, split_arg_axis, find_padding_multiple, - add_padding) -from loopy.preprocess import (preprocess_kernel, realize_reduction, - infer_unknown_types) -from loopy.schedule import generate_loop_schedules, get_one_scheduled_kernel -from loopy.statistics import (get_op_poly, get_gmem_access_poly, - get_DRAM_access_poly, get_barrier_poly, stringify_stats_mapping, - sum_mem_access_to_bytes) -from loopy.codegen import generate_code, generate_body -from loopy.compiled import CompiledKernel -from loopy.options import Options -from loopy.auto_test import auto_test_vs_ref -from loopy.frontend.fortran import (c_preprocess, parse_transformed_fortran, - parse_fortran) - -__all__ = [ - "TaggedVariable", "Reduction", "LinearSubscript", - - "auto", - - "LoopKernel", - - "ValueArg", "ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "ImageArg", - "TemporaryVariable", - - "InstructionBase", "ExpressionInstruction", "CInstruction", - - "default_function_mangler", "single_arg_function_mangler", - - "make_kernel", "UniqueName", - - "register_reduction_parser", - - "extract_subst", "expand_subst", "assignment_to_subst", - "precompute", "buffer_array", - "fuse_kernels", - "split_array_dim", "split_arg_axis", "find_padding_multiple", - "add_padding", - - "get_dot_dependency_graph", - "show_dependency_graph", - "add_dtypes", - "infer_argument_dtypes", "add_and_infer_dtypes", - - "preprocess_kernel", "realize_reduction", "infer_unknown_types", - "generate_loop_schedules", "get_one_scheduled_kernel", - "generate_code", "generate_body", - - "get_op_poly", "get_gmem_access_poly", "get_DRAM_access_poly", - "get_barrier_poly", "stringify_stats_mapping", "sum_mem_access_to_bytes", - - "CompiledKernel", - - "auto_test_vs_ref", - - "Options", - - "make_kernel", - "c_preprocess", "parse_transformed_fortran", "parse_fortran", - - "LoopyError", "LoopyWarning", - - # {{{ from this file - - "split_iname", "join_inames", "tag_inames", "duplicate_inames", - "rename_iname", "link_inames", "remove_unused_inames", - "set_loop_priority", "add_prefetch" - "find_instructions", "map_instructions", - "set_instruction_priority", "add_dependency", - "change_arg_to_image", "tag_data_axes", - "split_reduction_inward", "split_reduction_outward", - "fix_parameters", - "register_preamble_generators", - "register_symbol_manglers", - "register_function_manglers", - - "set_caching_enabled", - "CacheMode", - "make_copy_kernel", - "set_argument_order", - "affine_map_inames", - "fold_constants", - "tag_instructions", - "alias_temporaries", - "to_batched", - "remove_unused_arguments", - "find_rules_matching", - "find_one_rule_matching", - "realize_ilp", - "set_array_dim_names", - - # }}} - ] - - -# }}} - - -# {{{ 
split inames - -class _InameSplitter(RuleAwareIdentityMapper): - def __init__(self, rule_mapping_context, within, - split_iname, outer_iname, inner_iname, replacement_index): - super(_InameSplitter, self).__init__(rule_mapping_context) - - self.within = within - - self.split_iname = split_iname - self.outer_iname = outer_iname - self.inner_iname = inner_iname - - self.replacement_index = replacement_index - - def map_reduction(self, expr, expn_state): - if (self.split_iname in expr.inames - and self.split_iname not in expn_state.arg_context - and self.within( - expn_state.kernel, - expn_state.instruction, - expn_state.stack)): - new_inames = list(expr.inames) - new_inames.remove(self.split_iname) - new_inames.extend([self.outer_iname, self.inner_iname]) - - from loopy.symbolic import Reduction - return Reduction(expr.operation, tuple(new_inames), - self.rec(expr.expr, expn_state)) - else: - return super(_InameSplitter, self).map_reduction(expr, expn_state) - - def map_variable(self, expr, expn_state): - if (expr.name == self.split_iname - and self.split_iname not in expn_state.arg_context - and self.within( - expn_state.kernel, - expn_state.instruction, - expn_state.stack)): - return self.replacement_index - else: - return super(_InameSplitter, self).map_variable(expr, expn_state) - - -def split_iname(kernel, split_iname, inner_length, - outer_iname=None, inner_iname=None, - outer_tag=None, inner_tag=None, - slabs=(0, 0), do_tagged_check=True, - within=None): - """ - :arg within: a stack match as understood by - :func:`loopy.context_matching.parse_stack_match`. - """ - - existing_tag = kernel.iname_to_tag.get(split_iname) - from loopy.kernel.data import ForceSequentialTag - if do_tagged_check and ( - existing_tag is not None - and not isinstance(existing_tag, ForceSequentialTag)): - raise LoopyError("cannot split already tagged iname '%s'" % split_iname) - - if split_iname not in kernel.all_inames(): - raise ValueError("cannot split loop for unknown variable '%s'" % split_iname) - - applied_iname_rewrites = kernel.applied_iname_rewrites[:] - - vng = kernel.get_var_name_generator() - - if outer_iname is None: - outer_iname = vng(split_iname+"_outer") - if inner_iname is None: - inner_iname = vng(split_iname+"_inner") - - def process_set(s): - var_dict = s.get_var_dict() - - if split_iname not in var_dict: - return s - - orig_dim_type, _ = var_dict[split_iname] - - outer_var_nr = s.dim(orig_dim_type) - inner_var_nr = s.dim(orig_dim_type)+1 - - s = s.add_dims(orig_dim_type, 2) - s = s.set_dim_name(orig_dim_type, outer_var_nr, outer_iname) - s = s.set_dim_name(orig_dim_type, inner_var_nr, inner_iname) - - from loopy.isl_helpers import make_slab - - space = s.get_space() - inner_constraint_set = ( - make_slab(space, inner_iname, 0, inner_length) - # name = inner + length*outer - .add_constraint(isl.Constraint.eq_from_names( - space, { - split_iname: 1, - inner_iname: -1, - outer_iname: -inner_length}))) - - name_dim_type, name_idx = space.get_var_dict()[split_iname] - s = s.intersect(inner_constraint_set) - - if within is None: - s = s.project_out(name_dim_type, name_idx, 1) - - return s - - new_domains = [process_set(dom) for dom in kernel.domains] - - from pymbolic import var - inner = var(inner_iname) - outer = var(outer_iname) - new_loop_index = inner + outer*inner_length - - subst_map = {var(split_iname): new_loop_index} - applied_iname_rewrites.append(subst_map) - - # {{{ update forced_iname deps - - new_insns = [] - for insn in kernel.instructions: - if split_iname in 
insn.forced_iname_deps: - new_forced_iname_deps = ( - (insn.forced_iname_deps.copy() - - frozenset([split_iname])) - | frozenset([outer_iname, inner_iname])) - else: - new_forced_iname_deps = insn.forced_iname_deps - - insn = insn.copy( - forced_iname_deps=new_forced_iname_deps) - - new_insns.append(insn) - - # }}} - - iname_slab_increments = kernel.iname_slab_increments.copy() - iname_slab_increments[outer_iname] = slabs - - new_loop_priority = [] - for prio_iname in kernel.loop_priority: - if prio_iname == split_iname: - new_loop_priority.append(outer_iname) - new_loop_priority.append(inner_iname) - else: - new_loop_priority.append(prio_iname) - - kernel = kernel.copy( - domains=new_domains, - iname_slab_increments=iname_slab_increments, - instructions=new_insns, - applied_iname_rewrites=applied_iname_rewrites, - loop_priority=new_loop_priority) - - from loopy.context_matching import parse_stack_match - within = parse_stack_match(within) - - rule_mapping_context = SubstitutionRuleMappingContext( - kernel.substitutions, kernel.get_var_name_generator()) - ins = _InameSplitter(rule_mapping_context, within, - split_iname, outer_iname, inner_iname, new_loop_index) - - kernel = ins.map_kernel(kernel) - kernel = rule_mapping_context.finish_kernel(kernel) - - if existing_tag is not None: - kernel = tag_inames(kernel, - {outer_iname: existing_tag, inner_iname: existing_tag}) - - return tag_inames(kernel, {outer_iname: outer_tag, inner_iname: inner_tag}) - -# }}} - - -# {{{ join inames - -class _InameJoiner(RuleAwareSubstitutionMapper): - def __init__(self, rule_mapping_context, within, subst_func, - joined_inames, new_iname): - super(_InameJoiner, self).__init__(rule_mapping_context, - subst_func, within) - - self.joined_inames = set(joined_inames) - self.new_iname = new_iname - - def map_reduction(self, expr, expn_state): - expr_inames = set(expr.inames) - overlap = (self.join_inames & expr_inames - - set(expn_state.arg_context)) - if overlap and self.within( - expn_state.kernel, - expn_state.instruction, - expn_state.stack): - if overlap != expr_inames: - raise LoopyError( - "Cannot join inames '%s' if there is a reduction " - "that does not use all of the inames being joined. " - "(Found one with just '%s'.)" - % ( - ", ".join(self.joined_inames), - ", ".join(expr_inames))) - - new_inames = expr_inames - self.joined_inames - new_inames.add(self.new_iname) - - from loopy.symbolic import Reduction - return Reduction(expr.operation, tuple(new_inames), - self.rec(expr.expr, expn_state)) - else: - return super(_InameJoiner, self).map_reduction(expr, expn_state) - - -def join_inames(kernel, inames, new_iname=None, tag=None, within=None): - """ - :arg inames: fastest varying last - :arg within: a stack match as understood by - :func:`loopy.context_matching.parse_stack_match`. 
- """ - - # now fastest varying first - inames = inames[::-1] - - if new_iname is None: - new_iname = kernel.get_var_name_generator()("_and_".join(inames)) - - from loopy.kernel.tools import DomainChanger - domch = DomainChanger(kernel, frozenset(inames)) - for iname in inames: - if kernel.get_home_domain_index(iname) != domch.leaf_domain_index: - raise LoopyError("iname '%s' is not 'at home' in the " - "join's leaf domain" % iname) - - new_domain = domch.domain - new_dim_idx = new_domain.dim(dim_type.set) - new_domain = new_domain.add_dims(dim_type.set, 1) - new_domain = new_domain.set_dim_name(dim_type.set, new_dim_idx, new_iname) - - joint_aff = zero = isl.Aff.zero_on_domain(new_domain.space) - subst_dict = {} - base_divisor = 1 - - from pymbolic import var - - for i, iname in enumerate(inames): - iname_dt, iname_idx = zero.get_space().get_var_dict()[iname] - iname_aff = zero.add_coefficient_val(iname_dt, iname_idx, 1) - - joint_aff = joint_aff + base_divisor*iname_aff - - bounds = kernel.get_iname_bounds(iname, constants_only=True) - - from loopy.isl_helpers import ( - static_max_of_pw_aff, static_value_of_pw_aff) - from loopy.symbolic import pw_aff_to_expr - - length = int(pw_aff_to_expr( - static_max_of_pw_aff(bounds.size, constants_only=True))) - - try: - lower_bound_aff = static_value_of_pw_aff( - bounds.lower_bound_pw_aff.coalesce(), - constants_only=False) - except Exception as e: - raise type(e)("while finding lower bound of '%s': " % iname) - - my_val = var(new_iname) // base_divisor - if i+1 < len(inames): - my_val %= length - my_val += pw_aff_to_expr(lower_bound_aff) - subst_dict[iname] = my_val - - base_divisor *= length - - from loopy.isl_helpers import iname_rel_aff - new_domain = new_domain.add_constraint( - isl.Constraint.equality_from_aff( - iname_rel_aff(new_domain.get_space(), new_iname, "==", joint_aff))) - - for i, iname in enumerate(inames): - iname_to_dim = new_domain.get_space().get_var_dict() - iname_dt, iname_idx = iname_to_dim[iname] - - if within is None: - new_domain = new_domain.project_out(iname_dt, iname_idx, 1) - - def subst_forced_iname_deps(fid): - result = set() - for iname in fid: - if iname in inames: - result.add(new_iname) - else: - result.add(iname) - - return frozenset(result) - - new_insns = [ - insn.copy( - forced_iname_deps=subst_forced_iname_deps(insn.forced_iname_deps)) - for insn in kernel.instructions] - - kernel = (kernel - .copy( - instructions=new_insns, - domains=domch.get_domains_with(new_domain), - applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict] - )) - - from loopy.context_matching import parse_stack_match - within = parse_stack_match(within) - - from pymbolic.mapper.substitutor import make_subst_func - rule_mapping_context = SubstitutionRuleMappingContext( - kernel.substitutions, kernel.get_var_name_generator()) - ijoin = _InameJoiner(rule_mapping_context, within, - make_subst_func(subst_dict), - inames, new_iname) - - kernel = rule_mapping_context.finish_kernel( - ijoin.map_kernel(kernel)) - - if tag is not None: - kernel = tag_inames(kernel, {new_iname: tag}) - - return kernel - -# }}} - - -# {{{ tag inames - -def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): - from loopy.kernel.data import parse_tag - - iname_to_tag = dict((iname, parse_tag(tag)) - for iname, tag in six.iteritems(iname_to_tag)) - - from loopy.kernel.data import (ParallelTag, AutoLocalIndexTagBase, - ForceSequentialTag) - - new_iname_to_tag = kernel.iname_to_tag.copy() - for iname, new_tag in 
six.iteritems(iname_to_tag): - if iname not in kernel.all_inames(): - if ignore_nonexistent: - continue - else: - raise LoopyError("iname '%s' does not exist" % iname) - - old_tag = kernel.iname_to_tag.get(iname) - - retag_ok = False - - if isinstance(old_tag, (AutoLocalIndexTagBase, ForceSequentialTag)): - retag_ok = True - - if not retag_ok and old_tag is not None and new_tag is None: - raise ValueError("cannot untag iname '%s'" % iname) - - if iname not in kernel.all_inames(): - raise ValueError("cannot tag '%s'--not known" % iname) - - if isinstance(new_tag, ParallelTag) \ - and isinstance(old_tag, ForceSequentialTag): - raise ValueError("cannot tag '%s' as parallel--" - "iname requires sequential execution" % iname) - - if isinstance(new_tag, ForceSequentialTag) \ - and isinstance(old_tag, ParallelTag): - raise ValueError("'%s' is already tagged as parallel, " - "but is now prohibited from being parallel " - "(likely because of participation in a precompute or " - "a reduction)" % iname) - - if (not retag_ok) and (not force) \ - and old_tag is not None and (old_tag != new_tag): - raise LoopyError("'%s' is already tagged '%s'--cannot retag" - % (iname, old_tag)) - - new_iname_to_tag[iname] = new_tag - - return kernel.copy(iname_to_tag=new_iname_to_tag) - -# }}} - - -# {{{ duplicate inames - -class _InameDuplicator(RuleAwareIdentityMapper): - def __init__(self, rule_mapping_context, - old_to_new, within): - super(_InameDuplicator, self).__init__(rule_mapping_context) - - self.old_to_new = old_to_new - self.old_inames_set = set(six.iterkeys(old_to_new)) - self.within = within - - def map_reduction(self, expr, expn_state): - if (set(expr.inames) & self.old_inames_set - and self.within( - expn_state.kernel, - expn_state.instruction, - expn_state.stack)): - new_inames = tuple( - self.old_to_new.get(iname, iname) - if iname not in expn_state.arg_context - else iname - for iname in expr.inames) - - from loopy.symbolic import Reduction - return Reduction(expr.operation, new_inames, - self.rec(expr.expr, expn_state)) - else: - return super(_InameDuplicator, self).map_reduction(expr, expn_state) - - def map_variable(self, expr, expn_state): - new_name = self.old_to_new.get(expr.name) - - if (new_name is None - or expr.name in expn_state.arg_context - or not self.within( - expn_state.kernel, - expn_state.instruction, - expn_state.stack)): - return super(_InameDuplicator, self).map_variable(expr, expn_state) - else: - from pymbolic import var - return var(new_name) - - def map_instruction(self, kernel, insn): - if not self.within(kernel, insn, ()): - return insn - - new_fid = frozenset( - self.old_to_new.get(iname, iname) - for iname in insn.forced_iname_deps) - return insn.copy(forced_iname_deps=new_fid) - - -def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, - tags={}): - """ - :arg within: a stack match as understood by - :func:`loopy.context_matching.parse_stack_match`. 
- """ - - # {{{ normalize arguments, find unique new_inames - - if isinstance(inames, str): - inames = [iname.strip() for iname in inames.split(",")] - - if isinstance(new_inames, str): - new_inames = [iname.strip() for iname in new_inames.split(",")] - - from loopy.context_matching import parse_stack_match - within = parse_stack_match(within) - - if new_inames is None: - new_inames = [None] * len(inames) - - if len(new_inames) != len(inames): - raise ValueError("new_inames must have the same number of entries as inames") - - name_gen = knl.get_var_name_generator() - - for i, iname in enumerate(inames): - new_iname = new_inames[i] - - if new_iname is None: - new_iname = iname - - if suffix is not None: - new_iname += suffix - - new_iname = name_gen(new_iname) - - else: - if name_gen.is_name_conflicting(new_iname): - raise ValueError("new iname '%s' conflicts with existing names" - % new_iname) - - name_gen.add_name(new_iname) - - new_inames[i] = new_iname - - # }}} - - # {{{ duplicate the inames - - for old_iname, new_iname in zip(inames, new_inames): - from loopy.kernel.tools import DomainChanger - domch = DomainChanger(knl, frozenset([old_iname])) - - from loopy.isl_helpers import duplicate_axes - knl = knl.copy( - domains=domch.get_domains_with( - duplicate_axes(domch.domain, [old_iname], [new_iname]))) - - # }}} - - # {{{ change the inames in the code - - rule_mapping_context = SubstitutionRuleMappingContext( - knl.substitutions, name_gen) - indup = _InameDuplicator(rule_mapping_context, - old_to_new=dict(list(zip(inames, new_inames))), - within=within) - - knl = rule_mapping_context.finish_kernel( - indup.map_kernel(knl)) - - # }}} - - # {{{ realize tags - - for old_iname, new_iname in zip(inames, new_inames): - new_tag = tags.get(old_iname) - if new_tag is not None: - knl = tag_inames(knl, {new_iname: new_tag}) - - # }}} - - return knl - -# }}} - - -# {{{ rename_inames - -def rename_iname(knl, old_iname, new_iname, existing_ok=False, within=None): - """ - :arg within: a stack match as understood by - :func:`loopy.context_matching.parse_stack_match`. 
- :arg existing_ok: execute even if *new_iname* already exists - """ - - var_name_gen = knl.get_var_name_generator() - - does_exist = var_name_gen.is_name_conflicting(new_iname) - - if does_exist and not existing_ok: - raise ValueError("iname '%s' conflicts with an existing identifier" - "--cannot rename" % new_iname) - - if does_exist: - # {{{ check that the domains match up - - dom = knl.get_inames_domain(frozenset((old_iname, new_iname))) - - var_dict = dom.get_var_dict() - _, old_idx = var_dict[old_iname] - _, new_idx = var_dict[new_iname] - - par_idx = dom.dim(dim_type.param) - dom_old = dom.move_dims( - dim_type.param, par_idx, dim_type.set, old_idx, 1) - dom_old = dom_old.move_dims( - dim_type.set, dom_old.dim(dim_type.set), dim_type.param, par_idx, 1) - dom_old = dom_old.project_out( - dim_type.set, new_idx if new_idx < old_idx else new_idx - 1, 1) - - par_idx = dom.dim(dim_type.param) - dom_new = dom.move_dims( - dim_type.param, par_idx, dim_type.set, new_idx, 1) - dom_new = dom_new.move_dims( - dim_type.set, dom_new.dim(dim_type.set), dim_type.param, par_idx, 1) - dom_new = dom_new.project_out( - dim_type.set, old_idx if old_idx < new_idx else old_idx - 1, 1) - - if not (dom_old <= dom_new and dom_new <= dom_old): - raise LoopyError( - "inames {old} and {new} do not iterate over the same domain" - .format(old=old_iname, new=new_iname)) - - # }}} - - from pymbolic import var - subst_dict = {old_iname: var(new_iname)} - - from loopy.context_matching import parse_stack_match - within = parse_stack_match(within) - - from pymbolic.mapper.substitutor import make_subst_func - rule_mapping_context = SubstitutionRuleMappingContext( - knl.substitutions, var_name_gen) - ijoin = RuleAwareSubstitutionMapper(rule_mapping_context, - make_subst_func(subst_dict), within) - - knl = rule_mapping_context.finish_kernel( - ijoin.map_kernel(knl)) - - new_instructions = [] - for insn in knl.instructions: - if (old_iname in insn.forced_iname_deps - and within(knl, insn, ())): - insn = insn.copy( - forced_iname_deps=( - (insn.forced_iname_deps - frozenset([old_iname])) - | frozenset([new_iname]))) - - new_instructions.append(insn) - - knl = knl.copy(instructions=new_instructions) - - else: - knl = duplicate_inames( - knl, [old_iname], within=within, new_inames=[new_iname]) - - knl = remove_unused_inames(knl, [old_iname]) - - return knl - -# }}} - - -# {{{ link inames - -def link_inames(knl, inames, new_iname, within=None, tag=None): - # {{{ normalize arguments - - if isinstance(inames, str): - inames = inames.split(",") - - var_name_gen = knl.get_var_name_generator() - new_iname = var_name_gen(new_iname) - - # }}} - - # {{{ ensure that each iname is used at most once in each instruction - - inames_set = set(inames) - - if 0: - # FIXME! - for insn in knl.instructions: - insn_inames = knl.insn_inames(insn.id) | insn.reduction_inames() - - if len(insn_inames & inames_set) > 1: - raise LoopyError("To-be-linked inames '%s' are used in " - "instruction '%s'. No more than one such iname can " - "be used in one instruction." 
- % (", ".join(insn_inames & inames_set), insn.id)) - - # }}} - - from loopy.kernel.tools import DomainChanger - domch = DomainChanger(knl, tuple(inames)) - - # {{{ ensure that projections are identical - - unrelated_dom_inames = list( - set(domch.domain.get_var_names(dim_type.set)) - - inames_set) - - domain = domch.domain - - # move all inames to be linked to end to prevent shuffly confusion - for iname in inames: - dt, index = domain.get_var_dict()[iname] - assert dt == dim_type.set - - # move to tail of param dim_type - domain = domain.move_dims( - dim_type.param, domain.dim(dim_type.param), - dt, index, 1) - # move to tail of set dim_type - domain = domain.move_dims( - dim_type.set, domain.dim(dim_type.set), - dim_type.param, domain.dim(dim_type.param)-1, 1) - - projections = [ - domch.domain.project_out_except( - unrelated_dom_inames + [iname], [dim_type.set]) - for iname in inames] - - all_equal = True - first_proj = projections[0] - for proj in projections[1:]: - all_equal = all_equal and (proj <= first_proj and first_proj <= proj) - - if not all_equal: - raise LoopyError("Inames cannot be linked because their domain " - "constraints are not the same.") - - del domain # messed up for testing, do not use - - # }}} - - # change the domain - from loopy.isl_helpers import duplicate_axes - knl = knl.copy( - domains=domch.get_domains_with( - duplicate_axes(domch.domain, [inames[0]], [new_iname]))) - - # {{{ change the code - - from pymbolic import var - subst_dict = dict((iname, var(new_iname)) for iname in inames) - - from loopy.context_matching import parse_stack_match - within = parse_stack_match(within) - - from pymbolic.mapper.substitutor import make_subst_func - rule_mapping_context = SubstitutionRuleMappingContext( - knl.substitutions, var_name_gen) - ijoin = RuleAwareSubstitutionMapper(rule_mapping_context, - make_subst_func(subst_dict), within) - - knl = rule_mapping_context.finish_kernel( - ijoin.map_kernel(knl)) - - # }}} - - knl = remove_unused_inames(knl, inames) - - if tag is not None: - knl = tag_inames(knl, {new_iname: tag}) - - return knl - -# }}} - - -# {{{ remove unused inames - -def remove_unused_inames(knl, inames=None): - """Delete those among *inames* that are unused, i.e. project them - out of the domain. If these inames pose implicit restrictions on - other inames, these restrictions will persist as existentially - quantified variables. - - :arg inames: may be an iterable of inames or a string of comma-separated inames. - """ - - # {{{ normalize arguments - - if inames is None: - inames = knl.all_inames() - elif isinstance(inames, str): - inames = inames.split(",") - - # }}} - - # {{{ check which inames are unused - - inames = set(inames) - used_inames = set() - for insn in knl.instructions: - used_inames.update(knl.insn_inames(insn.id)) - - unused_inames = inames - used_inames - - # }}} - - # {{{ remove them - - from loopy.kernel.tools import DomainChanger - - for iname in unused_inames: - domch = DomainChanger(knl, (iname,)) - - dom = domch.domain - dt, idx = dom.get_var_dict()[iname] - dom = dom.project_out(dt, idx, 1) - - knl = knl.copy(domains=domch.get_domains_with(dom)) - - # }}} - - return knl - -# }}} - - -# {{{ set loop priority - -def set_loop_priority(kernel, loop_priority): - """Indicates the textual order in which loops should be entered in the - kernel code. Note that this priority has an advisory role only. If the - kernel logically requires a different nesting, priority is ignored. - Priority is only considered if loop nesting is ambiguous. 
- - :arg: an iterable of inames, or, for brevity, a comma-seaprated string of - inames - """ - - if isinstance(loop_priority, str): - loop_priority = [s.strip() for s in loop_priority.split(",")] - - return kernel.copy(loop_priority=loop_priority) - -# }}} - - -# {{{ convenience: add_prefetch - -# {{{ process footprint_subscripts - -def _add_kernel_axis(kernel, axis_name, start, stop, base_inames): - from loopy.kernel.tools import DomainChanger - domch = DomainChanger(kernel, base_inames) - - domain = domch.domain - new_dim_idx = domain.dim(dim_type.set) - domain = (domain - .insert_dims(dim_type.set, new_dim_idx, 1) - .set_dim_name(dim_type.set, new_dim_idx, axis_name)) - - from loopy.symbolic import get_dependencies - deps = get_dependencies(start) | get_dependencies(stop) - assert deps <= kernel.all_params() - - param_names = domain.get_var_names(dim_type.param) - for dep in deps: - if dep not in param_names: - new_dim_idx = domain.dim(dim_type.param) - domain = (domain - .insert_dims(dim_type.param, new_dim_idx, 1) - .set_dim_name(dim_type.param, new_dim_idx, dep)) - - from loopy.isl_helpers import make_slab - slab = make_slab(domain.get_space(), axis_name, start, stop) - - domain = domain & slab - - return kernel.copy(domains=domch.get_domains_with(domain)) - - -def _process_footprint_subscripts(kernel, rule_name, sweep_inames, - footprint_subscripts, arg): - """Track applied iname rewrites, deal with slice specifiers ':'.""" - - name_gen = kernel.get_var_name_generator() - - from pymbolic.primitives import Variable - - if footprint_subscripts is None: - return kernel, rule_name, sweep_inames, [] - - if not isinstance(footprint_subscripts, (list, tuple)): - footprint_subscripts = [footprint_subscripts] - - inames_to_be_removed = [] - - new_footprint_subscripts = [] - for fsub in footprint_subscripts: - if isinstance(fsub, str): - from loopy.symbolic import parse - fsub = parse(fsub) - - if not isinstance(fsub, tuple): - fsub = (fsub,) - - if len(fsub) != arg.num_user_axes(): - raise ValueError("sweep index '%s' has the wrong number of dimensions" - % str(fsub)) - - for subst_map in kernel.applied_iname_rewrites: - from loopy.symbolic import SubstitutionMapper - from pymbolic.mapper.substitutor import make_subst_func - fsub = SubstitutionMapper(make_subst_func(subst_map))(fsub) - - from loopy.symbolic import get_dependencies - fsub_dependencies = get_dependencies(fsub) - - new_fsub = [] - for axis_nr, fsub_axis in enumerate(fsub): - from pymbolic.primitives import Slice - if isinstance(fsub_axis, Slice): - if fsub_axis.children != (None,): - raise NotImplementedError("add_prefetch only " - "supports full slices") - - axis_name = name_gen( - based_on="%s_fetch_axis_%d" % (arg.name, axis_nr)) - - kernel = _add_kernel_axis(kernel, axis_name, 0, arg.shape[axis_nr], - frozenset(sweep_inames) | fsub_dependencies) - sweep_inames = sweep_inames + [axis_name] - - inames_to_be_removed.append(axis_name) - new_fsub.append(Variable(axis_name)) - - else: - new_fsub.append(fsub_axis) - - new_footprint_subscripts.append(tuple(new_fsub)) - del new_fsub - - footprint_subscripts = new_footprint_subscripts - del new_footprint_subscripts - - subst_use = [Variable(rule_name)(*si) for si in footprint_subscripts] - return kernel, subst_use, sweep_inames, inames_to_be_removed - -# }}} - - -def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, - default_tag="l.auto", rule_name=None, - temporary_name=None, temporary_is_local=None, - footprint_subscripts=None, - fetch_bounding_box=False): - 
"""Prefetch all accesses to the variable *var_name*, with all accesses - being swept through *sweep_inames*. - - :arg dim_arg_names: List of names representing each fetch axis. - :arg rule_name: base name of the generated temporary variable. - :arg footprint_subscripts: A list of tuples indicating the index (i.e. - subscript) tuples used to generate the footprint. - - If only one such set of indices is desired, this may also be specified - directly by putting an index expression into *var_name*. Substitutions - such as those occurring in dimension splits are recorded and also - applied to these indices. - - This function combines :func:`extract_subst` and :func:`precompute`. - """ - - # {{{ fish indexing out of var_name and into footprint_subscripts - - from loopy.symbolic import parse - parsed_var_name = parse(var_name) - - from pymbolic.primitives import Variable, Subscript - if isinstance(parsed_var_name, Variable): - # nothing to see - pass - elif isinstance(parsed_var_name, Subscript): - if footprint_subscripts is not None: - raise TypeError("if footprint_subscripts is specified, then var_name " - "may not contain a subscript") - - assert isinstance(parsed_var_name.aggregate, Variable) - footprint_subscripts = [parsed_var_name.index] - parsed_var_name = parsed_var_name.aggregate - else: - raise ValueError("var_name must either be a variable name or a subscript") - - # }}} - - # {{{ fish out tag - - from loopy.symbolic import TaggedVariable - if isinstance(parsed_var_name, TaggedVariable): - var_name = parsed_var_name.name - tag = parsed_var_name.tag - else: - var_name = parsed_var_name.name - tag = None - - # }}} - - c_name = var_name - if tag is not None: - c_name = c_name + "_" + tag - - var_name_gen = kernel.get_var_name_generator() - - if rule_name is None: - rule_name = var_name_gen("%s_fetch_rule" % c_name) - if temporary_name is None: - temporary_name = var_name_gen("%s_fetch" % c_name) - - arg = kernel.arg_dict[var_name] - - # {{{ make parameter names and unification template - - parameters = [] - for i in range(arg.num_user_axes()): - based_on = "%s_dim_%d" % (c_name, i) - if arg.dim_names is not None: - based_on = "%s_dim_%s" % (c_name, arg.dim_names[i]) - if dim_arg_names is not None and i < len(dim_arg_names): - based_on = dim_arg_names[i] - - par_name = var_name_gen(based_on=based_on) - parameters.append(par_name) - - from pymbolic import var - uni_template = parsed_var_name - if len(parameters) > 1: - uni_template = uni_template.index( - tuple(var(par_name) for par_name in parameters)) - elif len(parameters) == 1: - uni_template = uni_template.index(var(parameters[0])) - - # }}} - - kernel = extract_subst(kernel, rule_name, uni_template, parameters) - if isinstance(sweep_inames, str): - sweep_inames = [s.strip() for s in sweep_inames.split(",")] - else: - # copy, standardize to list - sweep_inames = list(sweep_inames) - - kernel, subst_use, sweep_inames, inames_to_be_removed = \ - _process_footprint_subscripts( - kernel, rule_name, sweep_inames, - footprint_subscripts, arg) - new_kernel = precompute(kernel, subst_use, sweep_inames, - precompute_inames=dim_arg_names, - default_tag=default_tag, dtype=arg.dtype, - fetch_bounding_box=fetch_bounding_box, - temporary_name=temporary_name, - temporary_is_local=temporary_is_local) - - # {{{ remove inames that were temporarily added by slice sweeps - - new_domains = new_kernel.domains[:] - - for iname in inames_to_be_removed: - home_domain_index = kernel.get_home_domain_index(iname) - domain = new_domains[home_domain_index] - - 
dt, idx = domain.get_var_dict()[iname] - assert dt == dim_type.set - - new_domains[home_domain_index] = domain.project_out(dt, idx, 1) - - new_kernel = new_kernel.copy(domains=new_domains) - - # }}} - - # If the rule survived past precompute() (i.e. some accesses fell outside - # the footprint), get rid of it before moving on. - if rule_name in new_kernel.substitutions: - return expand_subst(new_kernel, "... > id:"+rule_name) - else: - return new_kernel - -# }}} - - -# {{{ instruction processing - -def find_instructions(kernel, insn_match): - from loopy.context_matching import parse_match - match = parse_match(insn_match) - return [insn for insn in kernel.instructions if match(kernel, insn)] - - -def map_instructions(kernel, insn_match, f): - from loopy.context_matching import parse_match - match = parse_match(insn_match) - - new_insns = [] - - for insn in kernel.instructions: - if match(kernel, insn): - new_insns.append(f(insn)) - else: - new_insns.append(insn) - - return kernel.copy(instructions=new_insns) - - -def set_instruction_priority(kernel, insn_match, priority): - """Set the priority of instructions matching *insn_match* to *priority*. - - *insn_match* may be any instruction id match understood by - :func:`loopy.context_matching.parse_match`. - """ - - def set_prio(insn): - return insn.copy(priority=priority) - - return map_instructions(kernel, insn_match, set_prio) - - -def add_dependency(kernel, insn_match, dependency): - """Add the instruction dependency *dependency* to the instructions matched - by *insn_match*. - - *insn_match* may be any instruction id match understood by - :func:`loopy.context_matching.parse_match`. - """ +# {{{ import transforms - if dependency not in kernel.id_to_insn: - raise LoopyError("cannot add dependency on non-existent instruction ID '%s'" - % dependency) +from loopy.transform.iname import ( + assume, set_loop_priority, + split_iname, join_inames, tag_inames, duplicate_inames, + rename_iname, link_inames, remove_unused_inames, + affine_map_inames) - def add_dep(insn): - new_deps = insn.insn_deps - added_deps = frozenset([dependency]) - if new_deps is None: - new_deps = added_deps - else: - new_deps = new_deps | added_deps - - return insn.copy(insn_deps=new_deps) - - return map_instructions(kernel, insn_match, add_dep) - - -def remove_instructions(kernel, insn_ids): - """Return a new kernel with instructions in *insn_ids* removed. - - Dependencies across (one, for now) deleted isntructions are propagated. - Behavior is undefined for now for chains of dependencies within the - set of deleted instructions. 
- """ - - if not insn_ids: - return kernel - - assert isinstance(insn_ids, set) - id_to_insn = kernel.id_to_insn - - new_insns = [] - for insn in kernel.instructions: - if insn.id in insn_ids: - continue - - # transitively propagate dependencies - # (only one level for now) - if insn.insn_deps is None: - insn_deps = frozenset() - else: - insn_deps = insn.insn_deps - - new_deps = insn_deps - insn_ids - - for dep_id in insn_deps & insn_ids: - new_deps = new_deps | id_to_insn[dep_id].insn_deps - - new_insns.append(insn.copy(insn_deps=frozenset(new_deps))) +from loopy.transform.instruction import ( + find_instructions, map_instructions, + set_instruction_priority, add_dependency, + remove_instructions, tag_instructions) - return kernel.copy( - instructions=new_insns) - -# }}} +from loopy.transform.data import ( + add_prefetch, change_arg_to_image, tag_data_axes, + set_array_dim_names, remove_unused_arguments, + alias_temporaries, set_argument_order + ) +from loopy.transform.subst import (extract_subst, + assignment_to_subst, expand_subst, find_rules_matching, + find_one_rule_matching) -# {{{ change variable kinds +from loopy.transform.precompute import precompute +from loopy.transform.buffer import buffer_array +from loopy.transform.fusion import fuse_kernels -def change_arg_to_image(knl, name): - new_args = [] - for arg in knl.args: - if arg.name == name: - assert arg.offset == 0 - assert arg.shape is not None - new_args.append(ImageArg(arg.name, dtype=arg.dtype, shape=arg.shape)) - else: - new_args.append(arg) +from loopy.transform.arithmetic import ( + split_reduction_inward, + split_reduction_outward, fold_constants) - return knl.copy(args=new_args) +from loopy.transform.padding import ( + split_array_dim, split_arg_axis, find_padding_multiple, + add_padding) # }}} +from loopy.preprocess import (preprocess_kernel, realize_reduction, + infer_unknown_types) +from loopy.schedule import generate_loop_schedules, get_one_scheduled_kernel +from loopy.statistics import (get_op_poly, get_gmem_access_poly, + get_DRAM_access_poly, get_barrier_poly, stringify_stats_mapping, + sum_mem_access_to_bytes) +from loopy.codegen import generate_code, generate_body +from loopy.compiled import CompiledKernel +from loopy.options import Options +from loopy.auto_test import auto_test_vs_ref +from loopy.frontend.fortran import (c_preprocess, parse_transformed_fortran, + parse_fortran) -# {{{ tag data axes - -def tag_data_axes(knl, ary_names, dim_tags): - from loopy.kernel.tools import ArrayChanger - - if isinstance(ary_names, str): - ary_names = ary_names.split(",") +__all__ = [ + "TaggedVariable", "Reduction", "LinearSubscript", - for ary_name in ary_names: - achng = ArrayChanger(knl, ary_name) - ary = achng.get() + "auto", - from loopy.kernel.array import parse_array_dim_tags - new_dim_tags = parse_array_dim_tags(dim_tags, - n_axes=ary.num_user_axes(), - use_increasing_target_axes=ary.max_target_axes > 1) + "LoopKernel", - ary = ary.copy(dim_tags=tuple(new_dim_tags)) + "ValueArg", "ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "ImageArg", + "TemporaryVariable", - knl = achng.with_changed_array(ary) + "InstructionBase", "ExpressionInstruction", "CInstruction", - return knl + "default_function_mangler", "single_arg_function_mangler", -# }}} + "make_kernel", "UniqueName", + "register_reduction_parser", -# {{{ split_reduction - -class _ReductionSplitter(RuleAwareIdentityMapper): - def __init__(self, rule_mapping_context, within, inames, direction): - super(_ReductionSplitter, self).__init__( - 
rule_mapping_context) - - self.within = within - self.inames = inames - self.direction = direction - - def map_reduction(self, expr, expn_state): - if set(expr.inames) & set(expn_state.arg_context): - # FIXME - raise NotImplementedError() - - if (self.inames <= set(expr.inames) - and self.within( - expn_state.kernel, - expn_state.instruction, - expn_state.stack)): - leftover_inames = set(expr.inames) - self.inames - - from loopy.symbolic import Reduction - if self.direction == "in": - return Reduction(expr.operation, tuple(leftover_inames), - Reduction(expr.operation, tuple(self.inames), - self.rec(expr.expr, expn_state))) - elif self.direction == "out": - return Reduction(expr.operation, tuple(self.inames), - Reduction(expr.operation, tuple(leftover_inames), - self.rec(expr.expr, expn_state))) - else: - assert False - else: - return super(_ReductionSplitter, self).map_reduction(expr, expn_state) + # {{{ transforms + "assume", "set_loop_priority", + "split_iname", "join_inames", "tag_inames", "duplicate_inames", + "rename_iname", "link_inames", "remove_unused_inames", + "affine_map_inames", -def _split_reduction(kernel, inames, direction, within=None): - if direction not in ["in", "out"]: - raise ValueError("invalid value for 'direction': %s" % direction) + "add_prefetch", "change_arg_to_image", "tag_data_axes", + "set_array_dim_names", "remove_unused_arguments", + "alias_temporaries", "set_argument_order", - if isinstance(inames, str): - inames = inames.split(",") - inames = set(inames) + "find_instructions", "map_instructions", + "set_instruction_priority", "add_dependency", + "remove_instructions", "tag_instructions", - from loopy.context_matching import parse_stack_match - within = parse_stack_match(within) + "extract_subst", "expand_subst", "assignment_to_subst", + "find_rules_matching", "find_one_rule_matching", - rule_mapping_context = SubstitutionRuleMappingContext( - kernel.substitutions, kernel.get_var_name_generator()) - rsplit = _ReductionSplitter(rule_mapping_context, - within, inames, direction) - return rule_mapping_context.finish_kernel( - rsplit.map_kernel(kernel)) + "precompute", "buffer_array", + "fuse_kernels", + "split_reduction_inward", "split_reduction_outward", + "fold_constants", -def split_reduction_inward(kernel, inames, within=None): - """Takes a reduction of the form:: + "split_array_dim", "split_arg_axis", "find_padding_multiple", + "add_padding", - sum([i,j,k], ...) + # }}} - and splits it into two nested reductions:: + "get_dot_dependency_graph", + "show_dependency_graph", + "add_dtypes", + "infer_argument_dtypes", "add_and_infer_dtypes", - sum([j,k], sum([i], ...)) + "preprocess_kernel", "realize_reduction", "infer_unknown_types", + "generate_loop_schedules", "get_one_scheduled_kernel", + "generate_code", "generate_body", - In this case, *inames* would have been ``"i"`` indicating that - the iname ``i`` should be made the iname governing the inner reduction. + "get_op_poly", "get_gmem_access_poly", "get_DRAM_access_poly", + "get_barrier_poly", "stringify_stats_mapping", "sum_mem_access_to_bytes", - :arg inames: A list of inames, or a comma-separated string that can - be parsed into those - """ + "CompiledKernel", - return _split_reduction(kernel, inames, "in", within) + "auto_test_vs_ref", + "Options", -def split_reduction_outward(kernel, inames, within=None): - """Takes a reduction of the form:: + "make_kernel", + "c_preprocess", "parse_transformed_fortran", "parse_fortran", - sum([i,j,k], ...) 
+ "LoopyError", "LoopyWarning", - and splits it into two nested reductions:: + # {{{ from this file - sum([i], sum([j,k], ...)) + "fix_parameters", + "register_preamble_generators", + "register_symbol_manglers", + "register_function_manglers", - In this case, *inames* would have been ``"i"`` indicating that - the iname ``i`` should be made the iname governing the outer reduction. + "set_caching_enabled", + "CacheMode", + "make_copy_kernel", + "to_batched", + "realize_ilp", - :arg inames: A list of inames, or a comma-separated string that can - be parsed into those - """ + # }}} + ] - return _split_reduction(kernel, inames, "out", within) # }}} @@ -1508,27 +272,6 @@ def fix_parameters(kernel, **value_dict): # }}} -# {{{ assume - -def assume(kernel, assumptions): - if isinstance(assumptions, str): - assumptions_set_str = "[%s] -> { : %s}" \ - % (",".join(s for s in kernel.outer_params()), - assumptions) - assumptions = isl.BasicSet.read_from_str(kernel.domains[0].get_ctx(), - assumptions_set_str) - - if not isinstance(assumptions, isl.BasicSet): - raise TypeError("'assumptions' must be a BasicSet or a string") - - old_assumptions, new_assumptions = isl.align_two(kernel.assumptions, assumptions) - - return kernel.copy( - assumptions=old_assumptions.params() & new_assumptions.params()) - -# }}} - - # {{{ set_options def set_options(kernel, *args, **kwargs): @@ -1633,7 +376,7 @@ class CacheMode(object): # }}} -# {{{ data layout change +# {{{ make copy kernel def make_copy_kernel(new_dim_tags, old_dim_tags=None): """Returns a :class:`LoopKernel` that changes the data layout @@ -1684,339 +427,6 @@ def make_copy_kernel(new_dim_tags, old_dim_tags=None): # }}} -# {{{ set argument order - -def set_argument_order(kernel, arg_names): - """ - :arg arg_names: A list (or comma-separated string) or argument - names. All arguments must be in this list. - """ - - if isinstance(arg_names, str): - arg_names = arg_names.split(",") - - new_args = [] - old_arg_dict = kernel.arg_dict.copy() - - for arg_name in arg_names: - try: - arg = old_arg_dict.pop(arg_name) - except KeyError: - raise LoopyError("unknown argument '%s'" - % arg_name) - - new_args.append(arg) - - if old_arg_dict: - raise LoopyError("incomplete argument list passed " - "to set_argument_order. Left over: '%s'" - % ", ".join(arg_name for arg_name in old_arg_dict)) - - return kernel.copy(args=new_args) - -# }}} - - -# {{{ affine map inames - -def affine_map_inames(kernel, old_inames, new_inames, equations): - """Return a new *kernel* where the affine transform - specified by *equations* has been applied to the inames. - - :arg old_inames: A list of inames to be replaced by affine transforms - of their values. - May also be a string of comma-separated inames. - - :arg new_inames: A list of new inames that are not yet used in *kernel*, - but have their values established in terms of *old_inames* by - *equations*. - May also be a string of comma-separated inames. - :arg equations: A list of equations estabilishing a relationship - between *old_inames* and *new_inames*. Each equation may be - a tuple ``(lhs, rhs)`` of expressions or a string, with left and - right hand side of the equation separated by ``=``. 
- """ - - # {{{ check and parse arguments - - if isinstance(new_inames, str): - new_inames = new_inames.split(",") - new_inames = [iname.strip() for iname in new_inames] - if isinstance(old_inames, str): - old_inames = old_inames.split(",") - old_inames = [iname.strip() for iname in old_inames] - if isinstance(equations, str): - equations = [equations] - - import re - eqn_re = re.compile(r"^([^=]+)=([^=]+)$") - - def parse_equation(eqn): - if isinstance(eqn, str): - eqn_match = eqn_re.match(eqn) - if not eqn_match: - raise ValueError("invalid equation: %s" % eqn) - - from loopy.symbolic import parse - lhs = parse(eqn_match.group(1)) - rhs = parse(eqn_match.group(2)) - return (lhs, rhs) - elif isinstance(eqn, tuple): - if len(eqn) != 2: - raise ValueError("unexpected length of equation tuple, " - "got %d, should be 2" % len(eqn)) - return eqn - else: - raise ValueError("unexpected type of equation" - "got %d, should be string or tuple" - % type(eqn).__name__) - - equations = [parse_equation(eqn) for eqn in equations] - - all_vars = kernel.all_variable_names() - for iname in new_inames: - if iname in all_vars: - raise LoopyError("new iname '%s' is already used in kernel" - % iname) - - for iname in old_inames: - if iname not in kernel.all_inames(): - raise LoopyError("old iname '%s' not known" % iname) - - # }}} - - # {{{ substitute iname use - - from pymbolic.algorithm import solve_affine_equations_for - old_inames_to_expr = solve_affine_equations_for(old_inames, equations) - - subst_dict = dict( - (v.name, expr) - for v, expr in old_inames_to_expr.items()) - - var_name_gen = kernel.get_var_name_generator() - - from pymbolic.mapper.substitutor import make_subst_func - from loopy.context_matching import parse_stack_match - - rule_mapping_context = SubstitutionRuleMappingContext( - kernel.substitutions, var_name_gen) - old_to_new = RuleAwareSubstitutionMapper(rule_mapping_context, - make_subst_func(subst_dict), within=parse_stack_match(None)) - - kernel = ( - rule_mapping_context.finish_kernel( - old_to_new.map_kernel(kernel)) - .copy( - applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict] - )) - - # }}} - - # {{{ change domains - - new_inames_set = set(new_inames) - old_inames_set = set(old_inames) - - new_domains = [] - for idom, dom in enumerate(kernel.domains): - dom_var_dict = dom.get_var_dict() - old_iname_overlap = [ - iname - for iname in old_inames - if iname in dom_var_dict] - - if not old_iname_overlap: - new_domains.append(dom) - continue - - from loopy.symbolic import get_dependencies - dom_new_inames = set() - dom_old_inames = set() - - # mapping for new inames to dim_types - new_iname_dim_types = {} - - dom_equations = [] - for iname in old_iname_overlap: - for ieqn, (lhs, rhs) in enumerate(equations): - eqn_deps = get_dependencies(lhs) | get_dependencies(rhs) - if iname in eqn_deps: - dom_new_inames.update(eqn_deps & new_inames_set) - dom_old_inames.update(eqn_deps & old_inames_set) - - if dom_old_inames: - dom_equations.append((lhs, rhs)) - - this_eqn_old_iname_dim_types = set( - dom_var_dict[old_iname][0] - for old_iname in eqn_deps & old_inames_set) - - if this_eqn_old_iname_dim_types: - if len(this_eqn_old_iname_dim_types) > 1: - raise ValueError("inames '%s' (from equation %d (0-based)) " - "in domain %d (0-based) are not " - "of a uniform dim_type" - % (", ".join(eqn_deps & old_inames_set), ieqn, idom)) - - this_eqn_new_iname_dim_type, = this_eqn_old_iname_dim_types - - for new_iname in eqn_deps & new_inames_set: - if new_iname in new_iname_dim_types: - if 
(this_eqn_new_iname_dim_type - != new_iname_dim_types[new_iname]): - raise ValueError("dim_type disagreement for " - "iname '%s' (from equation %d (0-based)) " - "in domain %d (0-based)" - % (new_iname, ieqn, idom)) - else: - new_iname_dim_types[new_iname] = \ - this_eqn_new_iname_dim_type - - if not dom_old_inames <= set(dom_var_dict): - raise ValueError("domain %d (0-based) does not know about " - "all old inames (specifically '%s') needed to define new inames" - % (idom, ", ".join(dom_old_inames - set(dom_var_dict)))) - - # add inames to domain with correct dim_types - dom_new_inames = list(dom_new_inames) - for iname in dom_new_inames: - dt = new_iname_dim_types[iname] - iname_idx = dom.dim(dt) - dom = dom.add_dims(dt, 1) - dom = dom.set_dim_name(dt, iname_idx, iname) - - # add equations - from loopy.symbolic import aff_from_expr - for lhs, rhs in dom_equations: - dom = dom.add_constraint( - isl.Constraint.equality_from_aff( - aff_from_expr(dom.space, rhs - lhs))) - - # project out old inames - for iname in dom_old_inames: - dt, idx = dom.get_var_dict()[iname] - dom = dom.project_out(dt, idx, 1) - - new_domains.append(dom) - - # }}} - - return kernel.copy(domains=new_domains) - -# }}} - - -# {{{ fold constants - -def fold_constants(kernel): - from loopy.symbolic import ConstantFoldingMapper - cfm = ConstantFoldingMapper() - - new_insns = [ - insn.with_transformed_expressions(cfm) - for insn in kernel.instructions] - - new_substs = dict( - (sub.name, - sub.copy(expression=cfm(sub.expression))) - for sub in six.itervalues(kernel.substitutions)) - - return kernel.copy( - instructions=new_insns, - substitutions=new_substs) - -# }}} - - -# {{{ tag_instructions - -def tag_instructions(kernel, new_tag, within=None): - from loopy.context_matching import parse_match - within = parse_match(within) - - new_insns = [] - for insn in kernel.instructions: - if within(kernel, insn): - new_insns.append( - insn.copy(tags=insn.tags + (new_tag,))) - else: - new_insns.append(insn) - - return kernel.copy(instructions=new_insns) - -# }}} - - -# {{{ alias_temporaries - -def alias_temporaries(knl, names, base_name_prefix=None): - """Sets all temporaries given by *names* to be backed by a single piece of - storage. Also introduces ordering structures ("groups") to prevent the - usage of each temporary to interfere with another. - - :arg base_name_prefix: an identifier to be used for the common storage - area - """ - gng = knl.get_group_name_generator() - group_names = [gng("tmpgrp_"+name) for name in names] - - if base_name_prefix is None: - base_name_prefix = "temp_storage" - - vng = knl.get_var_name_generator() - base_name = vng(base_name_prefix) - - names_set = set(names) - - new_insns = [] - for insn in knl.instructions: - temp_deps = insn.dependency_names() & names_set - - if not temp_deps: - new_insns.append(insn) - continue - - if len(temp_deps) > 1: - raise LoopyError("Instruction {insn} refers to multiple of the " - "temporaries being aliased, namely '{temps}'. Cannot alias." 
- .format( - insn=insn.id, - temps=", ".join(temp_deps))) - - temp_name, = temp_deps - temp_idx = names.index(temp_name) - group_name = group_names[temp_idx] - other_group_names = ( - frozenset(group_names[:temp_idx]) - | frozenset(group_names[temp_idx+1:])) - - new_insns.append( - insn.copy( - groups=insn.groups | frozenset([group_name]), - conflicts_with_groups=( - insn.conflicts_with_groups | other_group_names))) - - new_temporary_variables = {} - for tv in six.itervalues(knl.temporary_variables): - if tv.name in names_set: - if tv.base_storage is not None: - raise LoopyError("temporary variable '{tv}' already has " - "a defined storage array -- cannot alias" - .format(tv=tv.name)) - - new_temporary_variables[tv.name] = \ - tv.copy(base_storage=base_name) - else: - new_temporary_variables[tv.name] = tv - - return knl.copy( - instructions=new_insns, - temporary_variables=new_temporary_variables) - -# }}} - - # {{{ to_batched class _BatchVariableChanger(RuleAwareIdentityMapper): @@ -2120,51 +530,6 @@ def to_batched(knl, nbatches, batch_varying_args, batch_iname_prefix="ibatch"): # }}} -# {{{ remove_unused_arguments - -def remove_unused_arguments(knl): - new_args = [] - - refd_vars = set(knl.all_params()) - for insn in knl.instructions: - refd_vars.update(insn.dependency_names()) - - for arg in knl.args: - if arg.name in refd_vars: - new_args.append(arg) - - return knl.copy(args=new_args) - -# }}} - - -# {{{ find substitution rules by glob patterns - -def find_rules_matching(knl, pattern): - """ - :pattern: A shell-style glob pattern. - """ - - from loopy.context_matching import re_from_glob - pattern = re_from_glob(pattern) - - return [r for r in knl.substitutions if pattern.match(r)] - - -def find_one_rule_matching(knl, pattern): - rules = find_rules_matching(knl, pattern) - - if len(rules) > 1: - raise ValueError("more than one substitution rule matched '%s'" - % pattern) - if not rules: - raise ValueError("no substitution rule matched '%s'" % pattern) - - return rules[0] - -# }}} - - # {{{ realize_ilp def realize_ilp(kernel, iname): @@ -2189,27 +554,4 @@ def realize_ilp(kernel, iname): # }}} -# {{{ set_array_dim_names - -def set_array_dim_names(kernel, ary_names, dim_names): - from loopy.kernel.tools import ArrayChanger - if isinstance(ary_names, str): - ary_names = ary_names.split(",") - - if isinstance(dim_names, str): - dim_names = tuple(dim_names.split(",")) - - for ary_name in ary_names: - achng = ArrayChanger(kernel, ary_name) - ary = achng.get() - - ary = ary.copy(dim_names=dim_names) - - kernel = achng.with_changed_array(ary) - - return kernel - -# }}} - - # vim: foldmethod=marker diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 4e88192715c58f0bd9c94ba5230a46754eef401a..1b12373e4938a3f95b43ef365a383a77ba2ffba8 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -129,7 +129,7 @@ def find_all_insn_inames(kernel): all_read_deps = {} all_write_deps = {} - from loopy.subst import expand_subst + from loopy.transform.subst import expand_subst kernel = expand_subst(kernel) for insn in kernel.instructions: diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 26850f108e02a59f1b9d3372df9f8f36b11971d6..58087c10899f0b6294fac6a3c13fbbf05f6c634f 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -139,7 +139,7 @@ def infer_unknown_types(kernel, expect_completion=False): unexpanded_kernel = kernel if kernel.substitutions: - from loopy.subst import expand_subst + from loopy.transform.subst import expand_subst kernel = expand_subst(kernel) 
new_temp_vars = kernel.temporary_variables.copy() @@ -673,7 +673,7 @@ def preprocess_kernel(kernel, device=None): # }}} - from loopy.subst import expand_subst + from loopy.transform.subst import expand_subst kernel = expand_subst(kernel) # Ordering restriction: @@ -699,7 +699,7 @@ def preprocess_kernel(kernel, device=None): # add_axes_to_temporaries_for_ilp because reduction accumulators # need to be duplicated by this. - from loopy.ilp import add_axes_to_temporaries_for_ilp_and_vec + from loopy.transform.ilp import add_axes_to_temporaries_for_ilp_and_vec kernel = add_axes_to_temporaries_for_ilp_and_vec(kernel) kernel = mark_local_temporaries(kernel) diff --git a/loopy/transform/__init__.py b/loopy/transform/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..570b5efffb29e0ebb56b99444db19766127be596 --- /dev/null +++ b/loopy/transform/__init__.py @@ -0,0 +1,26 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + + diff --git a/loopy/transform/arithmetic.py b/loopy/transform/arithmetic.py new file mode 100644 index 0000000000000000000000000000000000000000..540c36681a6e6504e561cbbfadb8e85ec54add92 --- /dev/null +++ b/loopy/transform/arithmetic.py @@ -0,0 +1,151 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + + +import six + +from loopy.symbolic import (RuleAwareIdentityMapper, + SubstitutionRuleMappingContext) + + +# {{{ split_reduction + +class _ReductionSplitter(RuleAwareIdentityMapper): + def __init__(self, rule_mapping_context, within, inames, direction): + super(_ReductionSplitter, self).__init__( + rule_mapping_context) + + self.within = within + self.inames = inames + self.direction = direction + + def map_reduction(self, expr, expn_state): + if set(expr.inames) & set(expn_state.arg_context): + # FIXME + raise NotImplementedError() + + if (self.inames <= set(expr.inames) + and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack)): + leftover_inames = set(expr.inames) - self.inames + + from loopy.symbolic import Reduction + if self.direction == "in": + return Reduction(expr.operation, tuple(leftover_inames), + Reduction(expr.operation, tuple(self.inames), + self.rec(expr.expr, expn_state))) + elif self.direction == "out": + return Reduction(expr.operation, tuple(self.inames), + Reduction(expr.operation, tuple(leftover_inames), + self.rec(expr.expr, expn_state))) + else: + assert False + else: + return super(_ReductionSplitter, self).map_reduction(expr, expn_state) + + +def _split_reduction(kernel, inames, direction, within=None): + if direction not in ["in", "out"]: + raise ValueError("invalid value for 'direction': %s" % direction) + + if isinstance(inames, str): + inames = inames.split(",") + inames = set(inames) + + from loopy.context_matching import parse_stack_match + within = parse_stack_match(within) + + rule_mapping_context = SubstitutionRuleMappingContext( + kernel.substitutions, kernel.get_var_name_generator()) + rsplit = _ReductionSplitter(rule_mapping_context, + within, inames, direction) + return rule_mapping_context.finish_kernel( + rsplit.map_kernel(kernel)) + + +def split_reduction_inward(kernel, inames, within=None): + """Takes a reduction of the form:: + + sum([i,j,k], ...) + + and splits it into two nested reductions:: + + sum([j,k], sum([i], ...)) + + In this case, *inames* would have been ``"i"`` indicating that + the iname ``i`` should be made the iname governing the inner reduction. + + :arg inames: A list of inames, or a comma-separated string that can + be parsed into those + """ + + return _split_reduction(kernel, inames, "in", within) + + +def split_reduction_outward(kernel, inames, within=None): + """Takes a reduction of the form:: + + sum([i,j,k], ...) + + and splits it into two nested reductions:: + + sum([i], sum([j,k], ...)) + + In this case, *inames* would have been ``"i"`` indicating that + the iname ``i`` should be made the iname governing the outer reduction. 
+ + :arg inames: A list of inames, or a comma-separated string that can + be parsed into those + """ + + return _split_reduction(kernel, inames, "out", within) + +# }}} + + +# {{{ fold constants + +def fold_constants(kernel): + from loopy.symbolic import ConstantFoldingMapper + cfm = ConstantFoldingMapper() + + new_insns = [ + insn.with_transformed_expressions(cfm) + for insn in kernel.instructions] + + new_substs = dict( + (sub.name, + sub.copy(expression=cfm(sub.expression))) + for sub in six.itervalues(kernel.substitutions)) + + return kernel.copy( + instructions=new_insns, + substitutions=new_substs) + +# }}} + + +# vim: foldmethod=marker diff --git a/loopy/array_buffer_map.py b/loopy/transform/array_buffer_map.py similarity index 100% rename from loopy/array_buffer_map.py rename to loopy/transform/array_buffer_map.py diff --git a/loopy/buffer.py b/loopy/transform/buffer.py similarity index 99% rename from loopy/buffer.py rename to loopy/transform/buffer.py index 70f8a0e297bb870635541f60506091bc29bf8ecc..2dc86c4fb7a9d2c439b03b186a401e1c6b97e717 100644 --- a/loopy/buffer.py +++ b/loopy/transform/buffer.py @@ -23,7 +23,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -from loopy.array_buffer_map import (ArrayToBufferMap, NoOpArrayToBufferMap, +from loopy.transform.array_buffer_map import (ArrayToBufferMap, NoOpArrayToBufferMap, AccessDescriptor) from loopy.symbolic import (get_dependencies, RuleAwareIdentityMapper, SubstitutionRuleMappingContext, diff --git a/loopy/transform/data.py b/loopy/transform/data.py new file mode 100644 index 0000000000000000000000000000000000000000..53f3479c2cf131d9c99b0fb21d33746131af17c6 --- /dev/null +++ b/loopy/transform/data.py @@ -0,0 +1,464 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +import six # noqa + +from loopy.diagnostic import LoopyError +from islpy import dim_type + +from loopy.kernel.data import ImageArg + + +# {{{ convenience: add_prefetch + +# {{{ process footprint_subscripts + +def _add_kernel_axis(kernel, axis_name, start, stop, base_inames): + from loopy.kernel.tools import DomainChanger + domch = DomainChanger(kernel, base_inames) + + domain = domch.domain + new_dim_idx = domain.dim(dim_type.set) + domain = (domain + .insert_dims(dim_type.set, new_dim_idx, 1) + .set_dim_name(dim_type.set, new_dim_idx, axis_name)) + + from loopy.symbolic import get_dependencies + deps = get_dependencies(start) | get_dependencies(stop) + assert deps <= kernel.all_params() + + param_names = domain.get_var_names(dim_type.param) + for dep in deps: + if dep not in param_names: + new_dim_idx = domain.dim(dim_type.param) + domain = (domain + .insert_dims(dim_type.param, new_dim_idx, 1) + .set_dim_name(dim_type.param, new_dim_idx, dep)) + + from loopy.isl_helpers import make_slab + slab = make_slab(domain.get_space(), axis_name, start, stop) + + domain = domain & slab + + return kernel.copy(domains=domch.get_domains_with(domain)) + + +def _process_footprint_subscripts(kernel, rule_name, sweep_inames, + footprint_subscripts, arg): + """Track applied iname rewrites, deal with slice specifiers ':'.""" + + name_gen = kernel.get_var_name_generator() + + from pymbolic.primitives import Variable + + if footprint_subscripts is None: + return kernel, rule_name, sweep_inames, [] + + if not isinstance(footprint_subscripts, (list, tuple)): + footprint_subscripts = [footprint_subscripts] + + inames_to_be_removed = [] + + new_footprint_subscripts = [] + for fsub in footprint_subscripts: + if isinstance(fsub, str): + from loopy.symbolic import parse + fsub = parse(fsub) + + if not isinstance(fsub, tuple): + fsub = (fsub,) + + if len(fsub) != arg.num_user_axes(): + raise ValueError("sweep index '%s' has the wrong number of dimensions" + % str(fsub)) + + for subst_map in kernel.applied_iname_rewrites: + from loopy.symbolic import SubstitutionMapper + from pymbolic.mapper.substitutor import make_subst_func + fsub = SubstitutionMapper(make_subst_func(subst_map))(fsub) + + from loopy.symbolic import get_dependencies + fsub_dependencies = get_dependencies(fsub) + + new_fsub = [] + for axis_nr, fsub_axis in enumerate(fsub): + from pymbolic.primitives import Slice + if isinstance(fsub_axis, Slice): + if fsub_axis.children != (None,): + raise NotImplementedError("add_prefetch only " + "supports full slices") + + axis_name = name_gen( + based_on="%s_fetch_axis_%d" % (arg.name, axis_nr)) + + kernel = _add_kernel_axis(kernel, axis_name, 0, arg.shape[axis_nr], + frozenset(sweep_inames) | fsub_dependencies) + sweep_inames = sweep_inames + [axis_name] + + inames_to_be_removed.append(axis_name) + new_fsub.append(Variable(axis_name)) + + else: + new_fsub.append(fsub_axis) + + new_footprint_subscripts.append(tuple(new_fsub)) + del new_fsub + + footprint_subscripts = new_footprint_subscripts + del new_footprint_subscripts + + subst_use = [Variable(rule_name)(*si) for si in footprint_subscripts] + return kernel, subst_use, sweep_inames, inames_to_be_removed + +# }}} + + +def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, + default_tag="l.auto", rule_name=None, + temporary_name=None, temporary_is_local=None, + footprint_subscripts=None, + fetch_bounding_box=False): + """Prefetch all accesses to the variable *var_name*, with all accesses + being swept through *sweep_inames*. 
+ + :arg dim_arg_names: List of names representing each fetch axis. + :arg rule_name: base name of the generated temporary variable. + :arg footprint_subscripts: A list of tuples indicating the index (i.e. + subscript) tuples used to generate the footprint. + + If only one such set of indices is desired, this may also be specified + directly by putting an index expression into *var_name*. Substitutions + such as those occurring in dimension splits are recorded and also + applied to these indices. + + This function combines :func:`extract_subst` and :func:`precompute`. + """ + + # {{{ fish indexing out of var_name and into footprint_subscripts + + from loopy.symbolic import parse + parsed_var_name = parse(var_name) + + from pymbolic.primitives import Variable, Subscript + if isinstance(parsed_var_name, Variable): + # nothing to see + pass + elif isinstance(parsed_var_name, Subscript): + if footprint_subscripts is not None: + raise TypeError("if footprint_subscripts is specified, then var_name " + "may not contain a subscript") + + assert isinstance(parsed_var_name.aggregate, Variable) + footprint_subscripts = [parsed_var_name.index] + parsed_var_name = parsed_var_name.aggregate + else: + raise ValueError("var_name must either be a variable name or a subscript") + + # }}} + + # {{{ fish out tag + + from loopy.symbolic import TaggedVariable + if isinstance(parsed_var_name, TaggedVariable): + var_name = parsed_var_name.name + tag = parsed_var_name.tag + else: + var_name = parsed_var_name.name + tag = None + + # }}} + + c_name = var_name + if tag is not None: + c_name = c_name + "_" + tag + + var_name_gen = kernel.get_var_name_generator() + + if rule_name is None: + rule_name = var_name_gen("%s_fetch_rule" % c_name) + if temporary_name is None: + temporary_name = var_name_gen("%s_fetch" % c_name) + + arg = kernel.arg_dict[var_name] + + # {{{ make parameter names and unification template + + parameters = [] + for i in range(arg.num_user_axes()): + based_on = "%s_dim_%d" % (c_name, i) + if arg.dim_names is not None: + based_on = "%s_dim_%s" % (c_name, arg.dim_names[i]) + if dim_arg_names is not None and i < len(dim_arg_names): + based_on = dim_arg_names[i] + + par_name = var_name_gen(based_on=based_on) + parameters.append(par_name) + + from pymbolic import var + uni_template = parsed_var_name + if len(parameters) > 1: + uni_template = uni_template.index( + tuple(var(par_name) for par_name in parameters)) + elif len(parameters) == 1: + uni_template = uni_template.index(var(parameters[0])) + + # }}} + + from loopy.transform.subst import extract_subst + kernel = extract_subst(kernel, rule_name, uni_template, parameters) + + if isinstance(sweep_inames, str): + sweep_inames = [s.strip() for s in sweep_inames.split(",")] + else: + # copy, standardize to list + sweep_inames = list(sweep_inames) + + kernel, subst_use, sweep_inames, inames_to_be_removed = \ + _process_footprint_subscripts( + kernel, rule_name, sweep_inames, + footprint_subscripts, arg) + + from loopy.transform.precompute import precompute + new_kernel = precompute(kernel, subst_use, sweep_inames, + precompute_inames=dim_arg_names, + default_tag=default_tag, dtype=arg.dtype, + fetch_bounding_box=fetch_bounding_box, + temporary_name=temporary_name, + temporary_is_local=temporary_is_local) + + # {{{ remove inames that were temporarily added by slice sweeps + + new_domains = new_kernel.domains[:] + + for iname in inames_to_be_removed: + home_domain_index = kernel.get_home_domain_index(iname) + domain = new_domains[home_domain_index] + + dt, 
idx = domain.get_var_dict()[iname] + assert dt == dim_type.set + + new_domains[home_domain_index] = domain.project_out(dt, idx, 1) + + new_kernel = new_kernel.copy(domains=new_domains) + + # }}} + + # If the rule survived past precompute() (i.e. some accesses fell outside + # the footprint), get rid of it before moving on. + if rule_name in new_kernel.substitutions: + from loopy.transform.subst import expand_subst + return expand_subst(new_kernel, "... > id:"+rule_name) + else: + return new_kernel + +# }}} + + +# {{{ change variable kinds + +def change_arg_to_image(knl, name): + new_args = [] + for arg in knl.args: + if arg.name == name: + assert arg.offset == 0 + assert arg.shape is not None + new_args.append(ImageArg(arg.name, dtype=arg.dtype, shape=arg.shape)) + else: + new_args.append(arg) + + return knl.copy(args=new_args) + +# }}} + + +# {{{ tag data axes + +def tag_data_axes(knl, ary_names, dim_tags): + from loopy.kernel.tools import ArrayChanger + + if isinstance(ary_names, str): + ary_names = ary_names.split(",") + + for ary_name in ary_names: + achng = ArrayChanger(knl, ary_name) + ary = achng.get() + + from loopy.kernel.array import parse_array_dim_tags + new_dim_tags = parse_array_dim_tags(dim_tags, + n_axes=ary.num_user_axes(), + use_increasing_target_axes=ary.max_target_axes > 1) + + ary = ary.copy(dim_tags=tuple(new_dim_tags)) + + knl = achng.with_changed_array(ary) + + return knl + +# }}} + + +# {{{ set_array_dim_names + +def set_array_dim_names(kernel, ary_names, dim_names): + from loopy.kernel.tools import ArrayChanger + if isinstance(ary_names, str): + ary_names = ary_names.split(",") + + if isinstance(dim_names, str): + dim_names = tuple(dim_names.split(",")) + + for ary_name in ary_names: + achng = ArrayChanger(kernel, ary_name) + ary = achng.get() + + ary = ary.copy(dim_names=dim_names) + + kernel = achng.with_changed_array(ary) + + return kernel + +# }}} + + +# {{{ remove_unused_arguments + +def remove_unused_arguments(knl): + new_args = [] + + refd_vars = set(knl.all_params()) + for insn in knl.instructions: + refd_vars.update(insn.dependency_names()) + + for arg in knl.args: + if arg.name in refd_vars: + new_args.append(arg) + + return knl.copy(args=new_args) + +# }}} + + +# {{{ alias_temporaries + +def alias_temporaries(knl, names, base_name_prefix=None): + """Sets all temporaries given by *names* to be backed by a single piece of + storage. Also introduces ordering structures ("groups") to prevent the + usage of each temporary to interfere with another. + + :arg base_name_prefix: an identifier to be used for the common storage + area + """ + gng = knl.get_group_name_generator() + group_names = [gng("tmpgrp_"+name) for name in names] + + if base_name_prefix is None: + base_name_prefix = "temp_storage" + + vng = knl.get_var_name_generator() + base_name = vng(base_name_prefix) + + names_set = set(names) + + new_insns = [] + for insn in knl.instructions: + temp_deps = insn.dependency_names() & names_set + + if not temp_deps: + new_insns.append(insn) + continue + + if len(temp_deps) > 1: + raise LoopyError("Instruction {insn} refers to multiple of the " + "temporaries being aliased, namely '{temps}'. Cannot alias." 
+ .format( + insn=insn.id, + temps=", ".join(temp_deps))) + + temp_name, = temp_deps + temp_idx = names.index(temp_name) + group_name = group_names[temp_idx] + other_group_names = ( + frozenset(group_names[:temp_idx]) + | frozenset(group_names[temp_idx+1:])) + + new_insns.append( + insn.copy( + groups=insn.groups | frozenset([group_name]), + conflicts_with_groups=( + insn.conflicts_with_groups | other_group_names))) + + new_temporary_variables = {} + for tv in six.itervalues(knl.temporary_variables): + if tv.name in names_set: + if tv.base_storage is not None: + raise LoopyError("temporary variable '{tv}' already has " + "a defined storage array -- cannot alias" + .format(tv=tv.name)) + + new_temporary_variables[tv.name] = \ + tv.copy(base_storage=base_name) + else: + new_temporary_variables[tv.name] = tv + + return knl.copy( + instructions=new_insns, + temporary_variables=new_temporary_variables) + +# }}} + + +# {{{ set argument order + +def set_argument_order(kernel, arg_names): + """ + :arg arg_names: A list (or comma-separated string) or argument + names. All arguments must be in this list. + """ + + if isinstance(arg_names, str): + arg_names = arg_names.split(",") + + new_args = [] + old_arg_dict = kernel.arg_dict.copy() + + for arg_name in arg_names: + try: + arg = old_arg_dict.pop(arg_name) + except KeyError: + raise LoopyError("unknown argument '%s'" + % arg_name) + + new_args.append(arg) + + if old_arg_dict: + raise LoopyError("incomplete argument list passed " + "to set_argument_order. Left over: '%s'" + % ", ".join(arg_name for arg_name in old_arg_dict)) + + return kernel.copy(args=new_args) + +# }}} + + +# vim: foldmethod=marker diff --git a/loopy/diff.py b/loopy/transform/diff.py similarity index 100% rename from loopy/diff.py rename to loopy/transform/diff.py diff --git a/loopy/fusion.py b/loopy/transform/fusion.py similarity index 100% rename from loopy/fusion.py rename to loopy/transform/fusion.py diff --git a/loopy/ilp.py b/loopy/transform/ilp.py similarity index 100% rename from loopy/ilp.py rename to loopy/transform/ilp.py diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py new file mode 100644 index 0000000000000000000000000000000000000000..d92a3061583be0070f2e40eb2f82361b1f39496f --- /dev/null +++ b/loopy/transform/iname.py @@ -0,0 +1,1042 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + + +import six +from six.moves import zip + +import islpy as isl +from islpy import dim_type + +from loopy.symbolic import ( + RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, + SubstitutionRuleMappingContext) +from loopy.diagnostic import LoopyError + + +# {{{ assume + +def assume(kernel, assumptions): + if isinstance(assumptions, str): + assumptions_set_str = "[%s] -> { : %s}" \ + % (",".join(s for s in kernel.outer_params()), + assumptions) + assumptions = isl.BasicSet.read_from_str(kernel.domains[0].get_ctx(), + assumptions_set_str) + + if not isinstance(assumptions, isl.BasicSet): + raise TypeError("'assumptions' must be a BasicSet or a string") + + old_assumptions, new_assumptions = isl.align_two(kernel.assumptions, assumptions) + + return kernel.copy( + assumptions=old_assumptions.params() & new_assumptions.params()) + +# }}} + + +# {{{ set loop priority + +def set_loop_priority(kernel, loop_priority): + """Indicates the textual order in which loops should be entered in the + kernel code. Note that this priority has an advisory role only. If the + kernel logically requires a different nesting, priority is ignored. + Priority is only considered if loop nesting is ambiguous. + + :arg: an iterable of inames, or, for brevity, a comma-seaprated string of + inames + """ + + if isinstance(loop_priority, str): + loop_priority = [s.strip() for s in loop_priority.split(",")] + + return kernel.copy(loop_priority=loop_priority) + +# }}} + + +# {{{ split inames + +class _InameSplitter(RuleAwareIdentityMapper): + def __init__(self, rule_mapping_context, within, + split_iname, outer_iname, inner_iname, replacement_index): + super(_InameSplitter, self).__init__(rule_mapping_context) + + self.within = within + + self.split_iname = split_iname + self.outer_iname = outer_iname + self.inner_iname = inner_iname + + self.replacement_index = replacement_index + + def map_reduction(self, expr, expn_state): + if (self.split_iname in expr.inames + and self.split_iname not in expn_state.arg_context + and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack)): + new_inames = list(expr.inames) + new_inames.remove(self.split_iname) + new_inames.extend([self.outer_iname, self.inner_iname]) + + from loopy.symbolic import Reduction + return Reduction(expr.operation, tuple(new_inames), + self.rec(expr.expr, expn_state)) + else: + return super(_InameSplitter, self).map_reduction(expr, expn_state) + + def map_variable(self, expr, expn_state): + if (expr.name == self.split_iname + and self.split_iname not in expn_state.arg_context + and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack)): + return self.replacement_index + else: + return super(_InameSplitter, self).map_variable(expr, expn_state) + + +def split_iname(kernel, split_iname, inner_length, + outer_iname=None, inner_iname=None, + outer_tag=None, inner_tag=None, + slabs=(0, 0), do_tagged_check=True, + within=None): + """ + :arg within: a stack match as understood by + :func:`loopy.context_matching.parse_stack_match`. 
+ """ + + existing_tag = kernel.iname_to_tag.get(split_iname) + from loopy.kernel.data import ForceSequentialTag + if do_tagged_check and ( + existing_tag is not None + and not isinstance(existing_tag, ForceSequentialTag)): + raise LoopyError("cannot split already tagged iname '%s'" % split_iname) + + if split_iname not in kernel.all_inames(): + raise ValueError("cannot split loop for unknown variable '%s'" % split_iname) + + applied_iname_rewrites = kernel.applied_iname_rewrites[:] + + vng = kernel.get_var_name_generator() + + if outer_iname is None: + outer_iname = vng(split_iname+"_outer") + if inner_iname is None: + inner_iname = vng(split_iname+"_inner") + + def process_set(s): + var_dict = s.get_var_dict() + + if split_iname not in var_dict: + return s + + orig_dim_type, _ = var_dict[split_iname] + + outer_var_nr = s.dim(orig_dim_type) + inner_var_nr = s.dim(orig_dim_type)+1 + + s = s.add_dims(orig_dim_type, 2) + s = s.set_dim_name(orig_dim_type, outer_var_nr, outer_iname) + s = s.set_dim_name(orig_dim_type, inner_var_nr, inner_iname) + + from loopy.isl_helpers import make_slab + + space = s.get_space() + inner_constraint_set = ( + make_slab(space, inner_iname, 0, inner_length) + # name = inner + length*outer + .add_constraint(isl.Constraint.eq_from_names( + space, { + split_iname: 1, + inner_iname: -1, + outer_iname: -inner_length}))) + + name_dim_type, name_idx = space.get_var_dict()[split_iname] + s = s.intersect(inner_constraint_set) + + if within is None: + s = s.project_out(name_dim_type, name_idx, 1) + + return s + + new_domains = [process_set(dom) for dom in kernel.domains] + + from pymbolic import var + inner = var(inner_iname) + outer = var(outer_iname) + new_loop_index = inner + outer*inner_length + + subst_map = {var(split_iname): new_loop_index} + applied_iname_rewrites.append(subst_map) + + # {{{ update forced_iname deps + + new_insns = [] + for insn in kernel.instructions: + if split_iname in insn.forced_iname_deps: + new_forced_iname_deps = ( + (insn.forced_iname_deps.copy() + - frozenset([split_iname])) + | frozenset([outer_iname, inner_iname])) + else: + new_forced_iname_deps = insn.forced_iname_deps + + insn = insn.copy( + forced_iname_deps=new_forced_iname_deps) + + new_insns.append(insn) + + # }}} + + iname_slab_increments = kernel.iname_slab_increments.copy() + iname_slab_increments[outer_iname] = slabs + + new_loop_priority = [] + for prio_iname in kernel.loop_priority: + if prio_iname == split_iname: + new_loop_priority.append(outer_iname) + new_loop_priority.append(inner_iname) + else: + new_loop_priority.append(prio_iname) + + kernel = kernel.copy( + domains=new_domains, + iname_slab_increments=iname_slab_increments, + instructions=new_insns, + applied_iname_rewrites=applied_iname_rewrites, + loop_priority=new_loop_priority) + + from loopy.context_matching import parse_stack_match + within = parse_stack_match(within) + + rule_mapping_context = SubstitutionRuleMappingContext( + kernel.substitutions, kernel.get_var_name_generator()) + ins = _InameSplitter(rule_mapping_context, within, + split_iname, outer_iname, inner_iname, new_loop_index) + + kernel = ins.map_kernel(kernel) + kernel = rule_mapping_context.finish_kernel(kernel) + + if existing_tag is not None: + kernel = tag_inames(kernel, + {outer_iname: existing_tag, inner_iname: existing_tag}) + + return tag_inames(kernel, {outer_iname: outer_tag, inner_iname: inner_tag}) + +# }}} + + +# {{{ join inames + +class _InameJoiner(RuleAwareSubstitutionMapper): + def __init__(self, rule_mapping_context, within, 
subst_func,
+            joined_inames, new_iname):
+        super(_InameJoiner, self).__init__(rule_mapping_context,
+                subst_func, within)
+
+        self.joined_inames = set(joined_inames)
+        self.new_iname = new_iname
+
+    def map_reduction(self, expr, expn_state):
+        expr_inames = set(expr.inames)
+        overlap = (self.joined_inames & expr_inames
+                - set(expn_state.arg_context))
+        if overlap and self.within(
+                expn_state.kernel,
+                expn_state.instruction,
+                expn_state.stack):
+            if overlap != expr_inames:
+                raise LoopyError(
+                        "Cannot join inames '%s' if there is a reduction "
+                        "that does not use all of the inames being joined. "
+                        "(Found one with just '%s'.)"
+                        % (
+                            ", ".join(self.joined_inames),
+                            ", ".join(expr_inames)))
+
+            new_inames = expr_inames - self.joined_inames
+            new_inames.add(self.new_iname)
+
+            from loopy.symbolic import Reduction
+            return Reduction(expr.operation, tuple(new_inames),
+                        self.rec(expr.expr, expn_state))
+        else:
+            return super(_InameJoiner, self).map_reduction(expr, expn_state)
+
+
+def join_inames(kernel, inames, new_iname=None, tag=None, within=None):
+    """
+    :arg inames: fastest varying last
+    :arg within: a stack match as understood by
+        :func:`loopy.context_matching.parse_stack_match`.
+    """
+
+    # now fastest varying first
+    inames = inames[::-1]
+
+    if new_iname is None:
+        new_iname = kernel.get_var_name_generator()("_and_".join(inames))
+
+    from loopy.kernel.tools import DomainChanger
+    domch = DomainChanger(kernel, frozenset(inames))
+    for iname in inames:
+        if kernel.get_home_domain_index(iname) != domch.leaf_domain_index:
+            raise LoopyError("iname '%s' is not 'at home' in the "
+                    "join's leaf domain" % iname)
+
+    new_domain = domch.domain
+    new_dim_idx = new_domain.dim(dim_type.set)
+    new_domain = new_domain.add_dims(dim_type.set, 1)
+    new_domain = new_domain.set_dim_name(dim_type.set, new_dim_idx, new_iname)
+
+    joint_aff = zero = isl.Aff.zero_on_domain(new_domain.space)
+    subst_dict = {}
+    base_divisor = 1
+
+    from pymbolic import var
+
+    for i, iname in enumerate(inames):
+        iname_dt, iname_idx = zero.get_space().get_var_dict()[iname]
+        iname_aff = zero.add_coefficient_val(iname_dt, iname_idx, 1)
+
+        joint_aff = joint_aff + base_divisor*iname_aff
+
+        bounds = kernel.get_iname_bounds(iname, constants_only=True)
+
+        from loopy.isl_helpers import (
+                static_max_of_pw_aff, static_value_of_pw_aff)
+        from loopy.symbolic import pw_aff_to_expr
+
+        length = int(pw_aff_to_expr(
+            static_max_of_pw_aff(bounds.size, constants_only=True)))
+
+        try:
+            lower_bound_aff = static_value_of_pw_aff(
+                    bounds.lower_bound_pw_aff.coalesce(),
+                    constants_only=False)
+        except Exception as e:
+            raise type(e)("while finding lower bound of '%s': %s"
+                    % (iname, str(e)))
+
+        my_val = var(new_iname) // base_divisor
+        if i+1 < len(inames):
+            my_val %= length
+        my_val += pw_aff_to_expr(lower_bound_aff)
+        subst_dict[iname] = my_val
+
+        base_divisor *= length
+
+    from loopy.isl_helpers import iname_rel_aff
+    new_domain = new_domain.add_constraint(
+            isl.Constraint.equality_from_aff(
+                iname_rel_aff(new_domain.get_space(), new_iname, "==", joint_aff)))
+
+    for i, iname in enumerate(inames):
+        iname_to_dim = new_domain.get_space().get_var_dict()
+        iname_dt, iname_idx = iname_to_dim[iname]
+
+        if within is None:
+            new_domain = new_domain.project_out(iname_dt, iname_idx, 1)
+
+    def subst_forced_iname_deps(fid):
+        result = set()
+        for iname in fid:
+            if iname in inames:
+                result.add(new_iname)
+            else:
+                result.add(iname)
+
+        return frozenset(result)
+
+    new_insns = [
+            insn.copy(
+                
forced_iname_deps=subst_forced_iname_deps(insn.forced_iname_deps)) + for insn in kernel.instructions] + + kernel = (kernel + .copy( + instructions=new_insns, + domains=domch.get_domains_with(new_domain), + applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict] + )) + + from loopy.context_matching import parse_stack_match + within = parse_stack_match(within) + + from pymbolic.mapper.substitutor import make_subst_func + rule_mapping_context = SubstitutionRuleMappingContext( + kernel.substitutions, kernel.get_var_name_generator()) + ijoin = _InameJoiner(rule_mapping_context, within, + make_subst_func(subst_dict), + inames, new_iname) + + kernel = rule_mapping_context.finish_kernel( + ijoin.map_kernel(kernel)) + + if tag is not None: + kernel = tag_inames(kernel, {new_iname: tag}) + + return kernel + +# }}} + + +# {{{ tag inames + +def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): + from loopy.kernel.data import parse_tag + + iname_to_tag = dict((iname, parse_tag(tag)) + for iname, tag in six.iteritems(iname_to_tag)) + + from loopy.kernel.data import (ParallelTag, AutoLocalIndexTagBase, + ForceSequentialTag) + + new_iname_to_tag = kernel.iname_to_tag.copy() + for iname, new_tag in six.iteritems(iname_to_tag): + if iname not in kernel.all_inames(): + if ignore_nonexistent: + continue + else: + raise LoopyError("iname '%s' does not exist" % iname) + + old_tag = kernel.iname_to_tag.get(iname) + + retag_ok = False + + if isinstance(old_tag, (AutoLocalIndexTagBase, ForceSequentialTag)): + retag_ok = True + + if not retag_ok and old_tag is not None and new_tag is None: + raise ValueError("cannot untag iname '%s'" % iname) + + if iname not in kernel.all_inames(): + raise ValueError("cannot tag '%s'--not known" % iname) + + if isinstance(new_tag, ParallelTag) \ + and isinstance(old_tag, ForceSequentialTag): + raise ValueError("cannot tag '%s' as parallel--" + "iname requires sequential execution" % iname) + + if isinstance(new_tag, ForceSequentialTag) \ + and isinstance(old_tag, ParallelTag): + raise ValueError("'%s' is already tagged as parallel, " + "but is now prohibited from being parallel " + "(likely because of participation in a precompute or " + "a reduction)" % iname) + + if (not retag_ok) and (not force) \ + and old_tag is not None and (old_tag != new_tag): + raise LoopyError("'%s' is already tagged '%s'--cannot retag" + % (iname, old_tag)) + + new_iname_to_tag[iname] = new_tag + + return kernel.copy(iname_to_tag=new_iname_to_tag) + +# }}} + + +# {{{ duplicate inames + +class _InameDuplicator(RuleAwareIdentityMapper): + def __init__(self, rule_mapping_context, + old_to_new, within): + super(_InameDuplicator, self).__init__(rule_mapping_context) + + self.old_to_new = old_to_new + self.old_inames_set = set(six.iterkeys(old_to_new)) + self.within = within + + def map_reduction(self, expr, expn_state): + if (set(expr.inames) & self.old_inames_set + and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack)): + new_inames = tuple( + self.old_to_new.get(iname, iname) + if iname not in expn_state.arg_context + else iname + for iname in expr.inames) + + from loopy.symbolic import Reduction + return Reduction(expr.operation, new_inames, + self.rec(expr.expr, expn_state)) + else: + return super(_InameDuplicator, self).map_reduction(expr, expn_state) + + def map_variable(self, expr, expn_state): + new_name = self.old_to_new.get(expr.name) + + if (new_name is None + or expr.name in expn_state.arg_context + or not self.within( + 
expn_state.kernel, + expn_state.instruction, + expn_state.stack)): + return super(_InameDuplicator, self).map_variable(expr, expn_state) + else: + from pymbolic import var + return var(new_name) + + def map_instruction(self, kernel, insn): + if not self.within(kernel, insn, ()): + return insn + + new_fid = frozenset( + self.old_to_new.get(iname, iname) + for iname in insn.forced_iname_deps) + return insn.copy(forced_iname_deps=new_fid) + + +def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, + tags={}): + """ + :arg within: a stack match as understood by + :func:`loopy.context_matching.parse_stack_match`. + """ + + # {{{ normalize arguments, find unique new_inames + + if isinstance(inames, str): + inames = [iname.strip() for iname in inames.split(",")] + + if isinstance(new_inames, str): + new_inames = [iname.strip() for iname in new_inames.split(",")] + + from loopy.context_matching import parse_stack_match + within = parse_stack_match(within) + + if new_inames is None: + new_inames = [None] * len(inames) + + if len(new_inames) != len(inames): + raise ValueError("new_inames must have the same number of entries as inames") + + name_gen = knl.get_var_name_generator() + + for i, iname in enumerate(inames): + new_iname = new_inames[i] + + if new_iname is None: + new_iname = iname + + if suffix is not None: + new_iname += suffix + + new_iname = name_gen(new_iname) + + else: + if name_gen.is_name_conflicting(new_iname): + raise ValueError("new iname '%s' conflicts with existing names" + % new_iname) + + name_gen.add_name(new_iname) + + new_inames[i] = new_iname + + # }}} + + # {{{ duplicate the inames + + for old_iname, new_iname in zip(inames, new_inames): + from loopy.kernel.tools import DomainChanger + domch = DomainChanger(knl, frozenset([old_iname])) + + from loopy.isl_helpers import duplicate_axes + knl = knl.copy( + domains=domch.get_domains_with( + duplicate_axes(domch.domain, [old_iname], [new_iname]))) + + # }}} + + # {{{ change the inames in the code + + rule_mapping_context = SubstitutionRuleMappingContext( + knl.substitutions, name_gen) + indup = _InameDuplicator(rule_mapping_context, + old_to_new=dict(list(zip(inames, new_inames))), + within=within) + + knl = rule_mapping_context.finish_kernel( + indup.map_kernel(knl)) + + # }}} + + # {{{ realize tags + + for old_iname, new_iname in zip(inames, new_inames): + new_tag = tags.get(old_iname) + if new_tag is not None: + knl = tag_inames(knl, {new_iname: new_tag}) + + # }}} + + return knl + +# }}} + + +# {{{ rename_inames + +def rename_iname(knl, old_iname, new_iname, existing_ok=False, within=None): + """ + :arg within: a stack match as understood by + :func:`loopy.context_matching.parse_stack_match`. 
+ :arg existing_ok: execute even if *new_iname* already exists + """ + + var_name_gen = knl.get_var_name_generator() + + does_exist = var_name_gen.is_name_conflicting(new_iname) + + if does_exist and not existing_ok: + raise ValueError("iname '%s' conflicts with an existing identifier" + "--cannot rename" % new_iname) + + if does_exist: + # {{{ check that the domains match up + + dom = knl.get_inames_domain(frozenset((old_iname, new_iname))) + + var_dict = dom.get_var_dict() + _, old_idx = var_dict[old_iname] + _, new_idx = var_dict[new_iname] + + par_idx = dom.dim(dim_type.param) + dom_old = dom.move_dims( + dim_type.param, par_idx, dim_type.set, old_idx, 1) + dom_old = dom_old.move_dims( + dim_type.set, dom_old.dim(dim_type.set), dim_type.param, par_idx, 1) + dom_old = dom_old.project_out( + dim_type.set, new_idx if new_idx < old_idx else new_idx - 1, 1) + + par_idx = dom.dim(dim_type.param) + dom_new = dom.move_dims( + dim_type.param, par_idx, dim_type.set, new_idx, 1) + dom_new = dom_new.move_dims( + dim_type.set, dom_new.dim(dim_type.set), dim_type.param, par_idx, 1) + dom_new = dom_new.project_out( + dim_type.set, old_idx if old_idx < new_idx else old_idx - 1, 1) + + if not (dom_old <= dom_new and dom_new <= dom_old): + raise LoopyError( + "inames {old} and {new} do not iterate over the same domain" + .format(old=old_iname, new=new_iname)) + + # }}} + + from pymbolic import var + subst_dict = {old_iname: var(new_iname)} + + from loopy.context_matching import parse_stack_match + within = parse_stack_match(within) + + from pymbolic.mapper.substitutor import make_subst_func + rule_mapping_context = SubstitutionRuleMappingContext( + knl.substitutions, var_name_gen) + ijoin = RuleAwareSubstitutionMapper(rule_mapping_context, + make_subst_func(subst_dict), within) + + knl = rule_mapping_context.finish_kernel( + ijoin.map_kernel(knl)) + + new_instructions = [] + for insn in knl.instructions: + if (old_iname in insn.forced_iname_deps + and within(knl, insn, ())): + insn = insn.copy( + forced_iname_deps=( + (insn.forced_iname_deps - frozenset([old_iname])) + | frozenset([new_iname]))) + + new_instructions.append(insn) + + knl = knl.copy(instructions=new_instructions) + + else: + knl = duplicate_inames( + knl, [old_iname], within=within, new_inames=[new_iname]) + + knl = remove_unused_inames(knl, [old_iname]) + + return knl + +# }}} + + +# {{{ link inames + +def link_inames(knl, inames, new_iname, within=None, tag=None): + # {{{ normalize arguments + + if isinstance(inames, str): + inames = inames.split(",") + + var_name_gen = knl.get_var_name_generator() + new_iname = var_name_gen(new_iname) + + # }}} + + # {{{ ensure that each iname is used at most once in each instruction + + inames_set = set(inames) + + if 0: + # FIXME! + for insn in knl.instructions: + insn_inames = knl.insn_inames(insn.id) | insn.reduction_inames() + + if len(insn_inames & inames_set) > 1: + raise LoopyError("To-be-linked inames '%s' are used in " + "instruction '%s'. No more than one such iname can " + "be used in one instruction." 
+ % (", ".join(insn_inames & inames_set), insn.id)) + + # }}} + + from loopy.kernel.tools import DomainChanger + domch = DomainChanger(knl, tuple(inames)) + + # {{{ ensure that projections are identical + + unrelated_dom_inames = list( + set(domch.domain.get_var_names(dim_type.set)) + - inames_set) + + domain = domch.domain + + # move all inames to be linked to end to prevent shuffly confusion + for iname in inames: + dt, index = domain.get_var_dict()[iname] + assert dt == dim_type.set + + # move to tail of param dim_type + domain = domain.move_dims( + dim_type.param, domain.dim(dim_type.param), + dt, index, 1) + # move to tail of set dim_type + domain = domain.move_dims( + dim_type.set, domain.dim(dim_type.set), + dim_type.param, domain.dim(dim_type.param)-1, 1) + + projections = [ + domch.domain.project_out_except( + unrelated_dom_inames + [iname], [dim_type.set]) + for iname in inames] + + all_equal = True + first_proj = projections[0] + for proj in projections[1:]: + all_equal = all_equal and (proj <= first_proj and first_proj <= proj) + + if not all_equal: + raise LoopyError("Inames cannot be linked because their domain " + "constraints are not the same.") + + del domain # messed up for testing, do not use + + # }}} + + # change the domain + from loopy.isl_helpers import duplicate_axes + knl = knl.copy( + domains=domch.get_domains_with( + duplicate_axes(domch.domain, [inames[0]], [new_iname]))) + + # {{{ change the code + + from pymbolic import var + subst_dict = dict((iname, var(new_iname)) for iname in inames) + + from loopy.context_matching import parse_stack_match + within = parse_stack_match(within) + + from pymbolic.mapper.substitutor import make_subst_func + rule_mapping_context = SubstitutionRuleMappingContext( + knl.substitutions, var_name_gen) + ijoin = RuleAwareSubstitutionMapper(rule_mapping_context, + make_subst_func(subst_dict), within) + + knl = rule_mapping_context.finish_kernel( + ijoin.map_kernel(knl)) + + # }}} + + knl = remove_unused_inames(knl, inames) + + if tag is not None: + knl = tag_inames(knl, {new_iname: tag}) + + return knl + +# }}} + + +# {{{ remove unused inames + +def remove_unused_inames(knl, inames=None): + """Delete those among *inames* that are unused, i.e. project them + out of the domain. If these inames pose implicit restrictions on + other inames, these restrictions will persist as existentially + quantified variables. + + :arg inames: may be an iterable of inames or a string of comma-separated inames. + """ + + # {{{ normalize arguments + + if inames is None: + inames = knl.all_inames() + elif isinstance(inames, str): + inames = inames.split(",") + + # }}} + + # {{{ check which inames are unused + + inames = set(inames) + used_inames = set() + for insn in knl.instructions: + used_inames.update(knl.insn_inames(insn.id)) + + unused_inames = inames - used_inames + + # }}} + + # {{{ remove them + + from loopy.kernel.tools import DomainChanger + + for iname in unused_inames: + domch = DomainChanger(knl, (iname,)) + + dom = domch.domain + dt, idx = dom.get_var_dict()[iname] + dom = dom.project_out(dt, idx, 1) + + knl = knl.copy(domains=domch.get_domains_with(dom)) + + # }}} + + return knl + +# }}} + + +# {{{ affine map inames + +def affine_map_inames(kernel, old_inames, new_inames, equations): + """Return a new *kernel* where the affine transform + specified by *equations* has been applied to the inames. + + :arg old_inames: A list of inames to be replaced by affine transforms + of their values. + May also be a string of comma-separated inames. 
+
+    :arg new_inames: A list of new inames that are not yet used in *kernel*,
+        but have their values established in terms of *old_inames* by
+        *equations*.
+        May also be a string of comma-separated inames.
+    :arg equations: A list of equations establishing a relationship
+        between *old_inames* and *new_inames*. Each equation may be
+        a tuple ``(lhs, rhs)`` of expressions or a string, with left and
+        right hand side of the equation separated by ``=``.
+    """
+
+    # {{{ check and parse arguments
+
+    if isinstance(new_inames, str):
+        new_inames = new_inames.split(",")
+        new_inames = [iname.strip() for iname in new_inames]
+    if isinstance(old_inames, str):
+        old_inames = old_inames.split(",")
+        old_inames = [iname.strip() for iname in old_inames]
+    if isinstance(equations, str):
+        equations = [equations]
+
+    import re
+    eqn_re = re.compile(r"^([^=]+)=([^=]+)$")
+
+    def parse_equation(eqn):
+        if isinstance(eqn, str):
+            eqn_match = eqn_re.match(eqn)
+            if not eqn_match:
+                raise ValueError("invalid equation: %s" % eqn)
+
+            from loopy.symbolic import parse
+            lhs = parse(eqn_match.group(1))
+            rhs = parse(eqn_match.group(2))
+            return (lhs, rhs)
+        elif isinstance(eqn, tuple):
+            if len(eqn) != 2:
+                raise ValueError("unexpected length of equation tuple, "
+                        "got %d, should be 2" % len(eqn))
+            return eqn
+        else:
+            raise ValueError("unexpected type of equation, "
+                    "got '%s', should be string or tuple"
+                    % type(eqn).__name__)
+
+    equations = [parse_equation(eqn) for eqn in equations]
+
+    all_vars = kernel.all_variable_names()
+    for iname in new_inames:
+        if iname in all_vars:
+            raise LoopyError("new iname '%s' is already used in kernel"
+                    % iname)
+
+    for iname in old_inames:
+        if iname not in kernel.all_inames():
+            raise LoopyError("old iname '%s' not known" % iname)
+
+    # }}}
+
+    # {{{ substitute iname use
+
+    from pymbolic.algorithm import solve_affine_equations_for
+    old_inames_to_expr = solve_affine_equations_for(old_inames, equations)
+
+    subst_dict = dict(
+            (v.name, expr)
+            for v, expr in old_inames_to_expr.items())
+
+    var_name_gen = kernel.get_var_name_generator()
+
+    from pymbolic.mapper.substitutor import make_subst_func
+    from loopy.context_matching import parse_stack_match
+
+    rule_mapping_context = SubstitutionRuleMappingContext(
+            kernel.substitutions, var_name_gen)
+    old_to_new = RuleAwareSubstitutionMapper(rule_mapping_context,
+            make_subst_func(subst_dict), within=parse_stack_match(None))
+
+    kernel = (
+            rule_mapping_context.finish_kernel(
+                old_to_new.map_kernel(kernel))
+            .copy(
+                applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict]
+                ))
+
+    # }}}
+
+    # {{{ change domains
+
+    new_inames_set = set(new_inames)
+    old_inames_set = set(old_inames)
+
+    new_domains = []
+    for idom, dom in enumerate(kernel.domains):
+        dom_var_dict = dom.get_var_dict()
+        old_iname_overlap = [
+                iname
+                for iname in old_inames
+                if iname in dom_var_dict]
+
+        if not old_iname_overlap:
+            new_domains.append(dom)
+            continue
+
+        from loopy.symbolic import get_dependencies
+        dom_new_inames = set()
+        dom_old_inames = set()
+
+        # mapping for new inames to dim_types
+        new_iname_dim_types = {}
+
+        dom_equations = []
+        for iname in old_iname_overlap:
+            for ieqn, (lhs, rhs) in enumerate(equations):
+                eqn_deps = get_dependencies(lhs) | get_dependencies(rhs)
+                if iname in eqn_deps:
+                    dom_new_inames.update(eqn_deps & new_inames_set)
+                    dom_old_inames.update(eqn_deps & old_inames_set)
+
+                    if dom_old_inames:
+                        dom_equations.append((lhs, rhs))
+
+                    this_eqn_old_iname_dim_types = set(
+                            dom_var_dict[old_iname][0]
+                            for 
old_iname in eqn_deps & old_inames_set) + + if this_eqn_old_iname_dim_types: + if len(this_eqn_old_iname_dim_types) > 1: + raise ValueError("inames '%s' (from equation %d (0-based)) " + "in domain %d (0-based) are not " + "of a uniform dim_type" + % (", ".join(eqn_deps & old_inames_set), ieqn, idom)) + + this_eqn_new_iname_dim_type, = this_eqn_old_iname_dim_types + + for new_iname in eqn_deps & new_inames_set: + if new_iname in new_iname_dim_types: + if (this_eqn_new_iname_dim_type + != new_iname_dim_types[new_iname]): + raise ValueError("dim_type disagreement for " + "iname '%s' (from equation %d (0-based)) " + "in domain %d (0-based)" + % (new_iname, ieqn, idom)) + else: + new_iname_dim_types[new_iname] = \ + this_eqn_new_iname_dim_type + + if not dom_old_inames <= set(dom_var_dict): + raise ValueError("domain %d (0-based) does not know about " + "all old inames (specifically '%s') needed to define new inames" + % (idom, ", ".join(dom_old_inames - set(dom_var_dict)))) + + # add inames to domain with correct dim_types + dom_new_inames = list(dom_new_inames) + for iname in dom_new_inames: + dt = new_iname_dim_types[iname] + iname_idx = dom.dim(dt) + dom = dom.add_dims(dt, 1) + dom = dom.set_dim_name(dt, iname_idx, iname) + + # add equations + from loopy.symbolic import aff_from_expr + for lhs, rhs in dom_equations: + dom = dom.add_constraint( + isl.Constraint.equality_from_aff( + aff_from_expr(dom.space, rhs - lhs))) + + # project out old inames + for iname in dom_old_inames: + dt, idx = dom.get_var_dict()[iname] + dom = dom.project_out(dt, idx, 1) + + new_domains.append(dom) + + # }}} + + return kernel.copy(domains=new_domains) + +# }}} + + +# vim: foldmethod=marker diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py new file mode 100644 index 0000000000000000000000000000000000000000..bc2b68fb1069fcd41ed6b6595430c33f73b5eea1 --- /dev/null +++ b/loopy/transform/instruction.py @@ -0,0 +1,149 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +import six # noqa + +from loopy.diagnostic import LoopyError + + +# {{{ instruction processing + +def find_instructions(kernel, insn_match): + from loopy.context_matching import parse_match + match = parse_match(insn_match) + return [insn for insn in kernel.instructions if match(kernel, insn)] + + +def map_instructions(kernel, insn_match, f): + from loopy.context_matching import parse_match + match = parse_match(insn_match) + + new_insns = [] + + for insn in kernel.instructions: + if match(kernel, insn): + new_insns.append(f(insn)) + else: + new_insns.append(insn) + + return kernel.copy(instructions=new_insns) + + +def set_instruction_priority(kernel, insn_match, priority): + """Set the priority of instructions matching *insn_match* to *priority*. + + *insn_match* may be any instruction id match understood by + :func:`loopy.context_matching.parse_match`. + """ + + def set_prio(insn): + return insn.copy(priority=priority) + + return map_instructions(kernel, insn_match, set_prio) + + +def add_dependency(kernel, insn_match, dependency): + """Add the instruction dependency *dependency* to the instructions matched + by *insn_match*. + + *insn_match* may be any instruction id match understood by + :func:`loopy.context_matching.parse_match`. + """ + + if dependency not in kernel.id_to_insn: + raise LoopyError("cannot add dependency on non-existent instruction ID '%s'" + % dependency) + + def add_dep(insn): + new_deps = insn.insn_deps + added_deps = frozenset([dependency]) + if new_deps is None: + new_deps = added_deps + else: + new_deps = new_deps | added_deps + + return insn.copy(insn_deps=new_deps) + + return map_instructions(kernel, insn_match, add_dep) + + +def remove_instructions(kernel, insn_ids): + """Return a new kernel with instructions in *insn_ids* removed. + + Dependencies across (one, for now) deleted isntructions are propagated. + Behavior is undefined for now for chains of dependencies within the + set of deleted instructions. 
+ """ + + if not insn_ids: + return kernel + + assert isinstance(insn_ids, set) + id_to_insn = kernel.id_to_insn + + new_insns = [] + for insn in kernel.instructions: + if insn.id in insn_ids: + continue + + # transitively propagate dependencies + # (only one level for now) + if insn.insn_deps is None: + insn_deps = frozenset() + else: + insn_deps = insn.insn_deps + + new_deps = insn_deps - insn_ids + + for dep_id in insn_deps & insn_ids: + new_deps = new_deps | id_to_insn[dep_id].insn_deps + + new_insns.append(insn.copy(insn_deps=frozenset(new_deps))) + + return kernel.copy( + instructions=new_insns) + +# }}} + + +# {{{ tag_instructions + +def tag_instructions(kernel, new_tag, within=None): + from loopy.context_matching import parse_match + within = parse_match(within) + + new_insns = [] + for insn in kernel.instructions: + if within(kernel, insn): + new_insns.append( + insn.copy(tags=insn.tags + (new_tag,))) + else: + new_insns.append(insn) + + return kernel.copy(instructions=new_insns) + +# }}} + + +# vim: foldmethod=marker diff --git a/loopy/padding.py b/loopy/transform/padding.py similarity index 100% rename from loopy/padding.py rename to loopy/transform/padding.py diff --git a/loopy/precompute.py b/loopy/transform/precompute.py similarity index 99% rename from loopy/precompute.py rename to loopy/transform/precompute.py index a8b082d0ad36dd71aefba9e29a01b44b1b1fc01f..fc10daa49ab400e64db7d48ed42ca3e17c8f58a1 100644 --- a/loopy/precompute.py +++ b/loopy/transform/precompute.py @@ -35,7 +35,7 @@ import numpy as np from pymbolic import var -from loopy.array_buffer_map import (ArrayToBufferMap, NoOpArrayToBufferMap, +from loopy.transform.array_buffer_map import (ArrayToBufferMap, NoOpArrayToBufferMap, AccessDescriptor) diff --git a/loopy/subst.py b/loopy/transform/subst.py similarity index 95% rename from loopy/subst.py rename to loopy/transform/subst.py index 5211be2d9a7e2163f87dbf27b131497ef485e0db..982abe01a390677c943de7a1655713f3a24cf4a2 100644 --- a/loopy/subst.py +++ b/loopy/transform/subst.py @@ -43,6 +43,8 @@ class ExprDescriptor(Record): __slots__ = ["insn", "expr", "unif_var_dict"] +# {{{ extract_subst + def extract_subst(kernel, subst_name, template, parameters=()): """ :arg subst_name: The name of the substitution rule to be created. @@ -197,6 +199,8 @@ def extract_subst(kernel, subst_name, template, parameters=()): instructions=new_insns, substitutions=new_substs) +# }}} + # {{{ assignment_to_subst @@ -449,6 +453,8 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, # }}} +# {{{ expand_subst + def expand_subst(kernel, within=None): logger.debug("%s: expand subst" % kernel.name) @@ -463,4 +469,34 @@ def expand_subst(kernel, within=None): return rule_mapping_context.finish_kernel(submap.map_kernel(kernel)) +# }}} + + +# {{{ find substitution rules by glob patterns + +def find_rules_matching(knl, pattern): + """ + :pattern: A shell-style glob pattern. 
+ """ + + from loopy.context_matching import re_from_glob + pattern = re_from_glob(pattern) + + return [r for r in knl.substitutions if pattern.match(r)] + + +def find_one_rule_matching(knl, pattern): + rules = find_rules_matching(knl, pattern) + + if len(rules) > 1: + raise ValueError("more than one substitution rule matched '%s'" + % pattern) + if not rules: + raise ValueError("no substitution rule matched '%s'" % pattern) + + return rules[0] + +# }}} + + # vim: foldmethod=marker diff --git a/test/test_diff.py b/test/test_diff.py index a13e6d16129ceb997b55bc2990914795ed146bee..d6e2ae156f23259d16cb06040f48fefc0d565661 100644 --- a/test/test_diff.py +++ b/test/test_diff.py @@ -61,7 +61,7 @@ def test_diff(ctx_factory): knl = lp.fix_parameters(knl, n=50) - from loopy.diff import diff_kernel + from loopy.transform.diff import diff_kernel dknl, diff_map = diff_kernel(knl, "z", "x") dknl = lp.remove_unused_arguments(dknl)
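Since this patch only relocates the transform functions into the new loopy.transform subpackage without changing their behavior, a quick way to sanity-check the move is to exercise a few of them through the top-level namespace. The sketch below is not part of the patch; it assumes that loopy/__init__.py continues to re-export the moved functions (as the unchanged lp.remove_unused_arguments call in test_diff.py suggests), and the kernel, array names, and sizes are made up for illustration.

    import numpy as np
    import loopy as lp

    # a toy kernel; "a", "out", "i" and "n" are illustrative names only
    knl = lp.make_kernel(
            "{[i]: 0<=i<n}",
            "out[i] = 2*a[i]")
    knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32))

    # now defined in loopy/transform/iname.py, still reachable as lp.split_iname
    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.0")

    # now defined in loopy/transform/data.py
    knl = lp.add_prefetch(knl, "a", ["i_inner"])

    # direct imports from the new module paths should also work
    from loopy.transform.iname import rename_iname  # noqa

Either import path should yield the same kernel as before the reorganization.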