diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index e33d260fba4f3f4122f35e033ecc573b41999d5d..0cf0ff3fb96c0e913e8229615042cb2a3e20ce1c 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -42,36 +42,36 @@ logger = logging.getLogger(__name__) # {{{ add and infer argument dtypes -def add_dtypes(knl, dtype_dict): +def add_dtypes(kernel, dtype_dict): """Specify remaining unspecified argument/temporary variable types. :arg dtype_dict: a mapping from variable names to :class:`numpy.dtype` instances """ - dtype_dict_remainder, new_args, new_temp_vars = _add_dtypes(knl, dtype_dict) + dtype_dict_remainder, new_args, new_temp_vars = _add_dtypes(kernel, dtype_dict) if dtype_dict_remainder: raise RuntimeError("unused argument dtypes: %s" % ", ".join(dtype_dict_remainder)) - return knl.copy(args=new_args, temporary_variables=new_temp_vars) + return kernel.copy(args=new_args, temporary_variables=new_temp_vars) -def _add_dtypes_overdetermined(knl, dtype_dict): - dtype_dict_remainder, new_args, new_temp_vars = _add_dtypes(knl, dtype_dict) +def _add_dtypes_overdetermined(kernel, dtype_dict): + dtype_dict_remainder, new_args, new_temp_vars = _add_dtypes(kernel, dtype_dict) # do not throw error for unused args - return knl.copy(args=new_args, temporary_variables=new_temp_vars) + return kernel.copy(args=new_args, temporary_variables=new_temp_vars) -def _add_dtypes(knl, dtype_dict): +def _add_dtypes(kernel, dtype_dict): dtype_dict = dtype_dict.copy() new_args = [] from loopy.types import to_loopy_type - for arg in knl.args: + for arg in kernel.args: new_dtype = dtype_dict.pop(arg.name, None) if new_dtype is not None: - new_dtype = to_loopy_type(new_dtype, target=knl.target) + new_dtype = to_loopy_type(new_dtype, target=kernel.target) if arg.dtype is not None and arg.dtype != new_dtype: raise RuntimeError( "argument '%s' already has a different dtype " @@ -81,10 +81,10 @@ def _add_dtypes(knl, dtype_dict): new_args.append(arg) - new_temp_vars = knl.temporary_variables.copy() + new_temp_vars = kernel.temporary_variables.copy() import loopy as lp - for tv_name in knl.temporary_variables: + for tv_name in kernel.temporary_variables: new_dtype = dtype_dict.pop(tv_name, None) if new_dtype is not None: new_dtype = np.dtype(new_dtype) @@ -101,12 +101,12 @@ def _add_dtypes(knl, dtype_dict): return dtype_dict, new_args, new_temp_vars -def get_arguments_with_incomplete_dtype(knl): - return [arg.name for arg in knl.args +def get_arguments_with_incomplete_dtype(kernel): + return [arg.name for arg in kernel.args if arg.dtype is None] -def add_and_infer_dtypes(knl, dtype_dict, expect_completion=False): +def add_and_infer_dtypes(kernel, dtype_dict, expect_completion=False): processed_dtype_dict = {} for k, v in six.iteritems(dtype_dict): @@ -115,17 +115,17 @@ def add_and_infer_dtypes(knl, dtype_dict, expect_completion=False): if subkey: processed_dtype_dict[subkey] = v - knl = add_dtypes(knl, processed_dtype_dict) + kernel = add_dtypes(kernel, processed_dtype_dict) from loopy.type_inference import infer_unknown_types - return infer_unknown_types(knl, expect_completion=expect_completion) + return infer_unknown_types(kernel, expect_completion=expect_completion) -def _add_and_infer_dtypes_overdetermined(knl, dtype_dict): - knl = _add_dtypes_overdetermined(knl, dtype_dict) +def _add_and_infer_dtypes_overdetermined(kernel, dtype_dict): + kernel = _add_dtypes_overdetermined(kernel, dtype_dict) from loopy.type_inference import infer_unknown_types - return infer_unknown_types(knl, expect_completion=True) + return infer_unknown_types(kernel, expect_completion=True) # }}} diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index a20a798cfa35c64c0cbd7097b41824dda2a35a84..151c47866d42c68d37744527d8df033060bb7142 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -36,7 +36,7 @@ __doc__ = """ # {{{ add_barrier -def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, +def add_barrier(kernel, insn_before="", insn_after="", id_based_on=None, tags=None, synchronization_kind="global", mem_kind=None): """Takes in a kernel that needs to be added a barrier and returns a kernel which has a barrier inserted into it. It takes input of 2 instructions and @@ -59,13 +59,13 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, mem_kind = synchronization_kind if id_based_on is None: - id = knl.make_unique_instruction_id( + id = kernel.make_unique_instruction_id( based_on=synchronization_kind[0]+"_barrier") else: - id = knl.make_unique_instruction_id(based_on=id_based_on) + id = kernel.make_unique_instruction_id(based_on=id_based_on) match = parse_match(insn_before) - insn_before_list = [insn.id for insn in knl.instructions if match(knl, + insn_before_list = [insn.id for insn in kernel.instructions if match(kernel, insn)] barrier_to_add = BarrierInstruction(depends_on=frozenset(insn_before_list), @@ -75,12 +75,12 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, synchronization_kind=synchronization_kind, mem_kind=mem_kind) - new_knl = knl.copy(instructions=knl.instructions + [barrier_to_add]) - new_knl = add_dependency(kernel=new_knl, + new_kernel = kernel.copy(instructions=kernel.instructions + [barrier_to_add]) + new_kernel = add_dependency(kernel=new_kernel, insn_match=insn_after, depends_on="id:"+id) - return new_knl + return new_kernel # }}} diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py index f6568918d30f33d4c7103e40d02bdc40c38dfa1b..2bb70865a1c241ba0e0e698f9fcd14af7ecb26cf 100644 --- a/loopy/transform/batch.py +++ b/loopy/transform/batch.py @@ -102,7 +102,7 @@ def _add_unique_dim_name(name, dim_names): return (ng(name),) + tuple(dim_names) -def to_batched(knl, nbatches, batch_varying_args, batch_iname_prefix="ibatch", +def to_batched(kernel, nbatches, batch_varying_args, batch_iname_prefix="ibatch", sequential=False): """Takes in a kernel that carries out an operation and returns a kernel that carries out a batch of these operations. @@ -123,7 +123,7 @@ def to_batched(knl, nbatches, batch_varying_args, batch_iname_prefix="ibatch", from pymbolic import var - vng = knl.get_var_name_generator() + vng = kernel.get_var_name_generator() batch_iname = vng(batch_iname_prefix) batch_iname_expr = var(batch_iname) @@ -136,16 +136,16 @@ def to_batched(knl, nbatches, batch_varying_args, batch_iname_prefix="ibatch", if not isinstance(nbatches, int): batch_dom_str = "[%s] -> " % nbatches + batch_dom_str - new_args.append(ValueArg(nbatches, dtype=knl.index_dtype)) + new_args.append(ValueArg(nbatches, dtype=kernel.index_dtype)) nbatches_expr = var(nbatches) else: nbatches_expr = nbatches batch_domain = isl.BasicSet(batch_dom_str) - new_domains = [batch_domain] + knl.domains + new_domains = [batch_domain] + kernel.domains - for arg in knl.args: + for arg in kernel.args: if arg.name in batch_varying_args: if isinstance(arg, ValueArg): arg = ArrayArg(arg.name, arg.dtype, shape=(nbatches_expr,), @@ -158,14 +158,14 @@ def to_batched(knl, nbatches, batch_varying_args, batch_iname_prefix="ibatch", new_args.append(arg) - knl = knl.copy( + kernel = kernel.copy( domains=new_domains, args=new_args) if not sequential: new_temps = {} - for temp in six.itervalues(knl.temporary_variables): + for temp in six.itervalues(kernel.temporary_variables): if temp_needs_batching_if_not_sequential(temp, batch_varying_args): new_temps[temp.name] = temp.copy( shape=(nbatches_expr,) + temp.shape, @@ -174,19 +174,19 @@ def to_batched(knl, nbatches, batch_varying_args, batch_iname_prefix="ibatch", else: new_temps[temp.name] = temp - knl = knl.copy(temporary_variables=new_temps) + kernel = kernel.copy(temporary_variables=new_temps) else: import loopy as lp from loopy.kernel.data import ForceSequentialTag - knl = lp.tag_inames(knl, [(batch_iname, ForceSequentialTag())]) + kernel = lp.tag_inames(kernel, [(batch_iname, ForceSequentialTag())]) rule_mapping_context = SubstitutionRuleMappingContext( - knl.substitutions, vng) + kernel.substitutions, vng) bvc = _BatchVariableChanger(rule_mapping_context, - knl, batch_varying_args, batch_iname_expr, + kernel, batch_varying_args, batch_iname_expr, sequential=sequential) kernel = rule_mapping_context.finish_kernel( - bvc.map_kernel(knl)) + bvc.map_kernel(kernel)) batch_iname_set = frozenset([batch_iname]) kernel = kernel.copy( diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 5356d49038a142945c781e58943eb86492d12b3f..116a0a349b443d05aff5f967d2425b545cbe5622 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -368,9 +368,9 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, # {{{ change variable kinds -def change_arg_to_image(knl, name): +def change_arg_to_image(kernel, name): new_args = [] - for arg in knl.args: + for arg in kernel.args: if arg.name == name: assert arg.offset == 0 assert arg.shape is not None @@ -378,14 +378,14 @@ def change_arg_to_image(knl, name): else: new_args.append(arg) - return knl.copy(args=new_args) + return kernel.copy(args=new_args) # }}} # {{{ tag array axes -def tag_array_axes(knl, ary_names, dim_tags): +def tag_array_axes(kernel, ary_names, dim_tags): """ :arg dim_tags: a tuple of :class:`loopy.kernel.array.ArrayDimImplementationTag` or a string that @@ -407,7 +407,7 @@ def tag_array_axes(knl, ary_names, dim_tags): ary_names = [ary_name.strip() for ary_name in ary_names.split(",")] for ary_name in ary_names: - achng = ArrayChanger(knl, ary_name) + achng = ArrayChanger(kernel, ary_name) ary = achng.get() from loopy.kernel.array import parse_array_dim_tags @@ -418,9 +418,9 @@ def tag_array_axes(knl, ary_names, dim_tags): ary = ary.copy(dim_tags=tuple(new_dim_tags)) - knl = achng.with_changed_array(ary) + kernel = achng.with_changed_array(ary) - return knl + return kernel tag_data_axes = MovedFunctionDeprecationWrapper(tag_array_axes) @@ -461,14 +461,14 @@ set_array_dim_names = MovedFunctionDeprecationWrapper(set_array_axis_names) # {{{ remove_unused_arguments -def remove_unused_arguments(knl): +def remove_unused_arguments(kernel): new_args = [] import loopy as lp - exp_knl = lp.expand_subst(knl) + exp_kernel = lp.expand_subst(kernel) - refd_vars = set(knl.all_params()) - for insn in exp_knl.instructions: + refd_vars = set(kernel.all_params()) + for insn in exp_kernel.instructions: refd_vars.update(insn.dependency_names()) from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag @@ -480,7 +480,7 @@ def remove_unused_arguments(knl): return set() return get_dependencies(expr) - for ary in chain(knl.args, six.itervalues(knl.temporary_variables)): + for ary in chain(kernel.args, six.itervalues(kernel.temporary_variables)): if isinstance(ary, ArrayBase): refd_vars.update( tolerant_get_deps(ary.shape) @@ -491,18 +491,18 @@ def remove_unused_arguments(knl): refd_vars.update( tolerant_get_deps(dim_tag.stride)) - for arg in knl.args: + for arg in kernel.args: if arg.name in refd_vars: new_args.append(arg) - return knl.copy(args=new_args) + return kernel.copy(args=new_args) # }}} # {{{ alias_temporaries -def alias_temporaries(knl, names, base_name_prefix=None, +def alias_temporaries(kernel, names, base_name_prefix=None, synchronize_for_exclusive_use=True): """Sets all temporaries given by *names* to be backed by a single piece of storage. @@ -522,20 +522,20 @@ def alias_temporaries(knl, names, base_name_prefix=None, ``synchronize_for_exclusive_use=True`` was the previous default behavior. """ - gng = knl.get_group_name_generator() + gng = kernel.get_group_name_generator() group_names = [gng("tmpgrp_"+name) for name in names] if base_name_prefix is None: base_name_prefix = "temp_storage" - vng = knl.get_var_name_generator() + vng = kernel.get_var_name_generator() base_name = vng(base_name_prefix) names_set = set(names) if synchronize_for_exclusive_use: new_insns = [] - for insn in knl.instructions: + for insn in kernel.instructions: temp_deps = insn.dependency_names() & names_set if not temp_deps: @@ -562,10 +562,10 @@ def alias_temporaries(knl, names, base_name_prefix=None, conflicts_with_groups=( insn.conflicts_with_groups | other_group_names))) else: - new_insns = knl.instructions + new_insns = kernel.instructions new_temporary_variables = {} - for tv in six.itervalues(knl.temporary_variables): + for tv in six.itervalues(kernel.temporary_variables): if tv.name in names_set: if tv.base_storage is not None: raise LoopyError("temporary variable '{tv}' already has " @@ -577,7 +577,7 @@ def alias_temporaries(knl, names, base_name_prefix=None, else: new_temporary_variables[tv.name] = tv - return knl.copy( + return kernel.copy( instructions=new_insns, temporary_variables=new_temporary_variables) @@ -646,7 +646,7 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False): kernel.substitutions, var_name_gen) smap = RuleAwareSubstitutionMapper(rule_mapping_context, make_subst_func(subst_dict), - within=lambda knl, insn, stack: True) + within=lambda kernel, insn, stack: True) kernel = smap.map_kernel(kernel) @@ -705,15 +705,16 @@ def set_temporary_scope(kernel, temp_var_names, scope): # {{{ reduction_arg_to_subst_rule -def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=None): +def reduction_arg_to_subst_rule( + kernel, inames, insn_match=None, subst_rule_name=None): if isinstance(inames, str): inames = [s.strip() for s in inames.split(",")] inames_set = frozenset(inames) - substs = knl.substitutions.copy() + substs = kernel.substitutions.copy() - var_name_gen = knl.get_var_name_generator() + var_name_gen = kernel.get_var_name_generator() def map_reduction(expr, rec, nresults=1): if frozenset(expr.inames) != inames_set: @@ -754,13 +755,13 @@ def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=No from loopy.kernel.data import MultiAssignmentBase new_insns = [] - for insn in knl.instructions: + for insn in kernel.instructions: if not isinstance(insn, MultiAssignmentBase): new_insns.append(insn) else: new_insns.append(insn.copy(expression=cb_mapper(insn.expression))) - return knl.copy( + return kernel.copy( instructions=new_insns, substitutions=substs) diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py index 21e61075596bc2b795434716ba8a4347f5cfb173..b86a38092cf4f4c99146e5de420ea200899d7642 100644 --- a/loopy/transform/diff.py +++ b/loopy/transform/diff.py @@ -368,7 +368,7 @@ class DifferentiationContext(object): # {{{ entrypoint -def diff_kernel(knl, diff_outputs, by, diff_iname_prefix="diff_i", +def diff_kernel(kernel, diff_outputs, by, diff_iname_prefix="diff_i", batch_axes_in_by=frozenset(), copy_outputs=set()): """ @@ -380,22 +380,22 @@ def diff_kernel(knl, diff_outputs, by, diff_iname_prefix="diff_i", """ from loopy.kernel.creation import apply_single_writer_depencency_heuristic - knl = apply_single_writer_depencency_heuristic(knl, warn_if_used=True) + kernel = apply_single_writer_depencency_heuristic(kernel, warn_if_used=True) if isinstance(diff_outputs, str): diff_outputs = [ dout.strip() for dout in diff_outputs.split(",") if dout.strip()] - by_arg = knl.arg_dict[by] + by_arg = kernel.arg_dict[by] additional_shape = by_arg.shape - var_name_gen = knl.get_var_name_generator() + var_name_gen = kernel.get_var_name_generator() # {{{ differentiate instructions diff_context = DifferentiationContext( - knl, var_name_gen, by, diff_iname_prefix=diff_iname_prefix, + kernel, var_name_gen, by, diff_iname_prefix=diff_iname_prefix, additional_shape=additional_shape) result = {} diff --git a/loopy/transform/fusion.py b/loopy/transform/fusion.py index 70ad2406aabdd63ee21c448aac1091999247925e..838fbbb5568d53e25ac58a898bc7c4456cbb1412 100644 --- a/loopy/transform/fusion.py +++ b/loopy/transform/fusion.py @@ -129,16 +129,16 @@ def _merge_values(item_name, val_a, val_b): # {{{ two-kernel fusion -def _fuse_two_kernels(knla, knlb): +def _fuse_two_kernels(kernela, kernelb): from loopy.kernel import KernelState - if knla.state != KernelState.INITIAL or knlb.state != KernelState.INITIAL: + if kernela.state != KernelState.INITIAL or kernelb.state != KernelState.INITIAL: raise LoopyError("can only fuse kernels in INITIAL state") # {{{ fuse domains - new_domains = knla.domains[:] + new_domains = kernela.domains[:] - for dom_b in knlb.domains: + for dom_b in kernelb.domains: i_fuse = _find_fusable_loop_domain_index(dom_b, new_domains) if i_fuse is None: new_domains.append(dom_b) @@ -164,14 +164,14 @@ def _fuse_two_kernels(knla, knlb): # }}} - vng = knla.get_var_name_generator() + vng = kernela.get_var_name_generator() b_var_renames = {} # {{{ fuse args - new_args = knla.args[:] - for b_arg in knlb.args: - if b_arg.name not in knla.arg_dict: + new_args = kernela.args[:] + for b_arg in kernelb.args: + if b_arg.name not in kernela.arg_dict: new_arg_name = vng(b_arg.name) if new_arg_name != b_arg.name: @@ -179,21 +179,21 @@ def _fuse_two_kernels(knla, knlb): new_args.append(b_arg.copy(name=new_arg_name)) else: - if b_arg != knla.arg_dict[b_arg.name]: + if b_arg != kernela.arg_dict[b_arg.name]: raise LoopyError( "argument '{arg_name}' has inconsistent definition between " "the two kernels being merged ({arg_a} <-> {arg_b})" .format( arg_name=b_arg.name, - arg_a=str(knla.arg_dict[b_arg.name]), + arg_a=str(kernela.arg_dict[b_arg.name]), arg_b=str(b_arg))) # }}} # {{{ fuse temporaries - new_temporaries = knla.temporary_variables.copy() - for b_name, b_tv in six.iteritems(knlb.temporary_variables): + new_temporaries = kernela.temporary_variables.copy() + for b_name, b_tv in six.iteritems(kernelb.temporary_variables): assert b_name == b_tv.name new_tv_name = vng(b_name) @@ -206,18 +206,18 @@ def _fuse_two_kernels(knla, knlb): # }}} - knlb = _apply_renames_in_exprs(knlb, b_var_renames) + kernelb = _apply_renames_in_exprs(kernelb, b_var_renames) from pymbolic.imperative.transform import \ fuse_statement_streams_with_unique_ids new_instructions, old_b_id_to_new_b_id = \ fuse_statement_streams_with_unique_ids( - knla.instructions, knlb.instructions) + kernela.instructions, kernelb.instructions) # {{{ fuse assumptions - assump_a = knla.assumptions - assump_b = knlb.assumptions + assump_a = kernela.assumptions + assump_b = kernelb.assumptions assump_a, assump_b = isl.align_two(assump_a, assump_b) shared_param_names = list( @@ -240,49 +240,49 @@ def _fuse_two_kernels(knla, knlb): domains=new_domains, instructions=new_instructions, args=new_args, - name="%s_and_%s" % (knla.name, knlb.name), - preambles=_ordered_merge_lists(knla.preambles, knlb.preambles), + name="%s_and_%s" % (kernela.name, kernelb.name), + preambles=_ordered_merge_lists(kernela.preambles, kernelb.preambles), preamble_generators=_ordered_merge_lists( - knla.preamble_generators, knlb.preamble_generators), + kernela.preamble_generators, kernelb.preamble_generators), assumptions=new_assumptions, local_sizes=_merge_dicts( - "local size", knla.local_sizes, knlb.local_sizes), + "local size", kernela.local_sizes, kernelb.local_sizes), temporary_variables=new_temporaries, iname_to_tags=_merge_dicts( "iname-to-tag mapping", - knla.iname_to_tags, - knlb.iname_to_tags), + kernela.iname_to_tags, + kernelb.iname_to_tags), substitutions=_merge_dicts( "substitution", - knla.substitutions, - knlb.substitutions), + kernela.substitutions, + kernelb.substitutions), function_manglers=_ordered_merge_lists( - knla.function_manglers, - knlb.function_manglers), + kernela.function_manglers, + kernelb.function_manglers), symbol_manglers=_ordered_merge_lists( - knla.symbol_manglers, - knlb.symbol_manglers), + kernela.symbol_manglers, + kernelb.symbol_manglers), iname_slab_increments=_merge_dicts( "iname slab increment", - knla.iname_slab_increments, - knlb.iname_slab_increments), - loop_priority=knla.loop_priority.union(knlb.loop_priority), + kernela.iname_slab_increments, + kernelb.iname_slab_increments), + loop_priority=kernela.loop_priority.union(kernelb.loop_priority), silenced_warnings=_ordered_merge_lists( - knla.silenced_warnings, - knlb.silenced_warnings), + kernela.silenced_warnings, + kernelb.silenced_warnings), applied_iname_rewrites=_ordered_merge_lists( - knla.applied_iname_rewrites, - knlb.applied_iname_rewrites), + kernela.applied_iname_rewrites, + kernelb.applied_iname_rewrites), index_dtype=_merge_values( "index dtype", - knla.index_dtype, - knlb.index_dtype), + kernela.index_dtype, + kernelb.index_dtype), target=_merge_values( "target", - knla.target, - knlb.target), - options=knla.options), old_b_id_to_new_b_id + kernela.target, + kernelb.target), + options=kernela.options), old_b_id_to_new_b_id # }}} @@ -371,19 +371,19 @@ def fuse_kernels(kernels, suffixes=None, data_flow=None): kernel_insn_ids = [] result = None - for knlb in kernels: + for kernelb in kernels: if result is None: - result = knlb + result = kernelb kernel_insn_ids.append([ - insn.id for insn in knlb.instructions]) + insn.id for insn in kernelb.instructions]) else: result, old_b_id_to_new_b_id = _fuse_two_kernels( - knla=result, - knlb=knlb) + kernela=result, + kernelb=kernelb) kernel_insn_ids.append([ old_b_id_to_new_b_id[insn.id] - for insn in knlb.instructions]) + for insn in kernelb.instructions]) # {{{ realize data_flow dependencies diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 2ae00d365f41144993af63d7073477d6d021aa2f..6c7cb3365991cf92db4c0fa2a56a07e9ad07f66d 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -826,7 +826,7 @@ class _InameDuplicator(RuleAwareIdentityMapper): return insn.copy(within_inames=new_fid) -def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, +def duplicate_inames(kernel, inames, within, new_inames=None, suffix=None, tags={}): """ :arg within: a stack match as understood by @@ -850,7 +850,7 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, if len(new_inames) != len(inames): raise ValueError("new_inames must have the same number of entries as inames") - name_gen = knl.get_var_name_generator() + name_gen = kernel.get_var_name_generator() for i, iname in enumerate(inames): new_iname = new_inames[i] @@ -878,10 +878,10 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, for old_iname, new_iname in zip(inames, new_inames): from loopy.kernel.tools import DomainChanger - domch = DomainChanger(knl, frozenset([old_iname])) + domch = DomainChanger(kernel, frozenset([old_iname])) from loopy.isl_helpers import duplicate_axes - knl = knl.copy( + kernel = kernel.copy( domains=domch.get_domains_with( duplicate_axes(domch.domain, [old_iname], [new_iname]))) @@ -890,13 +890,13 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, # {{{ change the inames in the code rule_mapping_context = SubstitutionRuleMappingContext( - knl.substitutions, name_gen) + kernel.substitutions, name_gen) indup = _InameDuplicator(rule_mapping_context, old_to_new=dict(list(zip(inames, new_inames))), within=within) - knl = rule_mapping_context.finish_kernel( - indup.map_kernel(knl)) + kernel = rule_mapping_context.finish_kernel( + indup.map_kernel(kernel)) # }}} @@ -905,11 +905,11 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, for old_iname, new_iname in zip(inames, new_inames): new_tag = tags.get(old_iname) if new_tag is not None: - knl = tag_inames(knl, {new_iname: new_tag}) + kernel = tag_inames(kernel, {new_iname: new_tag}) # }}} - return knl + return kernel # }}} @@ -988,7 +988,7 @@ def _get_iname_duplication_options(insn_iname_sets, old_common_inames=frozenset( # If partitioning was empty, we have recursed successfully and yield nothing -def get_iname_duplication_options(knl, use_boostable_into=None): +def get_iname_duplication_options(kernel, use_boostable_into=None): """List options for duplication of inames, if necessary for schedulability :returns: a generator listing all options to duplicate inames, if duplication @@ -1031,29 +1031,29 @@ def get_iname_duplication_options(knl, use_boostable_into=None): concurrent_inames = set( iname - for iname in knl.all_inames() - if knl.iname_tags_of_type(iname, ConcurrentTag)) + for iname in kernel.all_inames() + if kernel.iname_tags_of_type(iname, ConcurrentTag)) # First we extract the minimal necessary information from the kernel insn_iname_sets = ( frozenset( insn.within_inames - concurrent_inames - for insn in knl.instructions) + for insn in kernel.instructions) - frozenset([frozenset([])])) # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_iname_sets): # Check whether this iname has a parallel tag and discard it if so - if (iname in knl.iname_to_tags - and knl.iname_tags_of_type(iname, ConcurrentTag)): + if (iname in kernel.iname_to_tags + and kernel.iname_tags_of_type(iname, ConcurrentTag)): continue # Reconstruct an object that may be passed to the within parameter of # loopy.duplicate_inames from loopy.match import Id, Or within = Or(tuple( - Id(insn.id) for insn in knl.instructions + Id(insn.id) for insn in kernel.instructions if insn.within_inames in insns)) # Only yield the result if an instruction matched. @@ -1061,31 +1061,31 @@ def get_iname_duplication_options(knl, use_boostable_into=None): yield iname, within -def has_schedulable_iname_nesting(knl): +def has_schedulable_iname_nesting(kernel): """ :returns: a :class:`bool` indicating whether this kernel needs an iname duplication in order to be schedulable. """ - return not bool(next(get_iname_duplication_options(knl), False)) + return not bool(next(get_iname_duplication_options(kernel), False)) # }}} # {{{ rename_inames -def rename_iname(knl, old_iname, new_iname, existing_ok=False, within=None): +def rename_iname(kernel, old_iname, new_iname, existing_ok=False, within=None): """ :arg within: a stack match as understood by :func:`loopy.match.parse_stack_match`. :arg existing_ok: execute even if *new_iname* already exists """ - var_name_gen = knl.get_var_name_generator() + var_name_gen = kernel.get_var_name_generator() # FIXME: Distinguish existing iname vs. existing other variable does_exist = var_name_gen.is_name_conflicting(new_iname) - if old_iname not in knl.all_inames(): + if old_iname not in kernel.all_inames(): raise LoopyError("old iname '%s' does not exist" % old_iname) if does_exist and not existing_ok: @@ -1095,7 +1095,7 @@ def rename_iname(knl, old_iname, new_iname, existing_ok=False, within=None): if does_exist: # {{{ check that the domains match up - dom = knl.get_inames_domain(frozenset((old_iname, new_iname))) + dom = kernel.get_inames_domain(frozenset((old_iname, new_iname))) var_dict = dom.get_var_dict() _, old_idx = var_dict[old_iname] @@ -1132,17 +1132,17 @@ def rename_iname(knl, old_iname, new_iname, existing_ok=False, within=None): from pymbolic.mapper.substitutor import make_subst_func rule_mapping_context = SubstitutionRuleMappingContext( - knl.substitutions, var_name_gen) + kernel.substitutions, var_name_gen) smap = RuleAwareSubstitutionMapper(rule_mapping_context, make_subst_func(subst_dict), within) - knl = rule_mapping_context.finish_kernel( - smap.map_kernel(knl)) + kernel = rule_mapping_context.finish_kernel( + smap.map_kernel(kernel)) new_instructions = [] - for insn in knl.instructions: + for insn in kernel.instructions: if (old_iname in insn.within_inames - and within(knl, insn, ())): + and within(kernel, insn, ())): insn = insn.copy( within_inames=( (insn.within_inames - frozenset([old_iname])) @@ -1150,35 +1150,35 @@ def rename_iname(knl, old_iname, new_iname, existing_ok=False, within=None): new_instructions.append(insn) - knl = knl.copy(instructions=new_instructions) + kernel = kernel.copy(instructions=new_instructions) else: - knl = duplicate_inames( - knl, [old_iname], within=within, new_inames=[new_iname]) + kernel = duplicate_inames( + kernel, [old_iname], within=within, new_inames=[new_iname]) - knl = remove_unused_inames(knl, [old_iname]) + kernel = remove_unused_inames(kernel, [old_iname]) - return knl + return kernel # }}} # {{{ remove unused inames -def get_used_inames(knl): +def get_used_inames(kernel): import loopy as lp - exp_knl = lp.expand_subst(knl) + exp_kernel = lp.expand_subst(kernel) used_inames = set() - for insn in exp_knl.instructions: + for insn in exp_kernel.instructions: used_inames.update( - exp_knl.insn_inames(insn.id) + exp_kernel.insn_inames(insn.id) | insn.reduction_inames()) return used_inames -def remove_unused_inames(knl, inames=None): +def remove_unused_inames(kernel, inames=None): """Delete those among *inames* that are unused, i.e. project them out of the domain. If these inames pose implicit restrictions on other inames, these restrictions will persist as existentially @@ -1190,7 +1190,7 @@ def remove_unused_inames(knl, inames=None): # {{{ normalize arguments if inames is None: - inames = knl.all_inames() + inames = kernel.all_inames() elif isinstance(inames, str): inames = inames.split(",") @@ -1198,7 +1198,7 @@ def remove_unused_inames(knl, inames=None): # {{{ check which inames are unused - unused_inames = set(inames) - get_used_inames(knl) + unused_inames = set(inames) - get_used_inames(kernel) # }}} @@ -1207,42 +1207,42 @@ def remove_unused_inames(knl, inames=None): from loopy.kernel.tools import DomainChanger for iname in unused_inames: - domch = DomainChanger(knl, (iname,)) + domch = DomainChanger(kernel, (iname,)) dom = domch.domain dt, idx = dom.get_var_dict()[iname] dom = dom.project_out(dt, idx, 1) - knl = knl.copy(domains=domch.get_domains_with(dom)) + kernel = kernel.copy(domains=domch.get_domains_with(dom)) # }}} - return knl + return kernel def remove_any_newly_unused_inames(transformation_func): from functools import wraps @wraps(transformation_func) - def wrapper(knl, *args, **kwargs): + def wrapper(kernel, *args, **kwargs): # check for remove_unused_inames argument, default: True remove_newly_unused_inames = kwargs.pop("remove_newly_unused_inames", True) if remove_newly_unused_inames: # determine which inames were already unused - inames_already_unused = knl.all_inames() - get_used_inames(knl) + inames_already_unused = kernel.all_inames() - get_used_inames(kernel) # call transform - transformed_knl = transformation_func(knl, *args, **kwargs) + transformed_kernel = transformation_func(kernel, *args, **kwargs) # Remove inames that are unused due to transform return remove_unused_inames( - transformed_knl, - transformed_knl.all_inames()-inames_already_unused) + transformed_kernel, + transformed_kernel.all_inames()-inames_already_unused) else: # call transform - return transformation_func(knl, *args, **kwargs) + return transformation_func(kernel, *args, **kwargs) return wrapper @@ -1732,7 +1732,7 @@ def make_reduction_inames_unique(kernel, inames=None, within=None): # {{{ add_inames_to_insn -def add_inames_to_insn(knl, inames, insn_match): +def add_inames_to_insn(kernel, inames, insn_match): """ :arg inames: a frozenset of inames that will be added to the instructions matched by *insn_match*, or a comma-separated @@ -1758,14 +1758,14 @@ def add_inames_to_insn(knl, inames, insn_match): new_instructions = [] - for insn in knl.instructions: - if match(knl, insn): + for insn in kernel.instructions: + if match(kernel, insn): new_instructions.append( insn.copy(within_inames=insn.within_inames | inames)) else: new_instructions.append(insn) - return knl.copy(instructions=new_instructions) + return kernel.copy(instructions=new_instructions) # }}} diff --git a/loopy/transform/save.py b/loopy/transform/save.py index aef13b237bb1fba52f41d5a910017608b3957161..8ee2899fb2d314da7c74b5f6d610d2d40b7d3731 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -720,7 +720,7 @@ class TemporarySaver(object): # {{{ auto save and reload across kernel calls -def save_and_reload_temporaries(knl): +def save_and_reload_temporaries(kernel): """ Add instructions to save and reload temporary variables that are live across kernel calls. @@ -743,13 +743,13 @@ def save_and_reload_temporaries(knl): :returns: The resulting kernel """ - liveness = LivenessAnalysis(knl) - saver = TemporarySaver(knl) + liveness = LivenessAnalysis(kernel) + saver = TemporarySaver(kernel) from loopy.schedule.tools import ( temporaries_read_in_subkernel, temporaries_written_in_subkernel) - for sched_idx, sched_item in enumerate(knl.schedule): + for sched_idx, sched_item in enumerate(kernel.schedule): if isinstance(sched_item, CallKernel): # Any written temporary that is live-out needs to be read into @@ -760,8 +760,8 @@ def save_and_reload_temporaries(knl): else: subkernel = sched_item.kernel_name interesting_temporaries = ( - temporaries_read_in_subkernel(knl, subkernel) - | temporaries_written_in_subkernel(knl, subkernel)) + temporaries_read_in_subkernel(kernel, subkernel) + | temporaries_written_in_subkernel(kernel, subkernel)) for temporary in liveness[sched_idx].live_out & interesting_temporaries: logger.info("reloading {0} at entry of {1}" @@ -769,13 +769,13 @@ def save_and_reload_temporaries(knl): saver.reload(temporary, sched_item.kernel_name) elif isinstance(sched_item, ReturnFromKernel): - if sched_idx == len(knl.schedule) - 1: + if sched_idx == len(kernel.schedule) - 1: # Kernel exit: nothing live interesting_temporaries = set() else: subkernel = sched_item.kernel_name interesting_temporaries = ( - temporaries_written_in_subkernel(knl, subkernel)) + temporaries_written_in_subkernel(kernel, subkernel)) for temporary in liveness[sched_idx].live_in & interesting_temporaries: logger.info("saving {0} before return of {1}" diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 717a051930e938457dae0ee4441325b3e631d2d9..d73ad995ca4d3926d4d413e2099d5474ebd28eeb 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -492,7 +492,7 @@ def expand_subst(kernel, within=None): # {{{ find substitution rules by glob patterns -def find_rules_matching(knl, pattern): +def find_rules_matching(kernel, pattern): """ :pattern: A shell-style glob pattern. """ @@ -500,11 +500,11 @@ def find_rules_matching(knl, pattern): from loopy.match import re_from_glob pattern = re_from_glob(pattern) - return [r for r in knl.substitutions if pattern.match(r)] + return [r for r in kernel.substitutions if pattern.match(r)] -def find_one_rule_matching(knl, pattern): - rules = find_rules_matching(knl, pattern) +def find_one_rule_matching(kernel, pattern): + rules = find_rules_matching(kernel, pattern) if len(rules) > 1: raise ValueError("more than one substitution rule matched '%s'"