diff --git a/loopy/__init__.py b/loopy/__init__.py index 9766aa37047111bddf15b61a1d14d628a6175d04..41ce634c4ced81971010f2bd319bb0ab155ea5e7 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -49,13 +49,18 @@ class LoopyAdvisory(UserWarning): # {{{ imported user interface -from loopy.kernel import ValueArg, ScalarArg, GlobalArg, ArrayArg, ConstantArg, ImageArg +from loopy.kernel.data import ( + ValueArg, ScalarArg, GlobalArg, ArrayArg, ConstantArg, ImageArg, -from loopy.kernel import (AutoFitLocalIndexTag, get_dot_dependency_graph, - LoopKernel, Instruction, default_function_mangler, single_arg_function_mangler, opencl_function_mangler, - default_preamble_generator) -from loopy.creation import make_kernel + + default_preamble_generator, + + Instruction) + +from loopy.kernel import LoopKernel +from loopy.kernel.tools import get_dot_dependency_graph +from loopy.kernel.creation import make_kernel from loopy.reduction import register_reduction_parser from loopy.subst import extract_subst, expand_subst from loopy.cse import precompute @@ -137,7 +142,7 @@ def split_iname(kernel, split_iname, inner_length, """ existing_tag = kernel.iname_to_tag.get(split_iname) - from loopy.kernel import ForceSequentialTag + from loopy.kernel.data import ForceSequentialTag if do_tagged_check and ( existing_tag is not None and not isinstance(existing_tag, ForceSequentialTag)): @@ -289,7 +294,7 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None): if new_iname is None: new_iname = kernel.get_var_name_generator()("_and_".join(inames)) - from loopy.kernel import DomainChanger + from loopy.kernel.tools import DomainChanger domch = DomainChanger(kernel, frozenset(inames)) for iname in inames: if kernel.get_home_domain_index(iname) != domch.leaf_domain_index: @@ -389,12 +394,12 @@ join_dimensions = MovedFunctionDeprecationWrapper(join_inames) # {{{ tag inames def tag_inames(kernel, iname_to_tag, force=False): - from loopy.kernel import parse_tag + from 
loopy.kernel.data import parse_tag iname_to_tag = dict((iname, parse_tag(tag)) for iname, tag in iname_to_tag.iteritems()) - from loopy.kernel import (ParallelTag, AutoLocalIndexTagBase, + from loopy.kernel.data import (ParallelTag, AutoLocalIndexTagBase, ForceSequentialTag) new_iname_to_tag = kernel.iname_to_tag.copy() @@ -517,7 +522,7 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, # {{{ duplicate the inames for old_iname, new_iname in zip(inames, new_inames): - from loopy.kernel import DomainChanger + from loopy.kernel.tools import DomainChanger domch = DomainChanger(knl, frozenset([old_iname])) from loopy.isl_helpers import duplicate_axes @@ -597,7 +602,7 @@ def link_inames(knl, inames, new_iname, within=None, tag=None): # }}} - from loopy.kernel import DomainChanger + from loopy.kernel.tools import DomainChanger domch = DomainChanger(knl, tuple(inames)) # {{{ ensure that projections are identical @@ -705,7 +710,7 @@ def remove_unused_inames(knl, inames=None): # {{{ remove them - from loopy.kernel import DomainChanger + from loopy.kernel.tools import DomainChanger for iname in unused_inames: domch = DomainChanger(knl, (iname,)) @@ -727,7 +732,7 @@ def remove_unused_inames(knl, inames=None): # {{{ process footprint_subscripts def _add_kernel_axis(kernel, axis_name, start, stop, base_inames): - from loopy.kernel import DomainChanger + from loopy.kernel.tools import DomainChanger domch = DomainChanger(kernel, base_inames) domain = domch.domain diff --git a/loopy/check.py b/loopy/check.py index 65962a8eb4059d7403d06e32ab2684446743e0b5..093ddfde967e4b44aaa2e68ef8ce6cf15331509f 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -42,7 +42,7 @@ def check_for_unused_hw_axes_in_insns(kernel): # alternative: just disregard length-1 dimensions? 
- from loopy.kernel import LocalIndexTag, AutoLocalIndexTagBase, GroupIndexTag + from loopy.kernel.data import LocalIndexTag, AutoLocalIndexTagBase, GroupIndexTag for insn in kernel.instructions: if insn.boostable: continue @@ -78,7 +78,7 @@ def check_for_unused_hw_axes_in_insns(kernel): def check_for_double_use_of_hw_axes(kernel): - from loopy.kernel import UniqueTag + from loopy.kernel.data import UniqueTag for insn in kernel.instructions: insn_tag_keys = set() @@ -117,7 +117,7 @@ class WriteRaceConditionError(RuntimeError): def check_for_write_races(kernel): from loopy.symbolic import DependencyMapper - from loopy.kernel import ParallelTag, GroupIndexTag, LocalIndexTagBase + from loopy.kernel.data import ParallelTag, GroupIndexTag, LocalIndexTagBase depmap = DependencyMapper() iname_to_tag = kernel.iname_to_tag.get @@ -186,7 +186,7 @@ def check_for_write_races(kernel): % (insn.id, ",".join(race_inames))) def check_for_orphaned_user_hardware_axes(kernel): - from loopy.kernel import LocalIndexTag + from loopy.kernel.data import LocalIndexTag for axis in kernel.local_sizes: found = False for tag in kernel.iname_to_tag.itervalues(): @@ -199,7 +199,7 @@ def check_for_orphaned_user_hardware_axes(kernel): "has no iname mapped to it" % axis) def check_for_data_dependent_parallel_bounds(kernel): - from loopy.kernel import ParallelTag + from loopy.kernel.data import ParallelTag for i, dom in enumerate(kernel.domains): dom_inames = set(dom.get_var_names(dim_type.set)) @@ -482,7 +482,7 @@ def get_problems(kernel, parameters): msg(4, "using more local memory than available--" "possibly OK due to cache nature") - from loopy.kernel import ConstantArg + from loopy.kernel.data import ConstantArg const_arg_count = sum( 1 for arg in kernel.args if isinstance(arg, ConstantArg)) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index b69cf501c56bd4ac3af1328d556e797b536eb94a..5b43de7814779f9b02f5fe84517ac3655746899e 100644 --- a/loopy/codegen/__init__.py +++ 
b/loopy/codegen/__init__.py @@ -26,7 +26,6 @@ THE SOFTWARE. from pytools import Record -import numpy as np import islpy as isl @@ -175,7 +174,7 @@ def make_initial_assignments(kernel): global_size, local_size = kernel.get_grid_sizes() - from loopy.kernel import LocalIndexTag, GroupIndexTag + from loopy.kernel.data import LocalIndexTag, GroupIndexTag from pymbolic import var for iname in kernel.all_inames(): @@ -253,7 +252,7 @@ def generate_code(kernel, with_annotation=False, has_image = False - from loopy.kernel import GlobalArg, ConstantArg, ImageArg, ValueArg + from loopy.kernel.data import GlobalArg, ConstantArg, ImageArg, ValueArg args = [] for arg in kernel.args: diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 71cd536cf2ed068d2cb7c25f8bfc7152028565c6..51c0dca36b4159670e0b04ab7cd02f7e5275dcd5 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -75,7 +75,7 @@ def get_bounds_checks(domain, check_inames, implemented_domain, def get_usable_inames_for_conditional(kernel, sched_index): from loopy.schedule import EnterLoop, LeaveLoop - from loopy.kernel import ParallelTag, LocalIndexTagBase + from loopy.kernel.data import ParallelTag, LocalIndexTagBase result = set() diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index a0a2a29b7f8446e36d3d8b4a27a7d61aeb83242e..fa42edf88ab6c42681a3dba494b6d15ed1cfe3e6 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -40,7 +40,7 @@ def get_admissible_conditional_inames_for(kernel, sched_index): inames if there is a barrier nested somewhere within. 
""" - from loopy.kernel import LocalIndexTag, HardwareParallelTag + from loopy.kernel.data import LocalIndexTag, HardwareParallelTag from loopy.schedule import find_active_inames_at, has_barrier_within result = find_active_inames_at(kernel, sched_index) @@ -69,7 +69,7 @@ def generate_code_for_sched_index(kernel, sched_index, codegen_state): generate_unroll_loop, generate_sequential_loop_dim_code) - from loopy.kernel import (UnrolledIlpTag, UnrollTag, ForceSequentialTag, + from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, ForceSequentialTag, LoopedIlpTag) if isinstance(tag, (UnrollTag, UnrolledIlpTag)): func = generate_unroll_loop @@ -117,7 +117,7 @@ def remove_inames_for_shared_hw_axes(kernel, cond_inames): tag_key_uses = {} - from loopy.kernel import HardwareParallelTag + from loopy.kernel.data import HardwareParallelTag for iname in cond_inames: tag = kernel.iname_to_tag.get(iname) diff --git a/loopy/codegen/expression.py b/loopy/codegen/expression.py index 5772bf7bcdf01245e496fca28e820c8457dcfc22..902ed7ef2aafc6e94df3dec0103d51fc5685a181 100644 --- a/loopy/codegen/expression.py +++ b/loopy/codegen/expression.py @@ -319,8 +319,8 @@ class LoopyCCodeMapper(RecursiveMapper): enclosing_prec, type_context)) elif expr.name in self.kernel.arg_dict: arg = self.kernel.arg_dict[expr.name] - from loopy.kernel import _ShapedArg - if isinstance(arg, _ShapedArg) and arg.shape == (): + from loopy.kernel.data import ShapedArg + if isinstance(arg, ShapedArg) and arg.shape == (): return "*"+expr.name for mangler in self.kernel.symbol_manglers: @@ -354,7 +354,7 @@ class LoopyCCodeMapper(RecursiveMapper): if expr.aggregate.name in self.kernel.arg_dict: arg = self.kernel.arg_dict[expr.aggregate.name] - from loopy.kernel import ImageArg + from loopy.kernel.data import ImageArg if isinstance(arg, ImageArg): assert isinstance(expr.index, tuple) @@ -429,7 +429,7 @@ class LoopyCCodeMapper(RecursiveMapper): if expr.aggregate.name in self.kernel.arg_dict: arg = 
self.kernel.arg_dict[expr.aggregate.name] - from loopy.kernel import ImageArg + from loopy.kernel.data import ImageArg if isinstance(arg, ImageArg): raise RuntimeError("linear indexing doesn't work on images: %s" % expr) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 8c4502123bb9398fd65b575f8f782c5e93c72fed..37616322ecdef2c734e8365a49c98d6dd5f81031 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -156,7 +156,7 @@ def intersect_kernel_with_slab(kernel, slab, iname): # {{{ hw-parallel loop def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, hw_inames_left=None): - from loopy.kernel import UniqueTag, HardwareParallelTag, LocalIndexTag, GroupIndexTag + from loopy.kernel.data import UniqueTag, HardwareParallelTag, LocalIndexTag, GroupIndexTag if hw_inames_left is None: hw_inames_left = [iname diff --git a/loopy/compiled.py b/loopy/compiled.py index 84c525fd79449b1e6ab93791935e4f9c83d7a18f..2e17f4e7408e5da47e1861449c6b41d43acbe26b 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -139,7 +139,7 @@ class CompiledKernel: def get_kernel(self, dtype_mapping_set): kernel = self.kernel - from loopy.kernel import ( + from loopy.kernel.tools import ( add_argument_dtypes, infer_argument_dtypes, get_arguments_with_incomplete_dtype) @@ -185,7 +185,7 @@ class CompiledKernel: print "[Loopy] ----------------------------------------------------" raise - from loopy.kernel import ValueArg + from loopy.kernel.data import ValueArg arg_types = [] for arg in kernel.args: @@ -374,7 +374,7 @@ class TestArgInfo(Record): def make_ref_args(kernel, queue, parameters, fill_value): - from loopy.kernel import ValueArg, GlobalArg, ImageArg + from loopy.kernel.data import ValueArg, GlobalArg, ImageArg from pymbolic import evaluate @@ -468,7 +468,7 @@ def make_ref_args(kernel, queue, parameters, def make_args(queue, kernel, arg_descriptors, parameters, fill_value): - from loopy.kernel import ValueArg, GlobalArg, ImageArg + from loopy.kernel.data 
import ValueArg, GlobalArg, ImageArg from pymbolic import evaluate diff --git a/loopy/creation.py b/loopy/creation.py deleted file mode 100644 index 325a56f8fd6dcc96e8d9ac89d9c66da2b7e65d68..0000000000000000000000000000000000000000 --- a/loopy/creation.py +++ /dev/null @@ -1,314 +0,0 @@ -from __future__ import division - -__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-""" - - - - -import numpy as np -from loopy.symbolic import IdentityMapper - - -def tag_reduction_inames_as_sequential(knl): - result = set() - - def map_reduction(red_expr, rec): - rec(red_expr.expr) - result.update(red_expr.inames) - - from loopy.symbolic import ReductionCallbackMapper - for insn in knl.instructions: - ReductionCallbackMapper(map_reduction)(insn.expression) - - from loopy.kernel import ParallelTag, ForceSequentialTag - - new_iname_to_tag = {} - for iname in result: - tag = knl.iname_to_tag.get(iname) - if tag is not None and isinstance(tag, ParallelTag): - raise RuntimeError("inconsistency detected: " - "reduction iname '%s' has " - "a parallel tag" % iname) - - if tag is None: - new_iname_to_tag[iname] = ForceSequentialTag() - - from loopy import tag_inames - return tag_inames(knl, new_iname_to_tag) - -# {{{ sanity checking - -def check_for_duplicate_names(knl): - name_to_source = {} - - def add_name(name, source): - if name in name_to_source: - raise RuntimeError("invalid %s name '%s'--name already used as " - "%s" % (source, name, name_to_source[name])) - - name_to_source[name] = source - - for name in knl.all_inames(): - add_name(name, "iname") - for arg in knl.args: - add_name(arg.name, "argument") - for name in knl.temporary_variables: - add_name(name, "temporary") - for name in knl.substitutions: - add_name(name, "substitution") - -def check_for_nonexistent_iname_deps(knl): - for insn in knl.instructions: - if not set(insn.forced_iname_deps) <= knl.all_inames(): - raise ValueError("In instruction '%s': " - "cannot force dependency on inames '%s'--" - "they don't exist" % ( - insn.id, - ",".join( - set(insn.forced_iname_deps)-knl.all_inames()))) - -def check_for_multiple_writes_to_loop_bounds(knl): - from islpy import dim_type - - domain_parameters = set() - for dom in knl.domains: - domain_parameters.update(dom.get_space().get_var_dict(dim_type.param)) - - temp_var_domain_parameters = domain_parameters & set( - knl.temporary_variables) - 
- wmap = knl.writer_map() - for tvpar in temp_var_domain_parameters: - par_writers = wmap[tvpar] - if len(par_writers) != 1: - raise RuntimeError("there must be exactly one write to data-dependent " - "domain parameter '%s' (found %d)" % (tvpar, len(par_writers))) - - -def check_written_variable_names(knl): - admissible_vars = ( - set(arg.name for arg in knl.args) - | set(knl.temporary_variables.iterkeys())) - - for insn in knl.instructions: - var_name = insn.get_assignee_var_name() - - if var_name not in admissible_vars: - raise RuntimeError("variable '%s' not declared or not " - "allowed for writing" % var_name) - -# }}} - -# {{{ expand common subexpressions into assignments - -class CSEToAssignmentMapper(IdentityMapper): - def __init__(self, add_assignment): - self.add_assignment = add_assignment - self.expr_to_var = {} - - def map_common_subexpression(self, expr): - try: - return self.expr_to_var[expr.child] - except KeyError: - from loopy.symbolic import TypedCSE - if isinstance(expr, TypedCSE): - dtype = expr.dtype - else: - dtype = None - - child = self.rec(expr.child) - from pymbolic.primitives import Variable - if isinstance(child, Variable): - return child - - var_name = self.add_assignment(expr.prefix, child, dtype) - var = Variable(var_name) - self.expr_to_var[expr.child] = var - return var - -def expand_cses(knl): - def add_assignment(base_name, expr, dtype): - if base_name is None: - base_name = "var" - - new_var_name = var_name_gen(base_name) - - if dtype is None: - from loopy import infer_type - dtype = infer_type - else: - dtype=np.dtype(dtype) - - from loopy.kernel import TemporaryVariable - new_temp_vars[new_var_name] = TemporaryVariable( - name=new_var_name, - dtype=dtype, - is_local=None, - shape=()) - - from pymbolic.primitives import Variable - from loopy.kernel import Instruction - insn = Instruction( - id=knl.make_unique_instruction_id(extra_used_ids=newly_created_insn_ids), - assignee=Variable(new_var_name), expression=expr) - 
newly_created_insn_ids.add(insn.id) - new_insns.append(insn) - - return new_var_name - - cseam = CSEToAssignmentMapper(add_assignment=add_assignment) - - new_insns = [] - - var_name_gen = knl.get_var_name_generator() - - newly_created_insn_ids = set() - new_temp_vars = knl.temporary_variables.copy() - - for insn in knl.instructions: - new_insns.append(insn.copy(expression=cseam(insn.expression))) - - return knl.copy( - instructions=new_insns, - temporary_variables=new_temp_vars) - -# }}} - -# {{{ temporary variable creation - -def create_temporaries(knl): - new_insns = [] - new_temp_vars = knl.temporary_variables.copy() - - for insn in knl.instructions: - from loopy.kernel import TemporaryVariable - - if insn.temp_var_type is not None: - assignee_name = insn.get_assignee_var_name() - - assignee_indices = [] - from pymbolic.primitives import Variable - for index_expr in insn.get_assignee_indices(): - if (not isinstance(index_expr, Variable) - or not index_expr.name in knl.all_inames()): - raise RuntimeError( - "only plain inames are allowed in " - "the lvalue index when declaring the " - "variable '%s' in an instruction" - % assignee_name) - - assignee_indices.append(index_expr.name) - - base_indices, shape = \ - knl.find_var_base_indices_and_shape_from_inames( - assignee_indices, knl.cache_manager) - - if assignee_name in new_temp_vars: - raise RuntimeError("cannot create temporary variable '%s'--" - "already exists" % assignee_name) - if assignee_name in knl.arg_dict: - raise RuntimeError("cannot create temporary variable '%s'--" - "already exists as argument" % assignee_name) - - new_temp_vars[assignee_name] = TemporaryVariable( - name=assignee_name, - dtype=insn.temp_var_type, - is_local=None, - base_indices=base_indices, - shape=shape) - - insn = insn.copy(temp_var_type=None) - - new_insns.append(insn) - - return knl.copy( - instructions=new_insns, - temporary_variables=new_temp_vars) - -# }}} - -# {{{ check for reduction iname duplication - -def 
check_for_reduction_inames_duplication_requests(kernel): - - # {{{ helper function - - def check_reduction_inames(reduction_expr, rec): - for iname in reduction_expr.inames: - if iname.startswith("@"): - raise RuntimeError("Reduction iname duplication with '@' is no " - "longer supported. Use loopy.duplicate_inames instead.") - - # }}} - - - from loopy.symbolic import ReductionCallbackMapper - rcm = ReductionCallbackMapper(check_reduction_inames) - for insn in kernel.instructions: - rcm(insn.expression) - - for sub_name, sub_rule in kernel.substitutions.iteritems(): - rcm(sub_rule.expression) - -# }}} - -# {{{ kernel creation top-level - -def make_kernel(*args, **kwargs): - """Second pass of kernel creation. Think about requests for iname duplication - and temporary variable creation. - """ - - from loopy.kernel import LoopKernel - knl = LoopKernel(*args, **kwargs) - - from loopy import tag_inames - knl = tag_inames( - knl.copy(iname_to_tag_requests=None), - knl.iname_to_tag_requests).copy( - iname_to_tag_requests=[]) - - check_for_nonexistent_iname_deps(knl) - check_for_reduction_inames_duplication_requests(knl) - - - knl = tag_reduction_inames_as_sequential(knl) - knl = create_temporaries(knl) - knl = expand_cses(knl) - - # ------------------------------------------------------------------------- - # Ordering dependency: - # ------------------------------------------------------------------------- - # Must create temporary before checking for writes to temporary variables - # that are domain parameters. 
- # ------------------------------------------------------------------------- - - check_for_multiple_writes_to_loop_bounds(knl) - check_for_duplicate_names(knl) - check_written_variable_names(knl) - - return knl - -# }}} - -# vim: fdm=marker diff --git a/loopy/cse.py b/loopy/cse.py index fd0d150de7133ee7117679168b5997d46058fa43..77594b4124ef56984db87c78abc287ab3a598f87 100644 --- a/loopy/cse.py +++ b/loopy/cse.py @@ -674,7 +674,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, c_subst_name = subst_name.replace(".", "_") - from loopy.kernel import parse_tag + from loopy.kernel.data import parse_tag default_tag = parse_tag(default_tag) subst = kernel.substitutions[subst_name] @@ -786,7 +786,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # {{{ find domain to be changed - from loopy.kernel import DomainChanger + from loopy.kernel.tools import DomainChanger domch = DomainChanger(kernel, expanding_inames) if domch.leaf_domain_index is not None: @@ -836,7 +836,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, ))) (compute_expr)) - from loopy.kernel import Instruction + from loopy.kernel.data import Instruction compute_insn = Instruction( id=kernel.make_unique_instruction_id(based_on=c_subst_name), assignee=assignee, @@ -864,7 +864,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, else: dtype = np.dtype(dtype) - from loopy.kernel import TemporaryVariable + from loopy.kernel.data import TemporaryVariable new_temporary_variables = kernel.temporary_variables.copy() temp_var = TemporaryVariable( diff --git a/loopy/kernel.py b/loopy/kernel.py deleted file mode 100644 index f0c799e5d38fe36af0ab88855de79d7b4865c3c2..0000000000000000000000000000000000000000 --- a/loopy/kernel.py +++ /dev/null @@ -1,1973 +0,0 @@ -"""Elements of loopy's user-facing language.""" - -from __future__ import division - -__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, 
free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-""" - - - - - -import numpy as np -from pytools import Record, memoize_method -import islpy as isl -from islpy import dim_type - -import re - - - - -class CannotBranchDomainTree(RuntimeError): - pass - -# {{{ index tags - -class IndexTag(Record): - __slots__ = [] - - def __hash__(self): - raise RuntimeError("use .key to hash index tags") - - - - -class ParallelTag(IndexTag): - pass - -class HardwareParallelTag(ParallelTag): - pass - -class UniqueTag(IndexTag): - @property - def key(self): - return type(self) - -class AxisTag(UniqueTag): - __slots__ = ["axis"] - - def __init__(self, axis): - Record.__init__(self, - axis=axis) - - @property - def key(self): - return (type(self), self.axis) - - def __str__(self): - return "%s.%d" % ( - self.print_name, self.axis) - -class GroupIndexTag(HardwareParallelTag, AxisTag): - print_name = "g" - -class LocalIndexTagBase(HardwareParallelTag): - pass - -class LocalIndexTag(LocalIndexTagBase, AxisTag): - print_name = "l" - -class AutoLocalIndexTagBase(LocalIndexTagBase): - pass - -class AutoFitLocalIndexTag(AutoLocalIndexTagBase): - def __str__(self): - return "l.auto" - -class IlpBaseTag(ParallelTag): - pass - -class UnrolledIlpTag(IlpBaseTag): - def __str__(self): - return "ilp.unr" - -class LoopedIlpTag(IlpBaseTag): - def __str__(self): - return "ilp.seq" - -class UnrollTag(IndexTag): - def __str__(self): - return "unr" - -class ForceSequentialTag(IndexTag): - def __str__(self): - return "forceseq" - -def parse_tag(tag): - if tag is None: - return tag - - if isinstance(tag, IndexTag): - return tag - - if not isinstance(tag, str): - raise ValueError("cannot parse tag: %s" % tag) - - if tag == "for": - return None - elif tag in ["unr"]: - return UnrollTag() - elif tag in ["ilp", "ilp.unr"]: - return UnrolledIlpTag() - elif tag == "ilp.seq": - return LoopedIlpTag() - elif tag.startswith("g."): - return GroupIndexTag(int(tag[2:])) - elif tag.startswith("l."): - axis = tag[2:] - if axis == "auto": - return AutoFitLocalIndexTag() - 
else: - return LocalIndexTag(int(axis)) - else: - raise ValueError("cannot parse tag: %s" % tag) - -# }}} - -# {{{ arguments - -class _ShapedArg(Record): - def __init__(self, name, dtype=None, shape=None, strides=None, order="C", - offset=0): - """ - All of the following are optional. Specify either strides or shape. - - :arg shape: - :arg strides: like numpy strides, but in multiples of - data type size - :arg order: - :arg offset: Offset from the beginning of the vector from which - the strides are counted. - """ - if dtype is not None: - dtype = np.dtype(dtype) - - def parse_if_necessary(x): - if isinstance(x, str): - from pymbolic import parse - return parse(x) - else: - return x - - def process_tuple(x): - x = parse_if_necessary(x) - if not isinstance(x, tuple): - x = (x,) - - return tuple(parse_if_necessary(xi) for xi in x) - - if strides is not None: - strides = process_tuple(strides) - - if shape is not None: - shape = process_tuple(shape) - - if strides is None and shape is not None: - from pyopencl.compyte.array import ( - f_contiguous_strides, - c_contiguous_strides) - - if order == "F": - strides = f_contiguous_strides(1, shape) - elif order == "C": - strides = c_contiguous_strides(1, shape) - else: - raise ValueError("invalid order: %s" % order) - - Record.__init__(self, - name=name, - dtype=dtype, - strides=strides, - offset=offset, - shape=shape) - - @property - @memoize_method - def numpy_strides(self): - return tuple(self.dtype.itemsize*s for s in self.strides) - - @property - def dimensions(self): - return len(self.shape) - -class GlobalArg(_ShapedArg): - def __repr__(self): - return "" % ( - self.name, self.dtype, ",".join(str(i) for i in self.shape)) - -class ArrayArg(GlobalArg): - def __init__(self, *args, **kwargs): - from warnings import warn - warn("ArrayArg is a deprecated name of GlobalArg", DeprecationWarning, - stacklevel=2) - GlobalArg.__init__(self, *args, **kwargs) - -class ConstantArg(_ShapedArg): - def __repr__(self): - return "" % 
( - self.name, self.dtype, ",".join(str(i) for i in self.shape)) - -class ImageArg(Record): - def __init__(self, name, dtype=None, dimensions=None, shape=None): - dtype = np.dtype(dtype) - if shape is not None: - if dimensions is not None and dimensions != len(shape): - raise RuntimeError("cannot specify both shape and " - "disagreeing dimensions in ImageArg") - dimensions = len(shape) - else: - if not isinstance(dimensions, int): - raise RuntimeError("ImageArg: dimensions must be an integer") - - Record.__init__(self, - dimensions=dimensions, - shape=shape, - dtype=dtype, - name=name) - - - def __repr__(self): - return "" % (self.name, self.dtype) - -class ValueArg(Record): - def __init__(self, name, dtype=None, approximately=None): - if dtype is not None: - dtype = np.dtype(dtype) - - Record.__init__(self, name=name, dtype=dtype, - approximately=approximately) - - def __repr__(self): - return "" % (self.name, self.dtype) - -class ScalarArg(ValueArg): - def __init__(self, name, dtype=None, approximately=None): - from warnings import warn - warn("ScalarArg is a deprecated name of ValueArg", - DeprecationWarning, stacklevel=2) - - ValueArg.__init__(self, name, dtype, approximately) - -# }}} - -# {{{ temporary variable - -class TemporaryVariable(Record): - """ - :ivar name: - :ivar dtype: - :ivar shape: - :ivar storage_shape: - :ivar base_indices: - :ivar is_local: - """ - - def __init__(self, name, dtype, shape, is_local, base_indices=None, - storage_shape=None): - if base_indices is None: - base_indices = (0,) * len(shape) - - if shape is not None and not isinstance(shape, tuple): - shape = tuple(shape) - - Record.__init__(self, name=name, dtype=dtype, shape=shape, is_local=is_local, - base_indices=base_indices, - storage_shape=storage_shape) - - @property - def nbytes(self): - from pytools import product - return product(si for si in self.shape)*self.dtype.itemsize - -# }}} - -# {{{ subsitution rule - -class SubstitutionRule(Record): - """ - :ivar name: - :ivar 
arguments: - :ivar expression: - """ - - def __init__(self, name, arguments, expression): - assert isinstance(arguments, tuple) - - Record.__init__(self, - name=name, arguments=arguments, expression=expression) - - def __str__(self): - return "%s(%s) := %s" % ( - self.name, ", ".join(self.arguments), self.expression) - -# }}} - -# {{{ instruction - -class Instruction(Record): - """ - :ivar id: An (otherwise meaningless) identifier that is unique within - a :class:`LoopKernel`. - :ivar assignee: - :ivar expression: - :ivar forced_iname_deps: a set of inames that are added to the list of iname - dependencies - :ivar insn_deps: a list of ids of :class:`Instruction` instances that - *must* be executed before this one. Note that loop scheduling augments this - by adding dependencies on any writes to temporaries read by this instruction. - :ivar boostable: Whether the instruction may safely be executed - inside more loops than advertised without changing the meaning - of the program. Allowed values are *None* (for unknown), *True*, and *False*. - :ivar boostable_into: a set of inames into which the instruction - may need to be boosted, as a heuristic help for the scheduler. 
- :ivar priority: scheduling priority - - The following two instance variables are only used until :func:`loopy.make_kernel` is - finished: - - :ivar temp_var_type: if not None, a type that will be assigned to the new temporary variable - created from the assignee - """ - def __init__(self, - id, assignee, expression, - forced_iname_deps=frozenset(), insn_deps=set(), boostable=None, - boostable_into=None, - temp_var_type=None, priority=0): - - from loopy.symbolic import parse - if isinstance(assignee, str): - assignee = parse(assignee) - if isinstance(expression, str): - assignee = parse(expression) - - assert isinstance(forced_iname_deps, frozenset) - assert isinstance(insn_deps, set) - - Record.__init__(self, - id=id, assignee=assignee, expression=expression, - forced_iname_deps=forced_iname_deps, - insn_deps=insn_deps, boostable=boostable, - boostable_into=boostable_into, - temp_var_type=temp_var_type, - priority=priority) - - @memoize_method - def reduction_inames(self): - def map_reduction(expr, rec): - rec(expr.expr) - for iname in expr.inames: - result.add(iname) - - from loopy.symbolic import ReductionCallbackMapper - cb_mapper = ReductionCallbackMapper(map_reduction) - - result = set() - cb_mapper(self.expression) - - return result - - def __str__(self): - result = "%s: %s <- %s" % (self.id, - self.assignee, self.expression) - - if self.boostable == True: - if self.boostable_into: - result += " (boostable into '%s')" % ",".join(self.boostable_into) - else: - result += " (boostable)" - elif self.boostable == False: - result += " (not boostable)" - elif self.boostable is None: - pass - else: - raise RuntimeError("unexpected value for Instruction.boostable") - - options = [] - - if self.insn_deps: - options.append("deps="+":".join(self.insn_deps)) - if self.priority: - options.append("priority=%d" % self.priority) - - return result - - @memoize_method - def get_assignee_var_name(self): - from pymbolic.primitives import Variable, Subscript - - if 
WORD_RE = re.compile(r"\b([a-zA-Z0-9_]+)\b")
BRACE_RE = re.compile(r"\$\{([a-zA-Z0-9_]+)\}")


def expand_defines(insn, defines, single_valued=True):
    """Yield every expansion of the macros from *defines* found in *insn*.

    A macro may be written either as ``${NAME}`` or as the bare word
    ``NAME``. If a macro value is a list, one expansion is generated per
    list entry (and per combination of entries if several list-valued
    macros occur). Each macro takes a single value within one expansion,
    no matter how often or in which spelling it occurs.

    :arg insn: the string to expand
    :arg defines: a mapping from macro names to values (or lists of values)
    :arg single_valued: if true, list-valued macros are rejected
    :raises ValueError: if *single_valued* is true and a list-valued macro
        occurs in *insn*. (Raised when iteration starts, since this is a
        generator.)
    """
    from itertools import product

    # Ordered, de-duplicated (regex pattern, macro name) pairs. Processing
    # each pattern only once avoids multiplying the set of expansions when
    # a macro occurs several times in *insn*.
    substitutions = []
    seen_patterns = set()

    # Distinct macro names, in order of first occurrence.
    macro_names = []

    for find_regexp, replace_pattern in [
            (BRACE_RE, r"\$\{%s\}"),
            (WORD_RE, r"\b%s\b"),
            ]:

        for match in find_regexp.finditer(insn):
            word = match.group(1)

            try:
                value = defines[word]
            except KeyError:
                continue

            if isinstance(value, list) and single_valued:
                raise ValueError("multi-valued macro expansion not allowed "
                        "in this context (when expanding '%s')" % word)

            pattern = replace_pattern % word
            if pattern in seen_patterns:
                continue

            seen_patterns.add(pattern)
            substitutions.append((pattern, word))

            if word not in macro_names:
                macro_names.append(word)

    choices = []
    for word in macro_names:
        value = defines[word]
        choices.append(value if isinstance(value, list) else [value])

    # product() of no choices yields a single empty combination, so a string
    # without macros is passed through unchanged exactly once.
    for combination in product(*choices):
        chosen = dict(zip(macro_names, combination))

        rep_value = insn
        for pattern, word in substitutions:
            rep_value = re.sub(pattern, str(chosen[word]), rep_value)

        yield rep_value


def expand_defines_in_expr(expr, defines):
    """Return *expr* with every variable named in *defines* replaced.

    The replacement is obtained by parsing the string form of the macro
    value. Variables without an entry in *defines* are left alone.
    """
    from pymbolic.primitives import Variable
    from loopy.symbolic import parse

    def subst_func(var):
        if isinstance(var, Variable):
            try:
                var_value = defines[var.name]
            except KeyError:
                return None
            else:
                return parse(str(var_value))
        else:
            return None

    from loopy.symbolic import SubstitutionMapper
    return SubstitutionMapper(subst_func)(expr)
def default_function_mangler(name, arg_dtypes):
    """Return the first non-None result of the built-in manglers, or None."""
    from loopy.reduction import reduction_function_mangler

    for candidate in [reduction_function_mangler]:
        mangled = candidate(name, arg_dtypes)
        if mangled is not None:
            return mangled

    return None


def opencl_function_mangler(name, arg_dtypes):
    """Map a function call onto its OpenCL-level name and result dtype.

    Handles two-argument ``atan2``, a fixed list of elementwise functions
    (plus ``real``/``imag``) on single- and double-precision complex
    arguments, and ``dot``.

    :returns: a tuple ``(result_dtype, c_name)``, or None if the call is
        not recognized
    :raises RuntimeError: for a single complex argument that is neither
        ``complex64`` nor ``complex128``
    """
    arg_count = len(arg_dtypes)

    if name == "atan2" and arg_count == 2:
        return arg_dtypes[0], name

    if arg_count == 1:
        (arg_dtype,) = arg_dtypes

        if arg_dtype.kind == "c":
            for complex_type, prefix in [
                    (np.complex64, "cfloat"),
                    (np.complex128, "cdouble"),
                    ]:
                if arg_dtype == complex_type:
                    tpname = prefix
                    break
            else:
                raise RuntimeError("unexpected complex type '%s'" % arg_dtype)

            mangled_name = "%s_%s" % (tpname, name)

            if name in ["sqrt", "exp", "log",
                    "sin", "cos", "tan",
                    "sinh", "cosh", "tanh"]:
                return arg_dtype, mangled_name

            if name in ["real", "imag"]:
                # result has the dtype of the real part of the argument
                return np.dtype(arg_dtype.type(0).real), mangled_name

    if name == "dot":
        # the result dtype is that of the "s0" field of the first argument
        scalar_dtype, offset, field_name = arg_dtypes[0].fields["s0"]
        return scalar_dtype, name

    return None


def single_arg_function_mangler(name, arg_dtypes):
    """Mangle any one-argument call to itself, returning the argument dtype.

    :returns: ``(dtype, name)`` for exactly one argument, None otherwise
    """
    if len(arg_dtypes) != 1:
        return None

    (dtype,) = arg_dtypes
    return dtype, name


def opencl_symbol_mangler(name):
    """Return ``(dtype, name)`` for symbols with a recognized prefix.

    ``FLT_*`` and ``M_*_F`` map to float32, ``DBL_*`` and all other ``M_*``
    to float64. Anything else yields None.
    """
    # FIXME: should be more picky about exact names
    if name.startswith("FLT_"):
        scalar_type = np.float32
    elif name.startswith("DBL_"):
        scalar_type = np.float64
    elif name.startswith("M_"):
        scalar_type = np.float32 if name.endswith("_F") else np.float64
    else:
        return None

    return np.dtype(scalar_type), name
reduction_preamble_generator(seen_dtypes, seen_functions): - yield result - - has_double = False - has_complex = False - - for dtype in seen_dtypes: - if dtype in [np.float64, np.complex128]: - has_double = True - if dtype.kind == "c": - has_complex = True - - if has_double: - yield ("00_enable_double", """ - #pragma OPENCL EXTENSION cl_khr_fp64: enable - """) - - if has_complex: - if has_double: - yield ("10_include_complex_header", """ - #define PYOPENCL_DEFINE_CDOUBLE - - #include - """) - else: - yield ("10_include_complex_header", """ - #include - """) - - c_funcs = set(c_name for name, c_name, arg_dtypes in seen_functions) - if "int_floor_div" in c_funcs: - yield ("05_int_floor_div", """ - #define int_floor_div(a,b) \ - (( (a) - \ - ( ( (a)<0 ) != ( (b)<0 )) \ - *( (b) + ( (b)<0 ) - ( (b)>=0 ) )) \ - / (b) ) - """) - - if "int_floor_div_pos_b" in c_funcs: - yield ("05_int_floor_div_pos_b", """ - #define int_floor_div_pos_b(a,b) ( \ - ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) \ - ) - """) - - -# }}} - -# {{{ loop kernel object - -def _generate_unique_possibilities(prefix): - yield prefix - - try_num = 0 - while True: - yield "%s_%d" % (prefix, try_num) - try_num += 1 - -class _UniqueNameGenerator: - def __init__(self, existing_names): - self.existing_names = existing_names.copy() - - def is_name_conflicting(self, name): - return name in self.existing_names - - def add_name(self, name): - if self.is_name_conflicting(name): - raise ValueError("name '%s' conflicts with existing names") - self.existing_names.add(name) - - def add_names(self, names): - for name in names: - self.add_name(name) - - def __call__(self, based_on="var"): - for var_name in _generate_unique_possibilities(based_on): - if not self.is_name_conflicting(var_name): - break - - self.existing_names.add(var_name) - return var_name - -_IDENTIFIER_RE = re.compile(r"\b([a-zA-Z_][a-zA-Z0-9_]*)\b") - -def _gather_identifiers(s): - return set(_IDENTIFIER_RE.findall(s)) - -def _parse_domains(ctx, 
def _parse_domains(ctx, args_and_vars, domains, defines):
    """Turn each entry of *domains* into an isl set and check its inames.

    String domains first have *defines* expanded. If a string does not
    start with a parameter list (``[...]``), one is prepended, made of
    those identifiers in the string that are in *args_and_vars* or are
    inames of earlier domains. Non-string entries must already be
    :class:`isl.Set` or :class:`isl.BasicSet` instances.

    :returns: the list of domains, in input order
    :raises RuntimeError: if a set dimension has no name, or if an iname
        was already defined by an earlier domain
    """
    known_parameters = args_and_vars.copy()
    seen_inames = set()
    parsed_domains = []

    for dom in domains:
        if not isinstance(dom, str):
            assert isinstance(dom, (isl.Set, isl.BasicSet))
            # assert dom.get_ctx() == ctx
        else:
            # single_valued expansion: exactly one result is expected
            (dom,) = expand_defines(dom, defines)

            has_parameter_list = dom.lstrip().startswith("[")
            if not has_parameter_list:
                # i.e. if no parameters are already given
                implied_parameters = _gather_identifiers(dom) & known_parameters
                dom = "[%s] -> %s" % (",".join(implied_parameters), dom)

            try:
                dom = isl.BasicSet.read_from_str(ctx, dom)
            except:
                # report which domain failed, then let the error propagate
                print("failed to parse domain '%s'" % dom)
                raise

        set_dim_count = dom.dim(dim_type.set)
        for i_iname in xrange(set_dim_count):
            iname = dom.get_dim_name(dim_type.set, i_iname)

            if iname is None:
                raise RuntimeError("domain '%s' provided no iname at index "
                        "%d (redefined iname?)" % (dom, i_iname))

            if iname in seen_inames:
                raise RuntimeError("domain '%s' redefines iname '%s' "
                        "that is part of a previous domain" % (dom, iname))

            # inames of this domain may serve as parameters of later ones
            seen_inames.add(iname)
            known_parameters.add(iname)

        parsed_domains.append(dom)

    return parsed_domains
- :ivar temporary_variables: - :ivar iname_to_tag: - :ivar substitutions: a mapping from substitution names to :class:`SubstitutionRule` - objects - :ivar function_manglers: list of functions of signature (name, arg_dtypes) - returning a tuple (result_dtype, c_name) - or a tuple (result_dtype, c_name, arg_dtypes), - where c_name is the C-level function to be called. - :ivar symbol_manglers: list of functions of signature (name) returning - a tuple (result_dtype, c_name), where c_name is the C-level symbol to be - evaluated. - :ivar defines: a dictionary of replacements to be made in instructions given - as strings before parsing. A macro instance intended to be replaced should - look like "MACRO" in the instruction code. The expansion given in this - parameter is allowed to be a list. In this case, instructions are generated - for *each* combination of macro values. - - These defines may also be used in the domain and in argument shapes and - strides. They are expanded only upon kernel creation. - - The following arguments are not user-facing: - - :ivar iname_slab_increments: a dictionary mapping inames to (lower_incr, - upper_incr) tuples that will be separated out in the execution to generate - 'bulk' slabs with fewer conditionals. - :ivar applied_iname_rewrites: A list of past substitution dictionaries that - were applied to the kernel. These are stored so that they may be repeated - on expressions the user specifies later. 
- :ivar cache_manager: - :ivar isl_context: - - The following instance variables are only used until :func:`loopy.make_kernel` is - finished: - - :ivar iname_to_tag_requests: - """ - - # {{{ constructor - - def __init__(self, device, domains, instructions, args=[], schedule=None, - name="loopy_kernel", - preambles=[], - preamble_generators=[default_preamble_generator], - assumptions=None, - local_sizes={}, - temporary_variables={}, - iname_to_tag={}, - substitutions={}, - function_manglers=[ - default_function_mangler, - opencl_function_mangler, - single_arg_function_mangler, - ], - symbol_manglers=[opencl_symbol_mangler], - defines={}, - - # non-user-facing - iname_slab_increments={}, - applied_iname_rewrites=[], - cache_manager=None, - iname_to_tag_requests=None, - index_dtype=np.int32, - isl_context=None, - - # When kernels get intersected in slab decomposition, - # their grid sizes shouldn't change. This provides - # a way to forward sub-kernel grid size requests. - get_grid_sizes=None): - """ - :arg domain: a :class:`islpy.BasicSet`, or a string parseable to a basic set by the isl. - Example: "{[i,j]: 0<=i < 10 and 0<= j < 9}" - """ - assert not iname_to_tag_requests - - import re - - if cache_manager is None: - cache_manager = SetOperationCacheManager() - - iname_to_tag_requests = {} - - # {{{ parse instructions - - INSN_RE = re.compile( - "\s*(?:\<(?P.*?)\>)?" 
- "\s*(?P.+?)\s*(?.+?)" - "\s*?(?:\{(?P[\s\w=,:]+)\}\s*)?$" - ) - SUBST_RE = re.compile( - r"^\s*(?P.+?)\s*:=\s*(?P.+)\s*$" - ) - - def parse_insn(insn): - insn_match = INSN_RE.match(insn) - subst_match = SUBST_RE.match(insn) - if insn_match is not None and subst_match is not None: - raise RuntimeError("instruction parse error: %s" % insn) - - if insn_match is not None: - groups = insn_match.groupdict() - elif subst_match is not None: - groups = subst_match.groupdict() - else: - raise RuntimeError("insn parse error") - - from loopy.symbolic import parse - lhs = parse(groups["lhs"]) - rhs = parse(groups["rhs"]) - - if insn_match is not None: - insn_deps = set() - insn_id = "insn" - priority = 0 - - if groups["options"] is not None: - for option in groups["options"].split(","): - option = option.strip() - if not option: - raise RuntimeError("empty option supplied") - - equal_idx = option.find("=") - if equal_idx == -1: - opt_key = option - opt_value = None - else: - opt_key = option[:equal_idx].strip() - opt_value = option[equal_idx+1:].strip() - - if opt_key == "id": - insn_id = opt_value - elif opt_key == "priority": - priority = int(opt_value) - elif opt_key == "dep": - insn_deps = set(opt_value.split(":")) - else: - raise ValueError("unrecognized instruction option '%s'" - % opt_key) - - if groups["temp_var_type"] is not None: - if groups["temp_var_type"]: - temp_var_type = np.dtype(groups["temp_var_type"]) - else: - from loopy import infer_type - temp_var_type = infer_type - else: - temp_var_type = None - - from pymbolic.primitives import Variable, Subscript - if not isinstance(lhs, (Variable, Subscript)): - raise RuntimeError("left hand side of assignment '%s' must " - "be variable or subscript" % lhs) - - parsed_instructions.append( - Instruction( - id=self.make_unique_instruction_id( - parsed_instructions, based_on=insn_id), - insn_deps=insn_deps, - forced_iname_deps=frozenset(), - assignee=lhs, expression=rhs, - temp_var_type=temp_var_type, - 
priority=priority)) - - elif subst_match is not None: - from pymbolic.primitives import Variable, Call - - if isinstance(lhs, Variable): - subst_name = lhs.name - arg_names = [] - elif isinstance(lhs, Call): - if not isinstance(lhs.function, Variable): - raise RuntimeError("Invalid substitution rule left-hand side") - subst_name = lhs.function.name - arg_names = [] - - for i, arg in enumerate(lhs.parameters): - if not isinstance(arg, Variable): - raise RuntimeError("Invalid substitution rule " - "left-hand side: %s--arg number %d " - "is not a variable"% (lhs, i)) - arg_names.append(arg.name) - else: - raise RuntimeError("Invalid substitution rule left-hand side") - - substitutions[subst_name] = SubstitutionRule( - name=subst_name, - arguments=tuple(arg_names), - expression=rhs) - - def parse_if_necessary(insn): - if isinstance(insn, Instruction): - if insn.id is None: - insn = insn.copy(id=self.make_unique_instruction_id(parsed_instructions)) - parsed_instructions.append(insn) - return - - if not isinstance(insn, str): - raise TypeError("Instructions must be either an Instruction " - "instance or a parseable string. got '%s' instead." - % type(insn)) - - for insn in insn.split("\n"): - comment_start = insn.find("#") - if comment_start >= 0: - insn = insn[:comment_start] - - insn = insn.strip() - if not insn: - continue - - for sub_insn in expand_defines(insn, defines, single_valued=False): - parse_insn(sub_insn) - - parsed_instructions = [] - - substitutions = substitutions.copy() - - if isinstance(instructions, str): - instructions = [instructions] - for insn in instructions: - # must construct list one-by-one to facilitate unique id generation - parse_if_necessary(insn) - - if len(set(insn.id for insn in parsed_instructions)) != len(parsed_instructions): - raise RuntimeError("instruction ids do not appear to be unique") - - # }}} - - # Ordering dependency: - # Domain construction needs to know what temporary variables are - # available. 
That information can only be obtained once instructions - # are parsed. - - # {{{ construct domains - - if isinstance(domains, str): - domains = [domains] - - for domain in domains: - if isinstance(domain, isl.BasicSet): - isl_context = domain.get_ctx() - if isl_context is None: - isl_context = isl.Context() - - scalar_arg_names = set(arg.name for arg in args if isinstance(arg, ValueArg)) - var_names = ( - set(temporary_variables) - | set(insn.get_assignee_var_name() - for insn in parsed_instructions - if insn.temp_var_type is not None)) - domains = _parse_domains(isl_context, scalar_arg_names | var_names, domains, - defines) - - # }}} - - # {{{ process assumptions - - if assumptions is None: - dom0_space = domains[0].get_space() - assumptions_space = isl.Space.params_alloc( - dom0_space.get_ctx(), dom0_space.dim(dim_type.param)) - for i in xrange(dom0_space.dim(dim_type.param)): - assumptions_space = assumptions_space.set_dim_name( - dim_type.param, i, dom0_space.get_dim_name(dim_type.param, i)) - assumptions = isl.BasicSet.universe(assumptions_space) - - elif isinstance(assumptions, str): - all_inames = set() - all_params = set() - for dom in domains: - all_inames.update(dom.get_var_names(dim_type.set)) - all_params.update(dom.get_var_names(dim_type.param)) - - domain_parameters = all_params-all_inames - - assumptions_set_str = "[%s] -> { : %s}" \ - % (",".join(s for s in domain_parameters), - assumptions) - assumptions = isl.BasicSet.read_from_str(domains[0].get_ctx(), - assumptions_set_str) - - assert assumptions.is_params() - - # }}} - - # {{{ expand macros in arg shapes - - processed_args = [] - for arg in args: - for arg_name in arg.name.split(","): - new_arg = arg.copy(name=arg_name) - if isinstance(arg, _ShapedArg): - if arg.shape is not None: - new_arg = new_arg.copy(shape=expand_defines_in_expr(arg.shape, defines)) - if arg.strides is not None: - new_arg = new_arg.copy(strides=expand_defines_in_expr(arg.strides, defines)) - - 
def register_function_mangler(self, mangler):
    """Return a copy of the kernel in which *mangler* is tried first."""
    updated_manglers = [mangler] + self.function_manglers
    return self.copy(function_manglers=updated_manglers)


def mangle_function(self, identifier, arg_dtypes):
    """Return the first non-None result among the function manglers.

    The manglers in ``self.function_manglers`` are called in order with
    ``(identifier, arg_dtypes)``; manglers after the first match are not
    called. Returns None if no mangler recognizes the function.
    """
    # generator: manglers are only invoked as far as needed
    outcomes = (
            mangler(identifier, arg_dtypes)
            for mangler in self.function_manglers)

    return next(
            (outcome for outcome in outcomes if outcome is not None),
            None)
def get_var_descriptor(self, name):
    """Return the descriptor of the argument or temporary called *name*.

    Arguments (``self.arg_dict``) take precedence over temporary
    variables (``self.temporary_variables``).

    :raises ValueError: if *name* is neither an argument nor a temporary
    """
    # Attributes are fetched one at a time so that the second mapping is
    # only touched if the first lookup fails.
    for table_name in ("arg_dict", "temporary_variables"):
        table = getattr(self, table_name)
        try:
            return table[name]
        except KeyError:
            continue

    raise ValueError("nothing known about variable '%s'" % name)
- - discard_level_count = 0 - while discard_level_count < len(iname_set_stack): - # {{{ check for parenthood by loop bound iname - - last_inames = iname_set_stack[-1-discard_level_count] - if last_inames & parameters: - break - - # }}} - - # {{{ check for parenthood by written variable - - is_parent_by_variable = False - for par in parameters: - if par in self.temporary_variables: - writer_insns = writer_map[par] - - if len(writer_insns) > 1: - raise RuntimeError("loop bound '%s' " - "may only be written to once" % par) - - writer_insn, = writer_insns - writer_inames = self.insn_inames(writer_insn) - - if writer_inames & last_inames: - is_parent_by_variable = True - break - - if is_parent_by_variable: - break - - # }}} - - discard_level_count += 1 - - if discard_level_count: - iname_set_stack = iname_set_stack[:-discard_level_count] - - if result: - parent = len(result)-1 - else: - parent = None - - for i in range(discard_level_count): - assert parent is not None - parent = result[parent] - - # found this domain's parent - result.append(parent) - - if iname_set_stack: - parent_inames = iname_set_stack[-1] - else: - parent_inames = set() - iname_set_stack.append(parent_inames | inames) - - return result - - @memoize_method - def all_parents_per_domain(self): - """Return a list corresponding to self.domains (by index) - containing domain indices which are nested around this - domain. - - Each domains nest list walks from the leaves of the nesting - tree to the root. 
- """ - result = [] - - ppd = self.parents_per_domain() - for dom, parent in zip(self.domains, ppd): - # keep walking up tree to find *all* parents - dom_result = [] - while parent is not None: - dom_result.insert(0, parent) - parent = ppd[parent] - - result.append(dom_result) - - return result - - @memoize_method - def _get_home_domain_map(self): - return dict( - (iname, i_domain) - for i_domain, dom in enumerate(self.domains) - for iname in dom.get_var_names(dim_type.set)) - - def get_home_domain_index(self, iname): - return self._get_home_domain_map()[iname] - - @memoize_method - def combine_domains(self, domains): - """ - :arg domains: domain indices of domains to be combined. More 'dominant' - domains (those which get most say on the actual dim_type of an iname) - must be later in the order. - """ - assert isinstance(domains, tuple) # for caching - - if not domains: - return isl.BasicSet.universe(isl.Space.set_alloc( - self.isl_context, 0, 0)) - - result = None - for dom_index in domains: - dom = self.domains[dom_index] - if result is None: - result = dom - else: - aligned_dom, aligned_result = isl.align_two( - dom, result, across_dim_types=True) - result = aligned_result & aligned_dom - - return result - - def get_inames_domain(self, inames): - if not inames: - return self.combine_domains(()) - - if isinstance(inames, str): - inames = frozenset([inames]) - if not isinstance(inames, frozenset): - inames = frozenset(inames) - - from warnings import warn - warn("get_inames_domain did not get a frozenset", stacklevel=2) - - return self._get_inames_domain_backend(inames) - - @memoize_method - def get_leaf_domain_indices(self, inames): - """Find the leaves of the domain tree needed to cover all inames.""" - - hdm = self._get_home_domain_map() - ppd = self.all_parents_per_domain() - - domain_indices = set() - - # map root -> leaf - root_to_leaf = {} - - for iname in inames: - home_domain_index = hdm[iname] - if home_domain_index in domain_indices: - # nothin' new - 
@memoize_method
def _get_inames_domain_backend(self, inames):
    """Combine the leaf domains covering *inames* with all their parents."""
    domain_indices = set()
    for leaf_dom_idx in self.get_leaf_domain_indices(inames):
        domain_indices.add(leaf_dom_idx)
        domain_indices.update(self.all_parents_per_domain()[leaf_dom_idx])

    # sorted tuple: hashable (for memoization) and in domain order
    return self.combine_domains(tuple(sorted(domain_indices)))


@memoize_method
def all_inames(self):
    """Return a frozenset of the set-dimension names of all domains."""
    result = set()
    for dom in self.domains:
        result.update(dom.get_var_names(dim_type.set))
    return frozenset(result)


@memoize_method
def all_params(self):
    """Return a frozenset of all domain parameters that are not inames."""
    all_inames = self.all_inames()

    result = set()
    for dom in self.domains:
        result.update(set(dom.get_var_names(dim_type.param)) - all_inames)

    return frozenset(result)


@memoize_method
def all_insn_inames(self):
    """Return a mapping from instruction ids to inames inside which
    they should be run.
    """

    return find_all_insn_inames(self)


@memoize_method
def all_referenced_inames(self):
    """Return the union of the iname sets of all instructions."""
    result = set()
    for inames in self.all_insn_inames().itervalues():
        result.update(inames)
    return result


def insn_inames(self, insn):
    """Return the inames of *insn*, given as an instruction or as its id."""
    if isinstance(insn, Instruction):
        return self.all_insn_inames()[insn.id]
    else:
        return self.all_insn_inames()[insn]


@memoize_method
def iname_to_insns(self):
    """Return a dict mapping each iname to the set of ids of the
    instructions that run inside it. Every iname has an entry.
    """
    result = dict(
            (iname, set()) for iname in self.all_inames())
    for insn in self.instructions:
        for iname in self.insn_inames(insn):
            result[iname].add(insn.id)

    return result


@memoize_method
def reader_map(self):
    """
    :return: a dict that maps variable names to ids of insns that read
        that variable. Only arguments and temporary variables are
        considered; variables that are never read have no entry.
    """
    result = {}

    admissible_vars = (
            set(arg.name for arg in self.args)
            | set(self.temporary_variables.iterkeys()))

    for insn in self.instructions:
        for var_name in insn.get_read_var_names() & admissible_vars:
            result.setdefault(var_name, set()).add(insn.id)

    # Without this return the method always yielded None.
    return result
- """ - result = {} - - for insn in self.instructions: - var_name = insn.get_assignee_var_name() - var_names = [var_name] - - for var_name in var_names: - result.setdefault(var_name, set()).add(insn.id) - - return result - - @memoize_method - def get_read_variables(self): - result = set() - for insn in self.instructions: - result.update(insn.get_read_var_names()) - return result - - @memoize_method - def get_written_variables(self): - return frozenset( - insn.get_assignee_var_name() - for insn in self.instructions) - - # }}} - - # {{{ argument wrangling - - @property - @memoize_method - def arg_dict(self): - return dict((arg.name, arg) for arg in self.args) - - @property - @memoize_method - def scalar_loop_args(self): - if self.args is None: - return [] - else: - from pytools import flatten - loop_arg_names = list(flatten(dom.get_var_names(dim_type.param) - for dom in self.domains)) - return [arg.name for arg in self.args if isinstance(arg, ValueArg) - if arg.name in loop_arg_names] - # }}} - - # {{{ bounds finding - - @memoize_method - def get_iname_bounds(self, iname): - domain = self.get_inames_domain(frozenset([iname])) - d_var_dict = domain.get_var_dict() - - assumptions, domain = isl.align_two(self.assumptions, domain) - - dom_intersect_assumptions = assumptions & domain - - lower_bound_pw_aff = ( - self.cache_manager.dim_min( - dom_intersect_assumptions, - d_var_dict[iname][1]) - .coalesce()) - upper_bound_pw_aff = ( - self.cache_manager.dim_max( - dom_intersect_assumptions, - d_var_dict[iname][1]) - .coalesce()) - - class BoundsRecord(Record): - pass - - size = (upper_bound_pw_aff - lower_bound_pw_aff + 1) - size = size.gist(self.assumptions) - - return BoundsRecord( - lower_bound_pw_aff=lower_bound_pw_aff, - upper_bound_pw_aff=upper_bound_pw_aff, - size=size) - - def find_var_base_indices_and_shape_from_inames( - self, inames, cache_manager, context=None): - if not inames: - return [], [] - - base_indices_and_sizes = [ - 
cache_manager.base_index_and_length( - self.get_inames_domain(iname), iname, context) - for iname in inames] - return zip(*base_indices_and_sizes) - - @memoize_method - def get_constant_iname_length(self, iname): - from loopy.isl_helpers import static_max_of_pw_aff - from loopy.symbolic import aff_to_expr - return int(aff_to_expr(static_max_of_pw_aff( - self.get_iname_bounds(iname).size, - constants_only=True))) - - @memoize_method - def get_grid_sizes(self, ignore_auto=False): - all_inames_by_insns = set() - for insn in self.instructions: - all_inames_by_insns |= self.insn_inames(insn) - - if not all_inames_by_insns <= self.all_inames(): - raise RuntimeError("some inames collected from instructions (%s) " - "are not present in domain (%s)" - % (", ".join(sorted(all_inames_by_insns)), - ", ".join(sorted(self.all_inames())))) - - global_sizes = {} - local_sizes = {} - - from loopy.kernel import ( - GroupIndexTag, LocalIndexTag, - AutoLocalIndexTagBase) - - for iname in self.all_inames(): - tag = self.iname_to_tag.get(iname) - - if isinstance(tag, GroupIndexTag): - tgt_dict = global_sizes - elif isinstance(tag, LocalIndexTag): - tgt_dict = local_sizes - elif isinstance(tag, AutoLocalIndexTagBase) and not ignore_auto: - raise RuntimeError("cannot find grid sizes if automatic local index tags are " - "present") - else: - tgt_dict = None - - if tgt_dict is None: - continue - - size = self.get_iname_bounds(iname).size - - if tag.axis in tgt_dict: - size = tgt_dict[tag.axis].max(size) - - from loopy.isl_helpers import static_max_of_pw_aff - try: - # insist block size is constant - size = static_max_of_pw_aff(size, - constants_only=isinstance(tag, LocalIndexTag)) - except ValueError: - pass - - tgt_dict[tag.axis] = size - - max_dims = self.device.max_work_item_dimensions - - def to_dim_tuple(size_dict, which, forced_sizes={}): - forced_sizes = forced_sizes.copy() - - size_list = [] - sorted_axes = sorted(size_dict.iterkeys()) - - while sorted_axes or forced_sizes: - if 
def get_grid_sizes_as_exprs(self, ignore_auto=False):
    """Return the result of :meth:`get_grid_sizes` converted to expressions.

    :returns: a pair of tuples ``(grid_size, group_size)`` whose entries
        went through :func:`loopy.symbolic.pw_aff_to_expr`
    """
    grid_size, group_size = self.get_grid_sizes(ignore_auto=ignore_auto)

    from loopy.symbolic import pw_aff_to_expr

    grid_exprs = tuple(
            pw_aff_to_expr(size, int_ok=True) for size in grid_size)
    group_exprs = tuple(
            pw_aff_to_expr(size, int_ok=True) for size in group_size)

    return grid_exprs, group_exprs


def local_mem_use(self):
    """Return the sum of ``nbytes`` over all local temporary variables."""
    total = 0
    for temp_var in self.temporary_variables.values():
        if temp_var.is_local:
            total = total + temp_var.nbytes

    return total
self.instructions: - loop_list = ",".join(sorted(self.insn_inames(insn))) - - options = [insn.id] - if insn.priority: - options.append("priority=%d" % insn.priority) - - if len(loop_list) > loop_list_width: - lines.append("[%s]" % loop_list) - lines.append("%s%s <- %s # %s" % ( - (loop_list_width+2)*" ", insn.assignee, - insn.expression, ", ".join(options))) - else: - lines.append("[%s]%s%s <- %s # %s" % ( - loop_list, " "*(loop_list_width-len(loop_list)), - insn.assignee, insn.expression, ", ".join(options))) - - lines.append(sep) - lines.append("DEPENDENCIES:") - for insn in self.instructions: - if insn.insn_deps: - lines.append("%s : %s" % (insn.id, ",".join(insn.insn_deps))) - lines.append(sep) - - if self.schedule is not None: - lines.append("SCHEDULE:") - from loopy.schedule import dump_schedule - lines.append(dump_schedule(self.schedule)) - lines.append(sep) - - return "\n".join(lines) - - # }}} - -# }}} - -# {{{ add and infer argument dtypes - -def add_argument_dtypes(knl, dtype_dict): - dtype_dict = dtype_dict.copy() - new_args = [] - - for arg in knl.args: - new_dtype = dtype_dict.pop(arg.name, None) - if new_dtype is not None: - new_dtype = np.dtype(new_dtype) - if arg.dtype is not None and arg.dtype != new_dtype: - raise RuntimeError( - "argument '%s' already has a different dtype " - "(existing: %s, new: %s)" - % (arg.name, arg.dtype, new_dtype)) - arg = arg.copy(dtype=new_dtype) - - new_args.append(arg) - - knl = knl.copy(args=new_args) - - if dtype_dict: - raise RuntimeError("unused argument dtypes: %s" - % ", ".join(dtype_dict)) - - return knl.copy(args=new_args) - -def infer_argument_dtypes(knl): - new_args = [] - - writer_map = knl.writer_map() - - from loopy.codegen.expression import ( - TypeInferenceMapper, TypeInferenceFailure) - tim = TypeInferenceMapper(knl) - - for arg in knl.args: - if arg.dtype is None: - new_dtype = None - - if arg.name in knl.all_params(): - new_dtype = knl.index_dtype - else: - try: - for write_insn_id in 
writer_map.get(arg.name, ()): - write_insn = knl.id_to_insn[write_insn_id] - new_tim_dtype = tim(write_insn.expression) - if new_dtype is None: - new_dtype = new_tim_dtype - elif new_dtype != new_tim_dtype: - # Now we know *nothing*. - new_dtype = None - break - - except TypeInferenceFailure: - # Even one type inference failure is enough to - # make this dtype not safe to guess. Don't. - pass - - if new_dtype is not None: - arg = arg.copy(dtype=new_dtype) - - new_args.append(arg) - - return knl.copy(args=new_args) - -def get_arguments_with_incomplete_dtype(knl): - return [arg.name for arg in knl.args - if arg.dtype is None] - -# }}} - -# {{{ find_all_insn_inames fixed point iteration - -def find_all_insn_inames(kernel): - from loopy.symbolic import get_dependencies - - writer_map = kernel.writer_map() - - insn_id_to_inames = {} - insn_assignee_inames = {} - - all_read_deps = {} - all_write_deps = {} - - from loopy.subst import expand_subst - kernel = expand_subst(kernel) - - for insn in kernel.instructions: - all_read_deps[insn.id] = read_deps = get_dependencies(insn.expression) - all_write_deps[insn.id] = write_deps = get_dependencies(insn.assignee) - deps = read_deps | write_deps - - iname_deps = ( - deps & kernel.all_inames() - | insn.forced_iname_deps) - - insn_id_to_inames[insn.id] = iname_deps - insn_assignee_inames[insn.id] = write_deps & kernel.all_inames() - - temp_var_names = set(kernel.temporary_variables.iterkeys()) - - # fixed point iteration until all iname dep sets have converged - - # Why is fixed point iteration necessary here? Consider the following - # scenario: - # - # z = expr(iname) - # y = expr(z) - # x = expr(y) - # - # x clearly has a dependency on iname, but this is not found until that - # dependency has propagated all the way up. Doing this recursively is - # not guaranteed to terminate because of circular dependencies. 
- - while True: - did_something = False - for insn in kernel.instructions: - - # {{{ depdency-based propagation - - # For all variables that insn depends on, find the intersection - # of iname deps of all writers, and add those to insn's - # dependencies. - - for tv_name in (all_read_deps[insn.id] & temp_var_names): - implicit_inames = None - - for writer_id in writer_map[tv_name]: - writer_implicit_inames = ( - insn_id_to_inames[writer_id] - - insn_assignee_inames[writer_id]) - if implicit_inames is None: - implicit_inames = writer_implicit_inames - else: - implicit_inames = (implicit_inames - & writer_implicit_inames) - - inames_old = insn_id_to_inames[insn.id] - inames_new = (inames_old | implicit_inames) \ - - insn.reduction_inames() - insn_id_to_inames[insn.id] = inames_new - - if inames_new != inames_old: - did_something = True - - # }}} - - # {{{ domain-based propagation - - # Add all inames occurring in parameters of domains that my current - # inames refer to. - - inames_old = insn_id_to_inames[insn.id] - inames_new = set(insn_id_to_inames[insn.id]) - - for iname in inames_old: - home_domain = kernel.domains[kernel.get_home_domain_index(iname)] - - for par in home_domain.get_var_names(dim_type.param): - if par in kernel.all_inames(): - inames_new.add(par) - - if inames_new != inames_old: - did_something = True - insn_id_to_inames[insn.id] = frozenset(inames_new) - - # }}} - - if not did_something: - break - - return insn_id_to_inames - -# }}} - -# {{{ set operation cache - -class SetOperationCacheManager: - def __init__(self): - # mapping: set hash -> [(set, op, args, result)] - self.cache = {} - - def op(self, set, op_name, op, args): - hashval = hash(set) - bucket = self.cache.setdefault(hashval, []) - - for bkt_set, bkt_op, bkt_args, result in bucket: - if set.plain_is_equal(bkt_set) and op == bkt_op and args == bkt_args: - return result - - #print op, set.get_dim_name(dim_type.set, args[0]) - result = op(*args) - bucket.append((set, op_name, args, 
result)) - return result - - def dim_min(self, set, *args): - return self.op(set, "dim_min", set.dim_min, args) - - def dim_max(self, set, *args): - return self.op(set, "dim_max", set.dim_max, args) - - def base_index_and_length(self, set, iname, context=None): - iname_to_dim = set.space.get_var_dict() - lower_bound_pw_aff = self.dim_min(set, iname_to_dim[iname][1]) - upper_bound_pw_aff = self.dim_max(set, iname_to_dim[iname][1]) - - from loopy.isl_helpers import static_max_of_pw_aff, static_value_of_pw_aff - from loopy.symbolic import pw_aff_to_expr - - size = pw_aff_to_expr(static_max_of_pw_aff( - upper_bound_pw_aff - lower_bound_pw_aff + 1, constants_only=True, - context=context)) - base_index = pw_aff_to_expr( - static_value_of_pw_aff(lower_bound_pw_aff, constants_only=False, - context=context)) - - return base_index, size - -# }}} - -# {{{ domain change helper - -class DomainChanger: - """Helps change the domain responsible for *inames* within a kernel. - - .. note: Does not perform an in-place change! - """ - - def __init__(self, kernel, inames): - self.kernel = kernel - if inames: - ldi = kernel.get_leaf_domain_indices(inames) - if len(ldi) > 1: - raise RuntimeError("Inames '%s' require more than one leaf " - "domain, which makes the domain change that is part " - "of your current operation ambiguous." 
% ", ".join(inames)) - - self.leaf_domain_index, = ldi - self.domain = kernel.domains[self.leaf_domain_index] - - else: - self.domain = kernel.combine_domains(()) - self.leaf_domain_index = None - - def get_domains_with(self, replacement): - result = self.kernel.domains[:] - if self.leaf_domain_index is not None: - result[self.leaf_domain_index] = replacement - else: - result.append(replacement) - - return result - -# }}} - -# {{{ graphviz / dot export - -def get_dot_dependency_graph(kernel, iname_cluster=False, iname_edge=True): - lines = [] - for insn in kernel.instructions: - lines.append("%s [shape=\"box\"];" % insn.id) - for dep in insn.insn_deps: - lines.append("%s -> %s;" % (dep, insn.id)) - - if iname_edge: - for iname in kernel.insn_inames(insn): - lines.append("%s -> %s [style=\"dotted\"];" % (iname, insn.id)) - - if iname_cluster: - for iname in kernel.all_inames(): - lines.append("subgraph cluster_%s { label=\"%s\" %s }" % (iname, iname, - " ".join(insn.id for insn in kernel.instructions - if iname in kernel.insn_inames(insn)))) - - return "digraph loopy_deps {\n%s\n}" % "\n".join(lines) - -# }}} - -# vim: foldmethod=marker diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b07a487fc192b6fc6ab9ae1063e730872e6e591f --- /dev/null +++ b/loopy/kernel/__init__.py @@ -0,0 +1,887 @@ +"""Kernel object.""" + +from __future__ import division + +__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice 
and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + + + + +import numpy as np +from pytools import Record, memoize_method +import islpy as isl +from islpy import dim_type + +from loopy.kernel.creation import UniqueNameGenerator, generate_unique_possibilities + +from loopy.kernel.data import ( + default_function_mangler, + opencl_function_mangler, + single_arg_function_mangler, + + opencl_symbol_mangler, + + default_preamble_generator, + ) + + + + +class CannotBranchDomainTree(RuntimeError): + pass + +# {{{ loop kernel object + +class LoopKernel(Record): + """ + :ivar device: :class:`pyopencl.Device` + :ivar domains: :class:`islpy.BasicSet` + :ivar instructions: + :ivar args: + :ivar schedule: + :ivar name: + :ivar preambles: a list of (tag, code) tuples that identify preamble snippets. + Each tag's snippet is only included once, at its first occurrence. + The preambles will be inserted in order of their tags. + :ivar preamble_generators: a list of functions of signature + (seen_dtypes, seen_functions) where seen_functions is a set of + (name, c_name, arg_dtypes), generating extra entries for `preambles`. + :ivar assumptions: the initial implemented_domain, captures assumptions + on the parameters. (an isl.Set) + :ivar local_sizes: A dictionary from integers to integers, mapping + workgroup axes to their sizes, e.g. *{0: 16}* forces axis 0 to be + length 16. 
+ :ivar temporary_variables: + :ivar iname_to_tag: + :ivar substitutions: a mapping from substitution names to :class:`SubstitutionRule` + objects + :ivar function_manglers: list of functions of signature (name, arg_dtypes) + returning a tuple (result_dtype, c_name) + or a tuple (result_dtype, c_name, arg_dtypes), + where c_name is the C-level function to be called. + :ivar symbol_manglers: list of functions of signature (name) returning + a tuple (result_dtype, c_name), where c_name is the C-level symbol to be + evaluated. + :ivar defines: a dictionary of replacements to be made in instructions given + as strings before parsing. A macro instance intended to be replaced should + look like "MACRO" in the instruction code. The expansion given in this + parameter is allowed to be a list. In this case, instructions are generated + for *each* combination of macro values. + + These defines may also be used in the domain and in argument shapes and + strides. They are expanded only upon kernel creation. + + The following arguments are not user-facing: + + :ivar iname_slab_increments: a dictionary mapping inames to (lower_incr, + upper_incr) tuples that will be separated out in the execution to generate + 'bulk' slabs with fewer conditionals. + :ivar applied_iname_rewrites: A list of past substitution dictionaries that + were applied to the kernel. These are stored so that they may be repeated + on expressions the user specifies later. 
+ :ivar cache_manager: + :ivar isl_context: + """ + + # {{{ constructor + + def __init__(self, device, domains, instructions, args=[], schedule=None, + name="loopy_kernel", + preambles=[], + preamble_generators=[default_preamble_generator], + assumptions=None, + local_sizes={}, + temporary_variables={}, + iname_to_tag={}, + substitutions={}, + function_manglers=[ + default_function_mangler, + opencl_function_mangler, + single_arg_function_mangler, + ], + symbol_manglers=[opencl_symbol_mangler], + defines={}, + + # non-user-facing + iname_slab_increments={}, + applied_iname_rewrites=[], + cache_manager=None, + index_dtype=np.int32, + isl_context=None, + + # When kernels get intersected in slab decomposition, + # their grid sizes shouldn't change. This provides + # a way to forward sub-kernel grid size requests. + get_grid_sizes=None): + """ + :arg domain: a :class:`islpy.BasicSet`, or a string parseable to a basic set by the isl. + Example: "{[i,j]: 0<=i < 10 and 0<= j < 9}" + """ + + if cache_manager is None: + from loopy.kernel.tools import SetOperationCacheManager + cache_manager = SetOperationCacheManager() + + # {{{ make instruction ids unique + + from loopy.kernel.creation import MakeUnique + + insn_ids = set() + for insn in instructions: + if insn.id is not None and not isinstance(insn.id, MakeUnique): + if insn.id in insn_ids: + raise RuntimeError("duplicate instruction id: %s" % insn.id) + insn_ids.add(insn.id) + + insn_id_gen = UniqueNameGenerator(insn_ids) + + new_instructions = [] + + for insn in instructions: + if insn.id is None: + new_instructions.append( + insn.copy(id=insn_id_gen("insn"))) + elif isinstance(insn.id, MakeUnique): + new_instructions.append( + insn.copy(id=insn_id_gen(insn.id.name))) + else: + new_instructions.append(insn) + + instructions = new_instructions + del new_instructions + + # }}} + + # {{{ process assumptions + + if assumptions is None: + dom0_space = domains[0].get_space() + assumptions_space = isl.Space.params_alloc( + 
dom0_space.get_ctx(), dom0_space.dim(dim_type.param)) + for i in xrange(dom0_space.dim(dim_type.param)): + assumptions_space = assumptions_space.set_dim_name( + dim_type.param, i, dom0_space.get_dim_name(dim_type.param, i)) + assumptions = isl.BasicSet.universe(assumptions_space) + + elif isinstance(assumptions, str): + all_inames = set() + all_params = set() + for dom in domains: + all_inames.update(dom.get_var_names(dim_type.set)) + all_params.update(dom.get_var_names(dim_type.param)) + + domain_parameters = all_params-all_inames + + assumptions_set_str = "[%s] -> { : %s}" \ + % (",".join(s for s in domain_parameters), + assumptions) + assumptions = isl.BasicSet.read_from_str(domains[0].get_ctx(), + assumptions_set_str) + + assert assumptions.is_params() + + # }}} + + # {{{ expand macros in arg shapes + + from loopy.kernel.data import ShapedArg + from loopy.kernel.creation import expand_defines_in_expr + + processed_args = [] + for arg in args: + for arg_name in arg.name.split(","): + new_arg = arg.copy(name=arg_name) + if isinstance(arg, ShapedArg): + if arg.shape is not None: + new_arg = new_arg.copy(shape=expand_defines_in_expr(arg.shape, defines)) + if arg.strides is not None: + new_arg = new_arg.copy(strides=expand_defines_in_expr(arg.strides, defines)) + + processed_args.append(new_arg) + + # }}} + + index_dtype = np.dtype(index_dtype) + if index_dtype.kind != 'i': + raise TypeError("index_dtype must be an integer") + if np.iinfo(index_dtype).min >= 0: + raise TypeError("index_dtype must be signed") + + if get_grid_sizes is not None: + # overwrites method down below + self.get_grid_sizes = get_grid_sizes + + Record.__init__(self, + device=device, domains=domains, + instructions=instructions, + args=processed_args, + schedule=schedule, + name=name, + preambles=preambles, + preamble_generators=preamble_generators, + assumptions=assumptions, + iname_slab_increments=iname_slab_increments, + temporary_variables=temporary_variables, + local_sizes=local_sizes, + 
iname_to_tag=iname_to_tag, + substitutions=substitutions, + cache_manager=cache_manager, + applied_iname_rewrites=applied_iname_rewrites, + function_manglers=function_manglers, + symbol_manglers=symbol_manglers, + index_dtype=index_dtype, + isl_context=isl_context) + + # }}} + + # {{{ function mangling + + def register_function_mangler(self, mangler): + return self.copy( + function_manglers=[mangler]+self.function_manglers) + + def mangle_function(self, identifier, arg_dtypes): + for mangler in self.function_manglers: + mangle_result = mangler(identifier, arg_dtypes) + if mangle_result is not None: + return mangle_result + + return None + + # }}} + + # {{{ name wrangling + + @memoize_method + def non_iname_variable_names(self): + return (set(self.arg_dict.iterkeys()) + | set(self.temporary_variables.iterkeys())) + + @memoize_method + def all_variable_names(self): + return ( + set(self.temporary_variables.iterkeys()) + | set(self.substitutions.iterkeys()) + | set(arg.name for arg in self.args) + | set(self.all_inames())) + + def get_var_name_generator(self): + return UniqueNameGenerator(self.all_variable_names()) + + def make_unique_instruction_id(self, insns=None, based_on="insn", extra_used_ids=set()): + if insns is None: + insns = self.instructions + + used_ids = set(insn.id for insn in insns) | extra_used_ids + + for id_str in generate_unique_possibilities(based_on): + if id_str not in used_ids: + return id_str + + def get_var_descriptor(self, name): + try: + return self.arg_dict[name] + except KeyError: + pass + + try: + return self.temporary_variables[name] + except KeyError: + pass + + raise ValueError("nothing known about variable '%s'" % name) + + @property + @memoize_method + def id_to_insn(self): + return dict((insn.id, insn) for insn in self.instructions) + + # }}} + + # {{{ domain wrangling + + @memoize_method + def parents_per_domain(self): + """Return a list corresponding to self.domains (by index) + containing domain indices which are nested around 
this + domain. + + Each domains nest list walks from the leaves of the nesting + tree to the root. + """ + + # The stack of iname sets records which inames are active + # as we step through the linear list of domains. It also + # determines the granularity of inames to be popped/decactivated + # if we ascend a level. + + iname_set_stack = [] + result = [] + + writer_map = self.writer_map() + + for dom in self.domains: + parameters = set(dom.get_var_names(dim_type.param)) + inames = set(dom.get_var_names(dim_type.set)) + + # This next domain may be nested inside the previous domain. + # Or it may not, in which case we need to figure out how many + # levels of parents we need to discard in order to find the + # true parent. + + discard_level_count = 0 + while discard_level_count < len(iname_set_stack): + # {{{ check for parenthood by loop bound iname + + last_inames = iname_set_stack[-1-discard_level_count] + if last_inames & parameters: + break + + # }}} + + # {{{ check for parenthood by written variable + + is_parent_by_variable = False + for par in parameters: + if par in self.temporary_variables: + writer_insns = writer_map[par] + + if len(writer_insns) > 1: + raise RuntimeError("loop bound '%s' " + "may only be written to once" % par) + + writer_insn, = writer_insns + writer_inames = self.insn_inames(writer_insn) + + if writer_inames & last_inames: + is_parent_by_variable = True + break + + if is_parent_by_variable: + break + + # }}} + + discard_level_count += 1 + + if discard_level_count: + iname_set_stack = iname_set_stack[:-discard_level_count] + + if result: + parent = len(result)-1 + else: + parent = None + + for i in range(discard_level_count): + assert parent is not None + parent = result[parent] + + # found this domain's parent + result.append(parent) + + if iname_set_stack: + parent_inames = iname_set_stack[-1] + else: + parent_inames = set() + iname_set_stack.append(parent_inames | inames) + + return result + + @memoize_method + def 
all_parents_per_domain(self): + """Return a list corresponding to self.domains (by index) + containing domain indices which are nested around this + domain. + + Each domains nest list walks from the leaves of the nesting + tree to the root. + """ + result = [] + + ppd = self.parents_per_domain() + for dom, parent in zip(self.domains, ppd): + # keep walking up tree to find *all* parents + dom_result = [] + while parent is not None: + dom_result.insert(0, parent) + parent = ppd[parent] + + result.append(dom_result) + + return result + + @memoize_method + def _get_home_domain_map(self): + return dict( + (iname, i_domain) + for i_domain, dom in enumerate(self.domains) + for iname in dom.get_var_names(dim_type.set)) + + def get_home_domain_index(self, iname): + return self._get_home_domain_map()[iname] + + @memoize_method + def combine_domains(self, domains): + """ + :arg domains: domain indices of domains to be combined. More 'dominant' + domains (those which get most say on the actual dim_type of an iname) + must be later in the order. 
+ """ + assert isinstance(domains, tuple) # for caching + + if not domains: + return isl.BasicSet.universe(isl.Space.set_alloc( + self.isl_context, 0, 0)) + + result = None + for dom_index in domains: + dom = self.domains[dom_index] + if result is None: + result = dom + else: + aligned_dom, aligned_result = isl.align_two( + dom, result, across_dim_types=True) + result = aligned_result & aligned_dom + + return result + + def get_inames_domain(self, inames): + if not inames: + return self.combine_domains(()) + + if isinstance(inames, str): + inames = frozenset([inames]) + if not isinstance(inames, frozenset): + inames = frozenset(inames) + + from warnings import warn + warn("get_inames_domain did not get a frozenset", stacklevel=2) + + return self._get_inames_domain_backend(inames) + + @memoize_method + def get_leaf_domain_indices(self, inames): + """Find the leaves of the domain tree needed to cover all inames.""" + + hdm = self._get_home_domain_map() + ppd = self.all_parents_per_domain() + + domain_indices = set() + + # map root -> leaf + root_to_leaf = {} + + for iname in inames: + home_domain_index = hdm[iname] + if home_domain_index in domain_indices: + # nothin' new + continue + + domain_parents = [home_domain_index] + ppd[home_domain_index] + current_root = domain_parents[-1] + previous_leaf = root_to_leaf.get(current_root) + + if previous_leaf is not None: + # Check that we don't branch the domain tree. + # + # Branching the domain tree is dangerous/ill-formed because + # it can introduce artificial restrictions on variables + # further up the tree. + + prev_parents = set(ppd[previous_leaf]) + if not prev_parents <= set(domain_parents): + raise CannotBranchDomainTree("iname set '%s' requires " + "branch in domain tree (when adding '%s')" + % (", ".join(inames), iname)) + else: + # We're adding a new root. That's fine. 
+ pass + + root_to_leaf[current_root] = home_domain_index + domain_indices.update(domain_parents) + + return root_to_leaf.values() + + @memoize_method + def _get_inames_domain_backend(self, inames): + domain_indices = set() + for leaf_dom_idx in self.get_leaf_domain_indices(inames): + domain_indices.add(leaf_dom_idx) + domain_indices.update(self.all_parents_per_domain()[leaf_dom_idx]) + + return self.combine_domains(tuple(sorted(domain_indices))) + + # }}} + + # {{{ iname wrangling + + @memoize_method + def all_inames(self): + result = set() + for dom in self.domains: + result.update(dom.get_var_names(dim_type.set)) + return frozenset(result) + + @memoize_method + def all_params(self): + all_inames = self.all_inames() + + result = set() + for dom in self.domains: + result.update(set(dom.get_var_names(dim_type.param)) - all_inames) + + return frozenset(result) + + @memoize_method + def all_insn_inames(self): + """Return a mapping from instruction ids to inames inside which + they should be run. + """ + + from loopy.kernel.tools import find_all_insn_inames + return find_all_insn_inames(self) + + @memoize_method + def all_referenced_inames(self): + result = set() + for inames in self.all_insn_inames().itervalues(): + result.update(inames) + return result + + def insn_inames(self, insn): + from loopy.kernel.data import Instruction + if isinstance(insn, Instruction): + return self.all_insn_inames()[insn.id] + else: + return self.all_insn_inames()[insn] + + @memoize_method + def iname_to_insns(self): + result = dict( + (iname, set()) for iname in self.all_inames()) + for insn in self.instructions: + for iname in self.insn_inames(insn): + result[iname].add(insn.id) + + return result + + # }}} + + # {{{ read and written variables + + @memoize_method + def reader_map(self): + """ + :return: a dict that maps variable names to ids of insns that read that variable. 
+ """ + result = {} + + admissible_vars = ( + set(arg.name for arg in self.args) + | set(self.temporary_variables.iterkeys())) + + for insn in self.instructions: + for var_name in insn.get_read_var_names() & admissible_vars: + result.setdefault(var_name, set()).add(insn.id) + + @memoize_method + def writer_map(self): + """ + :return: a dict that maps variable names to ids of insns that write to that variable. + """ + result = {} + + for insn in self.instructions: + var_name = insn.get_assignee_var_name() + var_names = [var_name] + + for var_name in var_names: + result.setdefault(var_name, set()).add(insn.id) + + return result + + @memoize_method + def get_read_variables(self): + result = set() + for insn in self.instructions: + result.update(insn.get_read_var_names()) + return result + + @memoize_method + def get_written_variables(self): + return frozenset( + insn.get_assignee_var_name() + for insn in self.instructions) + + # }}} + + # {{{ argument wrangling + + @property + @memoize_method + def arg_dict(self): + return dict((arg.name, arg) for arg in self.args) + + @property + @memoize_method + def scalar_loop_args(self): + from loopy.kernel.data import ValueArg + + if self.args is None: + return [] + else: + from pytools import flatten + loop_arg_names = list(flatten(dom.get_var_names(dim_type.param) + for dom in self.domains)) + return [arg.name for arg in self.args if isinstance(arg, ValueArg) + if arg.name in loop_arg_names] + # }}} + + # {{{ bounds finding + + @memoize_method + def get_iname_bounds(self, iname): + domain = self.get_inames_domain(frozenset([iname])) + d_var_dict = domain.get_var_dict() + + assumptions, domain = isl.align_two(self.assumptions, domain) + + dom_intersect_assumptions = assumptions & domain + + lower_bound_pw_aff = ( + self.cache_manager.dim_min( + dom_intersect_assumptions, + d_var_dict[iname][1]) + .coalesce()) + upper_bound_pw_aff = ( + self.cache_manager.dim_max( + dom_intersect_assumptions, + d_var_dict[iname][1]) + 
.coalesce()) + + class BoundsRecord(Record): + pass + + size = (upper_bound_pw_aff - lower_bound_pw_aff + 1) + size = size.gist(self.assumptions) + + return BoundsRecord( + lower_bound_pw_aff=lower_bound_pw_aff, + upper_bound_pw_aff=upper_bound_pw_aff, + size=size) + + def find_var_base_indices_and_shape_from_inames( + self, inames, cache_manager, context=None): + if not inames: + return [], [] + + base_indices_and_sizes = [ + cache_manager.base_index_and_length( + self.get_inames_domain(iname), iname, context) + for iname in inames] + return zip(*base_indices_and_sizes) + + @memoize_method + def get_constant_iname_length(self, iname): + from loopy.isl_helpers import static_max_of_pw_aff + from loopy.symbolic import aff_to_expr + return int(aff_to_expr(static_max_of_pw_aff( + self.get_iname_bounds(iname).size, + constants_only=True))) + + @memoize_method + def get_grid_sizes(self, ignore_auto=False): + all_inames_by_insns = set() + for insn in self.instructions: + all_inames_by_insns |= self.insn_inames(insn) + + if not all_inames_by_insns <= self.all_inames(): + raise RuntimeError("some inames collected from instructions (%s) " + "are not present in domain (%s)" + % (", ".join(sorted(all_inames_by_insns)), + ", ".join(sorted(self.all_inames())))) + + global_sizes = {} + local_sizes = {} + + from loopy.kernel.data import ( + GroupIndexTag, LocalIndexTag, + AutoLocalIndexTagBase) + + for iname in self.all_inames(): + tag = self.iname_to_tag.get(iname) + + if isinstance(tag, GroupIndexTag): + tgt_dict = global_sizes + elif isinstance(tag, LocalIndexTag): + tgt_dict = local_sizes + elif isinstance(tag, AutoLocalIndexTagBase) and not ignore_auto: + raise RuntimeError("cannot find grid sizes if automatic local index tags are " + "present") + else: + tgt_dict = None + + if tgt_dict is None: + continue + + size = self.get_iname_bounds(iname).size + + if tag.axis in tgt_dict: + size = tgt_dict[tag.axis].max(size) + + from loopy.isl_helpers import static_max_of_pw_aff + 
try: + # insist block size is constant + size = static_max_of_pw_aff(size, + constants_only=isinstance(tag, LocalIndexTag)) + except ValueError: + pass + + tgt_dict[tag.axis] = size + + max_dims = self.device.max_work_item_dimensions + + def to_dim_tuple(size_dict, which, forced_sizes={}): + forced_sizes = forced_sizes.copy() + + size_list = [] + sorted_axes = sorted(size_dict.iterkeys()) + + while sorted_axes or forced_sizes: + if sorted_axes: + cur_axis = sorted_axes.pop(0) + else: + cur_axis = None + + if len(size_list) in forced_sizes: + size_list.append( + forced_sizes.pop(len(size_list))) + continue + + assert cur_axis is not None + + if cur_axis > len(size_list): + raise RuntimeError("%s axis %d unused" % ( + which, len(size_list))) + + size_list.append(size_dict[cur_axis]) + + if len(size_list) > max_dims: + raise ValueError("more %s dimensions assigned than supported " + "by hardware (%d > %d)" % (which, len(size_list), max_dims)) + + return tuple(size_list) + + return (to_dim_tuple(global_sizes, "global"), + to_dim_tuple(local_sizes, "local", forced_sizes=self.local_sizes)) + + def get_grid_sizes_as_exprs(self, ignore_auto=False): + grid_size, group_size = self.get_grid_sizes(ignore_auto=ignore_auto) + + def tup_to_exprs(tup): + from loopy.symbolic import pw_aff_to_expr + return tuple(pw_aff_to_expr(i, int_ok=True) for i in tup) + + return tup_to_exprs(grid_size), tup_to_exprs(group_size) + + # }}} + + # {{{ local memory + + @memoize_method + def local_var_names(self): + return set( + tv.name + for tv in self.temporary_variables.itervalues() + if tv.is_local) + + def local_mem_use(self): + return sum(lv.nbytes for lv in self.temporary_variables.itervalues() + if lv.is_local) + + # }}} + + # {{{ pretty-printing + + def __str__(self): + lines = [] + + sep = 75*"-" + lines.append(sep) + lines.append("INAME-TO-TAG MAP:") + for iname in sorted(self.all_inames()): + line = "%s: %s" % (iname, self.iname_to_tag.get(iname)) + lines.append(line) + + 
lines.append(sep) + lines.append("DOMAINS:") + for dom, parents in zip(self.domains, self.all_parents_per_domain()): + lines.append(len(parents)*" " + str(dom)) + + if self.substitutions: + lines.append(sep) + lines.append("SUBSTIUTION RULES:") + for rule_name in sorted(self.substitutions.iterkeys()): + lines.append(str(self.substitutions[rule_name])) + + lines.append(sep) + lines.append("INSTRUCTIONS:") + loop_list_width = 35 + for insn in self.instructions: + loop_list = ",".join(sorted(self.insn_inames(insn))) + + options = [insn.id] + if insn.priority: + options.append("priority=%d" % insn.priority) + + if len(loop_list) > loop_list_width: + lines.append("[%s]" % loop_list) + lines.append("%s%s <- %s # %s" % ( + (loop_list_width+2)*" ", insn.assignee, + insn.expression, ", ".join(options))) + else: + lines.append("[%s]%s%s <- %s # %s" % ( + loop_list, " "*(loop_list_width-len(loop_list)), + insn.assignee, insn.expression, ", ".join(options))) + + lines.append(sep) + lines.append("DEPENDENCIES:") + for insn in self.instructions: + if insn.insn_deps: + lines.append("%s : %s" % (insn.id, ",".join(insn.insn_deps))) + lines.append(sep) + + if self.schedule is not None: + lines.append("SCHEDULE:") + from loopy.schedule import dump_schedule + lines.append(dump_schedule(self.schedule)) + lines.append(sep) + + return "\n".join(lines) + + # }}} + +# }}} + +# vim: foldmethod=marker diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py new file mode 100644 index 0000000000000000000000000000000000000000..70f7bdb42af60434cb68287a642b0690a1111044 --- /dev/null +++ b/loopy/kernel/creation.py @@ -0,0 +1,667 @@ +"""UI for kernel creation.""" + +from __future__ import division + +__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without 
class UniqueNameGenerator:
    """Hand out identifiers that do not collide with already-known names.

    The generator keeps its own copy of the names passed to the constructor,
    so registering further names never modifies the caller's set.
    """

    def __init__(self, existing_names):
        """
        :arg existing_names: a set of names that are already taken. Only a
            copy (via ``.copy()``) is stored.
        """
        self.existing_names = existing_names.copy()

    def is_name_conflicting(self, name):
        """Return *True* if *name* is already registered."""
        return name in self.existing_names

    def add_name(self, name):
        """Register *name* as taken.

        :raises ValueError: if *name* is already registered.
        """
        if self.is_name_conflicting(name):
            # The message used to be emitted with a literal, un-interpolated
            # '%s'; include the offending name.
            raise ValueError("name '%s' conflicts with existing names" % name)
        self.existing_names.add(name)

    def add_names(self, names):
        """Register every entry of *names* via :meth:`add_name`."""
        for name in names:
            self.add_name(name)

    def __call__(self, based_on="var"):
        """Return a fresh name derived from *based_on* and register it.

        Candidates are taken from :func:`generate_unique_possibilities` in
        order; the first one that is not yet registered wins.
        """
        for var_name in generate_unique_possibilities(based_on):
            if not self.is_name_conflicting(var_name):
                break

        self.existing_names.add(var_name)
        return var_name
def parse_domains(ctx, args_and_vars, domains, defines):
    """Turn the user-supplied *domains* into a list of isl set objects.

    :arg ctx: the isl context handed to ``isl.BasicSet.read_from_str`` when a
        domain is given as a string.
    :arg args_and_vars: a set of names that may serve as domain parameters.
        It is copied, not modified.
    :arg domains: an iterable whose entries are either strings in isl syntax
        or ``isl.Set``/``isl.BasicSet`` instances.
    :arg defines: passed to :func:`expand_defines` for string domains.
    :returns: a list with one parsed domain per entry of *domains*.
    :raises RuntimeError: if a set dimension has no name, or if an iname is
        defined by more than one domain.
    """
    result = []
    # Grows as domains are processed: the inames of each domain are added so
    # that later domains may pick them up as parameters.
    available_parameters = args_and_vars.copy()
    used_inames = set()

    for dom in domains:
        if isinstance(dom, str):
            # expand_defines is a generator; the single-target unpacking
            # insists on exactly one expansion.
            dom, = expand_defines(dom, defines)

            if not dom.lstrip().startswith("["):
                # i.e. if no parameters are already given
                # Build the "[params] ->" prefix from those identifiers in
                # the string that are known parameter names.
                ids = _gather_identifiers(dom)
                parameters = ids & available_parameters
                dom = "[%s] -> %s" % (",".join(parameters), dom)

            try:
                dom = isl.BasicSet.read_from_str(ctx, dom)
            except:
                # Report which domain string failed, then re-raise unchanged.
                print "failed to parse domain '%s'" % dom
                raise
        else:
            assert isinstance(dom, (isl.Set, isl.BasicSet))
            # assert dom.get_ctx() == ctx

        # Every set dimension of the domain is an iname.
        for i_iname in xrange(dom.dim(dim_type.set)):
            iname = dom.get_dim_name(dim_type.set, i_iname)

            if iname is None:
                raise RuntimeError("domain '%s' provided no iname at index "
                        "%d (redefined iname?)" % (dom, i_iname))

            if iname in used_inames:
                raise RuntimeError("domain '%s' redefines iname '%s' "
                        "that is part of a previous domain" % (dom, iname))

            used_inames.add(iname)
            available_parameters.add(iname)

        result.append(dom)

    return result
# Matches "[<dtype>] lhs = rhs [{options}]". The look-behind keeps the "="
# of a ":=" (substitution rule) from being taken as an assignment.
INSN_RE = re.compile(
        r"\s*(?:\<(?P<temp_var_type>.*?)\>)?"
        r"\s*(?P<lhs>.+?)\s*(?<!\:)=\s*(?P<rhs>.+?)"
        r"\s*?(?:\{(?P<options>[\s\w=,:]+)\}\s*)?$"
        )
# Matches "lhs := rhs" (substitution rule).
SUBST_RE = re.compile(
        r"^\s*(?P<lhs>.+?)\s*:=\s*(?P<rhs>.+)\s*$"
        )

def parse_insn(insn):
    """Parse one line of kernel source into an instruction or substitution rule.

    Two forms are recognized::

        [<dtype>] lhs = rhs [{id=..., priority=..., dep=a:b}]
        name(arg, ...) := rhs

    A leading ``<dtype>`` requests creation of a temporary of that type; an
    empty ``<>`` leaves the type to ``loopy.infer_type``.

    :arg insn: the instruction text.
    :returns: an :class:`Instruction` for the first form, a
        :class:`SubstitutionRule` for the second.
    :raises RuntimeError: if the text matches neither (or both) forms, if an
        empty option is given, or if a left-hand side has an unsupported shape.
    :raises ValueError: on an unrecognized option, or if ``priority``/``dep``
        is given without a value.
    """
    insn_match = INSN_RE.match(insn)
    subst_match = SUBST_RE.match(insn)
    if insn_match is not None and subst_match is not None:
        raise RuntimeError("instruction parse error: %s" % insn)

    if insn_match is not None:
        groups = insn_match.groupdict()
    elif subst_match is not None:
        groups = subst_match.groupdict()
    else:
        raise RuntimeError("instruction parse error: %s" % insn)

    from loopy.symbolic import parse
    lhs = parse(groups["lhs"])
    rhs = parse(groups["rhs"])

    if insn_match is not None:
        insn_deps = set()
        insn_id = None
        priority = 0

        if groups["options"] is not None:
            for option in groups["options"].split(","):
                option = option.strip()
                if not option:
                    raise RuntimeError("empty option supplied")

                equal_idx = option.find("=")
                if equal_idx == -1:
                    opt_key = option
                    opt_value = None
                else:
                    opt_key = option[:equal_idx].strip()
                    opt_value = option[equal_idx+1:].strip()

                if opt_key == "id":
                    insn_id = opt_value
                elif opt_key in ("priority", "dep"):
                    # These options are meaningless without a value; fail
                    # with a clear message rather than inside int()/split().
                    if opt_value is None:
                        raise ValueError("instruction option '%s' requires "
                                "a value" % opt_key)

                    if opt_key == "priority":
                        priority = int(opt_value)
                    else:
                        insn_deps = set(opt_value.split(":"))
                else:
                    raise ValueError("unrecognized instruction option '%s'"
                            % opt_key)

        if groups["temp_var_type"] is not None:
            if groups["temp_var_type"]:
                temp_var_type = np.dtype(groups["temp_var_type"])
            else:
                # "<>": a temporary is requested, its type is to be inferred.
                from loopy import infer_type
                temp_var_type = infer_type
        else:
            temp_var_type = None

        from pymbolic.primitives import Variable, Subscript
        if not isinstance(lhs, (Variable, Subscript)):
            raise RuntimeError("left hand side of assignment '%s' must "
                    "be variable or subscript" % lhs)

        return Instruction(
                id=insn_id,
                insn_deps=insn_deps,
                forced_iname_deps=frozenset(),
                assignee=lhs, expression=rhs,
                temp_var_type=temp_var_type,
                priority=priority)

    elif subst_match is not None:
        from pymbolic.primitives import Variable, Call

        if isinstance(lhs, Variable):
            subst_name = lhs.name
            arg_names = []
        elif isinstance(lhs, Call):
            if not isinstance(lhs.function, Variable):
                raise RuntimeError("Invalid substitution rule left-hand side")
            subst_name = lhs.function.name
            arg_names = []

            for i, arg in enumerate(lhs.parameters):
                if not isinstance(arg, Variable):
                    raise RuntimeError("Invalid substitution rule "
                            "left-hand side: %s--arg number %d "
                            "is not a variable"% (lhs, i))
                arg_names.append(arg.name)
        else:
            raise RuntimeError("Invalid substitution rule left-hand side")

        return SubstitutionRule(
                name=subst_name,
                arguments=tuple(arg_names),
                expression=rhs)
def check_for_duplicate_names(knl):
    """Make sure no name is shared between the kernel's namespaces.

    Inames, arguments, temporaries and substitution rules are visited in that
    order; the first name seen twice aborts the check.

    :raises RuntimeError: naming the clashing identifier, the kind it was
        being registered as, and the kind it was first seen as.
    """
    def names_with_kind():
        for iname in knl.all_inames():
            yield iname, "iname"
        for kernel_arg in knl.args:
            yield kernel_arg.name, "argument"
        for temp_name in knl.temporary_variables:
            yield temp_name, "temporary"
        for subst_name in knl.substitutions:
            yield subst_name, "substitution"

    first_seen_as = {}
    for name, kind in names_with_kind():
        try:
            earlier_kind = first_seen_as[name]
        except KeyError:
            first_seen_as[name] = kind
        else:
            raise RuntimeError("invalid %s name '%s'--name already used as "
                    "%s" % (kind, name, earlier_kind))
def expand_cses(knl):
    """Replace common subexpressions by assignments to new scalar temporaries.

    Every instruction's right-hand side is run through
    :class:`CSEToAssignmentMapper`. For each common subexpression that the
    mapper hands to its callback, a new scalar temporary (``shape=()``,
    ``is_local=None``) and an instruction assigning to it are created. The
    new instruction is placed in front of the instruction that uses it.

    :returns: a copy of *knl* with updated ``instructions`` and
        ``temporary_variables``.
    """
    # TemporaryVariable is defined in loopy.kernel.data; import it from
    # there, as create_temporaries does.
    from loopy.kernel.data import TemporaryVariable
    from pymbolic.primitives import Variable

    new_insns = []
    newly_created_insn_ids = set()
    new_temp_vars = knl.temporary_variables.copy()
    var_name_gen = knl.get_var_name_generator()

    def add_assignment(base_name, expr, dtype):
        """Create temporary + assignment for *expr*, return the new name."""
        if base_name is None:
            base_name = "var"

        new_var_name = var_name_gen(base_name)

        if dtype is None:
            from loopy import infer_type
            dtype = infer_type
        else:
            dtype = np.dtype(dtype)

        new_temp_vars[new_var_name] = TemporaryVariable(
                name=new_var_name,
                dtype=dtype,
                is_local=None,
                shape=())

        insn = Instruction(
                # Ids handed out during this pass are not part of knl yet, so
                # they have to be passed along to stay unique.
                id=knl.make_unique_instruction_id(
                    extra_used_ids=newly_created_insn_ids),
                assignee=Variable(new_var_name), expression=expr)
        newly_created_insn_ids.add(insn.id)
        new_insns.append(insn)

        return new_var_name

    cseam = CSEToAssignmentMapper(add_assignment=add_assignment)

    for insn in knl.instructions:
        # The mapper call runs first and appends the CSE assignments, so they
        # precede the instruction that reads them.
        new_insns.append(insn.copy(expression=cseam(insn.expression)))

    return knl.copy(
            instructions=new_insns,
            temporary_variables=new_temp_vars)
def make_kernel(device, domains, instructions, kernel_args=[], *args, **kwargs):
    """User-facing kernel creation entrypoint.

    :arg device: passed through to :class:`loopy.kernel.LoopKernel`.
    :arg domains: a single domain or a list of domains; each is either a
        string or an isl set (see :func:`parse_domains`).
    :arg instructions: a string or a list whose entries are strings or
        :class:`Instruction` instances (see :func:`parse_if_necessary`).
    :arg kernel_args: the kernel's argument declarations.

    Of the keyword arguments, ``defines`` and ``temporary_variables`` are
    read here; all keyword arguments are forwarded to ``LoopKernel``.
    ``substitutions`` and ``isl_context`` are filled in by this function.

    :returns: the kernel after reduction-iname tagging, temporary creation,
        CSE expansion and sanity checks.
    :raises RuntimeError: if one of the internal-only keyword arguments
        listed below is supplied.
    """

    # These are set by this function or by loopy internals, never by users.
    for forbidden_kwarg in [
            "substitutions",
            "iname_slab_increments",
            "applied_iname_rewrites",
            "cache_manager",
            "isl_context",
            ]:
        if forbidden_kwarg in kwargs:
            raise RuntimeError("'%s' is not part of user-facing interface"
                    % forbidden_kwarg)

    # Only read here; both keys stay in kwargs and reach LoopKernel as well.
    defines = kwargs.get("defines", {})
    temporary_variables = kwargs.get("temporary_variables", {})

    # {{{ instruction/subst parsing

    parsed_instructions = []
    kwargs["substitutions"] = substitutions = {}

    if isinstance(instructions, str):
        instructions = [instructions]
    for insn in instructions:
        # One input entry may yield several instructions/substitution rules.
        for new_insn in parse_if_necessary(insn, defines):
            if isinstance(new_insn, Instruction):
                parsed_instructions.append(new_insn)
            elif isinstance(new_insn, SubstitutionRule):
                substitutions[new_insn.name] = new_insn
            else:
                raise RuntimeError("unexpected type in instruction parsing")

    instructions = parsed_instructions
    del parsed_instructions

    # }}}

    # Ordering dependency:
    # Domain construction needs to know what temporary variables are
    # available. That information can only be obtained once instructions
    # are parsed.

    # {{{ parse domains

    if isinstance(domains, str):
        domains = [domains]

    # Reuse the isl context of an already-constructed BasicSet domain if
    # there is one (the last such domain wins); otherwise make a new context.
    isl_context = None
    for domain in domains:
        if isinstance(domain, isl.BasicSet):
            isl_context = domain.get_ctx()
    if isl_context is None:
        isl_context = isl.Context()

    from loopy.kernel.data import ValueArg
    scalar_arg_names = set(arg.name for arg in kernel_args if isinstance(arg, ValueArg))
    # Explicitly passed temporaries plus those declared inline by an
    # instruction (i.e. instructions carrying a temp_var_type).
    var_names = (
            set(temporary_variables)
            | set(insn.get_assignee_var_name()
                for insn in instructions
                if insn.temp_var_type is not None))
    domains = parse_domains(isl_context, scalar_arg_names | var_names, domains,
            defines)

    kwargs["isl_context"] = isl_context

    # }}}

    from loopy.kernel import LoopKernel
    knl = LoopKernel(device, domains, instructions, kernel_args, *args, **kwargs)

    check_for_nonexistent_iname_deps(knl)
    check_for_reduction_inames_duplication_requests(knl)

    knl = tag_reduction_inames_as_sequential(knl)
    knl = create_temporaries(knl)
    knl = expand_cses(knl)

    # -------------------------------------------------------------------------
    # Ordering dependency:
    # -------------------------------------------------------------------------
    # Must create temporaries before checking for writes to temporary variables
    # that are domain parameters.
    # -------------------------------------------------------------------------

    check_for_multiple_writes_to_loop_bounds(knl)
    check_for_duplicate_names(knl)
    check_written_variable_names(knl)

    return knl
def parse_tag(tag):
    """Convert a user-supplied iname tag into an :class:`IndexTag` (or *None*).

    *None* and :class:`IndexTag` instances are returned unchanged. Strings
    are interpreted as follows:

    * ``"for"``: no tag (*None*)
    * ``"unr"``: :class:`UnrollTag`
    * ``"ilp"``, ``"ilp.unr"``: :class:`UnrolledIlpTag`
    * ``"ilp.seq"``: :class:`LoopedIlpTag`
    * ``"g.N"``: :class:`GroupIndexTag` for axis ``N``
    * ``"l.N"``: :class:`LocalIndexTag` for axis ``N``
    * ``"l.auto"``: :class:`AutoFitLocalIndexTag`

    :raises ValueError: if *tag* is neither of the above.
    """
    if tag is None or isinstance(tag, IndexTag):
        return tag

    if not isinstance(tag, str):
        raise ValueError("cannot parse tag: %s" % tag)

    if tag == "for":
        return None
    if tag == "unr":
        return UnrollTag()
    if tag in ("ilp", "ilp.unr"):
        return UnrolledIlpTag()
    if tag == "ilp.seq":
        return LoopedIlpTag()

    # Remaining forms are "<kind>.<axis>" with a two-character prefix.
    prefix, axis_spec = tag[:2], tag[2:]

    if prefix == "g.":
        return GroupIndexTag(int(axis_spec))

    if prefix == "l.":
        if axis_spec == "auto":
            return AutoFitLocalIndexTag()
        return LocalIndexTag(int(axis_spec))

    raise ValueError("cannot parse tag: %s" % tag)
class GlobalArg(ShapedArg):
    """A :class:`ShapedArg` kernel argument (see there for the parameters)."""

    def __repr__(self):
        return "<GlobalArg '%s' of type %s and shape (%s)>" % (
                self.name, self.dtype, ",".join(str(i) for i in self.shape))

class ArrayArg(GlobalArg):
    """Deprecated name of :class:`GlobalArg`; warns on construction."""

    def __init__(self, *args, **kwargs):
        from warnings import warn
        warn("ArrayArg is a deprecated name of GlobalArg", DeprecationWarning,
                stacklevel=2)
        GlobalArg.__init__(self, *args, **kwargs)

class ConstantArg(ShapedArg):
    """A :class:`ShapedArg` kernel argument (see there for the parameters)."""

    def __repr__(self):
        return "<ConstantArg '%s' of type %s and shape (%s)>" % (
                self.name, self.dtype, ",".join(str(i) for i in self.shape))

class ImageArg(Record):
    """An image kernel argument.

    :arg name: the argument name.
    :arg dtype: anything ``numpy.dtype`` accepts, or *None* if not yet known.
    :arg dimensions: the number of image dimensions. May be omitted if
        *shape* is given.
    :arg shape: optional; if given, its length determines (and must agree
        with) *dimensions*.
    """

    def __init__(self, name, dtype=None, dimensions=None, shape=None):
        # np.dtype(None) is float64, which would silently turn "dtype not
        # known yet" into a concrete type. Keep None, as the other argument
        # classes do.
        if dtype is not None:
            dtype = np.dtype(dtype)

        if shape is not None:
            if dimensions is not None and dimensions != len(shape):
                raise RuntimeError("cannot specify both shape and "
                        "disagreeing dimensions in ImageArg")
            dimensions = len(shape)
        else:
            if not isinstance(dimensions, int):
                raise RuntimeError("ImageArg: dimensions must be an integer")

        Record.__init__(self,
                dimensions=dimensions,
                shape=shape,
                dtype=dtype,
                name=name)


    def __repr__(self):
        return "<ImageArg '%s' of type %s>" % (self.name, self.dtype)

class ValueArg(Record):
    """A by-value kernel argument.

    :arg name: the argument name.
    :arg dtype: anything ``numpy.dtype`` accepts, or *None* if not yet known.
    :arg approximately: stored as given.
    """

    def __init__(self, name, dtype=None, approximately=None):
        if dtype is not None:
            dtype = np.dtype(dtype)

        Record.__init__(self, name=name, dtype=dtype,
                approximately=approximately)

    def __repr__(self):
        return "<ValueArg '%s' of type %s>" % (self.name, self.dtype)

class ScalarArg(ValueArg):
    """Deprecated name of :class:`ValueArg`; warns on construction."""

    def __init__(self, name, dtype=None, approximately=None):
        from warnings import warn
        warn("ScalarArg is a deprecated name of ValueArg",
                DeprecationWarning, stacklevel=2)

        ValueArg.__init__(self, name, dtype, approximately)

# }}}

# {{{ temporary variable

class TemporaryVariable(Record):
    """
    :ivar name:
    :ivar dtype:
    :ivar shape:
    :ivar storage_shape:
    :ivar base_indices:
    :ivar is_local:
    """

    def __init__(self, name, dtype, shape, is_local, base_indices=None,
            storage_shape=None):
        # Normalize the shape first so that any iterable is acceptable below.
        if shape is not None and not isinstance(shape, tuple):
            shape = tuple(shape)

        # Default to zero base indices, one per axis. Without a shape there
        # is nothing to derive them from, so they stay None.
        if base_indices is None and shape is not None:
            base_indices = (0,) * len(shape)

        Record.__init__(self, name=name, dtype=dtype, shape=shape, is_local=is_local,
                base_indices=base_indices,
                storage_shape=storage_shape)

    @property
    def nbytes(self):
        """Product of the shape entries times the dtype's item size."""
        from pytools import product
        return product(si for si in self.shape)*self.dtype.itemsize
class Instruction(Record):
    """
    :ivar id: An (otherwise meaningless) identifier that is unique within
        a :class:`LoopKernel`.
    :ivar assignee:
    :ivar expression:
    :ivar forced_iname_deps: a set of inames that are added to the list of iname
        dependencies
    :ivar insn_deps: a set of ids of :class:`Instruction` instances that
        *must* be executed before this one. Note that loop scheduling augments this
        by adding dependencies on any writes to temporaries read by this instruction.
    :ivar boostable: Whether the instruction may safely be executed
        inside more loops than advertised without changing the meaning
        of the program. Allowed values are *None* (for unknown), *True*, and *False*.
    :ivar boostable_into: a set of inames into which the instruction
        may need to be boosted, as a heuristic help for the scheduler.
    :ivar priority: scheduling priority

    The following instance variable is only used until :func:`loopy.make_kernel` is
    finished:

    :ivar temp_var_type: if not None, a type that will be assigned to the new temporary variable
        created from the assignee
    """
    def __init__(self,
            id, assignee, expression,
            forced_iname_deps=frozenset(), insn_deps=None, boostable=None,
            boostable_into=None,
            temp_var_type=None, priority=0):
        """
        *assignee* and *expression* may be given as strings, in which case
        they are parsed with :func:`loopy.symbolic.parse`. *insn_deps*
        defaults to a new, empty set.
        """

        from loopy.symbolic import parse
        if isinstance(assignee, str):
            assignee = parse(assignee)
        if isinstance(expression, str):
            # The parsed expression was previously stored in 'assignee',
            # clobbering the left-hand side and leaving the expression a str.
            expression = parse(expression)

        if insn_deps is None:
            # A fresh set per instance: a mutable default in the signature
            # would be shared by all instructions created without insn_deps.
            insn_deps = set()

        assert isinstance(forced_iname_deps, frozenset)
        assert isinstance(insn_deps, set)

        Record.__init__(self,
                id=id, assignee=assignee, expression=expression,
                forced_iname_deps=forced_iname_deps,
                insn_deps=insn_deps, boostable=boostable,
                boostable_into=boostable_into,
                temp_var_type=temp_var_type,
                priority=priority)

    @memoize_method
    def reduction_inames(self):
        """Return the set of inames used by reductions in the expression."""
        def map_reduction(expr, rec):
            rec(expr.expr)
            for iname in expr.inames:
                result.add(iname)

        from loopy.symbolic import ReductionCallbackMapper
        cb_mapper = ReductionCallbackMapper(map_reduction)

        result = set()
        cb_mapper(self.expression)

        return result

    def __str__(self):
        result = "%s: %s <- %s" % (self.id,
                self.assignee, self.expression)

        if self.boostable == True:
            if self.boostable_into:
                result += " (boostable into '%s')" % ",".join(self.boostable_into)
            else:
                result += " (boostable)"
        elif self.boostable == False:
            result += " (not boostable)"
        elif self.boostable is None:
            pass
        else:
            raise RuntimeError("unexpected value for Instruction.boostable")

        options = []

        if self.insn_deps:
            # sorted: insn_deps is a set, keep the output reproducible
            options.append("deps="+":".join(sorted(self.insn_deps)))
        if self.priority:
            options.append("priority=%d" % self.priority)

        # The options used to be collected and then dropped. Show them in the
        # same "# opt, opt" form that LoopKernel.__str__ uses.
        if options:
            result += " # " + ", ".join(options)

        return result

    @memoize_method
    def get_assignee_var_name(self):
        """Return the name of the variable being written.

        :raises RuntimeError: if the assignee is neither a variable nor a
            subscript.
        """
        from pymbolic.primitives import Variable, Subscript

        if isinstance(self.assignee, Variable):
            var_name = self.assignee.name
        elif isinstance(self.assignee, Subscript):
            agg = self.assignee.aggregate
            assert isinstance(agg, Variable)
            var_name = agg.name
        else:
            raise RuntimeError("invalid lvalue '%s'" % self.assignee)

        return var_name

    @memoize_method
    def get_assignee_indices(self):
        """Return the assignee's subscript as a tuple (empty for a plain
        variable).

        :raises RuntimeError: if the assignee is neither a variable nor a
            subscript.
        """
        from pymbolic.primitives import Variable, Subscript

        if isinstance(self.assignee, Variable):
            return ()
        elif isinstance(self.assignee, Subscript):
            result = self.assignee.index
            if not isinstance(result, tuple):
                result = (result,)
            return result
        else:
            raise RuntimeError("invalid lvalue '%s'" % self.assignee)

    @memoize_method
    def get_read_var_names(self):
        """Return the result of ``get_dependencies`` on the expression."""
        from loopy.symbolic import get_dependencies
        return get_dependencies(self.expression)
def default_preamble_generator(seen_dtypes, seen_functions):
    """Yield ``(sort_key, code)`` preamble snippets needed by the kernel.

    :arg seen_dtypes: the dtypes occurring in the kernel. Each entry is
        compared against numpy types and has its ``kind`` inspected.
    :arg seen_functions: an iterable of ``(name, c_name, arg_dtypes)``
        tuples.

    Yields, in order: everything ``reduction_preamble_generator`` yields,
    the fp64 extension pragma if a double-precision type was seen, the
    complex-number header if a complex type was seen, and the integer
    floor-division helper macros if the corresponding functions were seen.
    """
    from loopy.reduction import reduction_preamble_generator

    for result in reduction_preamble_generator(seen_dtypes, seen_functions):
        yield result

    has_double = False
    has_complex = False

    for dtype in seen_dtypes:
        if dtype in [np.float64, np.complex128]:
            has_double = True
        if dtype.kind == "c":
            has_complex = True

    if has_double:
        yield ("00_enable_double", """
            #pragma OPENCL EXTENSION cl_khr_fp64: enable
            """)

    if has_complex:
        # The header name had been lost, leaving a bare '#include'.
        # PYOPENCL_DEFINE_CDOUBLE is the switch understood by
        # pyopencl-complex.h.
        if has_double:
            yield ("10_include_complex_header", """
                #define PYOPENCL_DEFINE_CDOUBLE

                #include <pyopencl-complex.h>
                """)
        else:
            yield ("10_include_complex_header", """
                #include <pyopencl-complex.h>
                """)

    c_funcs = set(c_name for name, c_name, arg_dtypes in seen_functions)
    if "int_floor_div" in c_funcs:
        yield ("05_int_floor_div", """
            #define int_floor_div(a,b) \
              (( (a) - \
                 ( ( (a)<0 ) != ( (b)<0 )) \
                  *( (b) + ( (b)<0 ) - ( (b)>=0 ) )) \
               / (b) )
            """)

    if "int_floor_div_pos_b" in c_funcs:
        yield ("05_int_floor_div_pos_b", """
            #define int_floor_div_pos_b(a,b) ( \
                ( (a) - ( ((a)<0) ? ((b)-1) : 0 )  ) / (b) \
                )
            """)
# {{{ add and infer argument dtypes

def add_argument_dtypes(knl, dtype_dict):
    """Return a copy of *knl* with the given argument dtypes filled in.

    :arg knl: the kernel whose ``args`` are to be updated.
    :arg dtype_dict: a mapping from argument name to anything accepted by
        :class:`numpy.dtype`. The mapping itself is not modified. Entries
        whose value is *None* are ignored.
    :returns: a new kernel obtained through ``knl.copy(args=...)``.
    :raises RuntimeError: if an argument already carries a different dtype,
        or if *dtype_dict* names arguments that *knl* does not have.
    """
    import numpy as np

    dtype_dict = dtype_dict.copy()
    new_args = []

    for arg in knl.args:
        new_dtype = dtype_dict.pop(arg.name, None)
        if new_dtype is not None:
            new_dtype = np.dtype(new_dtype)
            if arg.dtype is not None and arg.dtype != new_dtype:
                raise RuntimeError(
                        "argument '%s' already has a different dtype "
                        "(existing: %s, new: %s)"
                        % (arg.name, arg.dtype, new_dtype))
            arg = arg.copy(dtype=new_dtype)

        new_args.append(arg)

    # Whatever is left over did not match any argument name.
    if dtype_dict:
        raise RuntimeError("unused argument dtypes: %s"
                % ", ".join(dtype_dict))

    # Copy exactly once, and only after validation has passed.
    return knl.copy(args=new_args)

def infer_argument_dtypes(knl):
    """Return a copy of *knl* with argument dtypes guessed where possible.

    Arguments that are domain parameters (``knl.all_params()``) receive
    ``knl.index_dtype``. For any other argument without a dtype, the
    expressions of all instructions writing to it are type-inferred; the
    dtype is adopted only if every writer agrees and none of the inferences
    fails. Arguments that already have a dtype are left alone.
    """
    new_args = []

    writer_map = knl.writer_map()
    all_params = knl.all_params()

    from loopy.codegen.expression import (
            TypeInferenceMapper, TypeInferenceFailure)
    tim = TypeInferenceMapper(knl)

    for arg in knl.args:
        if arg.dtype is None:
            new_dtype = None

            if arg.name in all_params:
                new_dtype = knl.index_dtype
            else:
                try:
                    for write_insn_id in writer_map.get(arg.name, ()):
                        write_insn = knl.id_to_insn[write_insn_id]
                        new_tim_dtype = tim(write_insn.expression)
                        if new_dtype is None:
                            new_dtype = new_tim_dtype
                        elif new_dtype != new_tim_dtype:
                            # Now we know *nothing*.
                            new_dtype = None
                            break

                except TypeInferenceFailure:
                    # Even one type inference failure is enough to
                    # make this dtype not safe to guess. Discard whatever
                    # an earlier writer may have suggested.
                    new_dtype = None

            if new_dtype is not None:
                arg = arg.copy(dtype=new_dtype)

        new_args.append(arg)

    return knl.copy(args=new_args)

def get_arguments_with_incomplete_dtype(knl):
    """Return the names of all arguments of *knl* whose dtype is *None*."""
    return [arg.name for arg in knl.args
            if arg.dtype is None]

# }}}

# {{{ find_all_insn_inames fixed point iteration

def find_all_insn_inames(kernel):
    """Determine, for every instruction, the inames it is nested within.

    Substitution rules are expanded first (the writer map, however, is taken
    from the kernel as passed in). The initial guess for an instruction is
    the set of inames occurring in its expression and assignee plus its
    ``forced_iname_deps``. That guess is then grown to a fixed point.

    :returns: a dictionary mapping instruction ids to sets of iname names.
    """
    from islpy import dim_type
    from loopy.symbolic import get_dependencies

    writer_map = kernel.writer_map()

    insn_id_to_inames = {}
    insn_assignee_inames = {}

    all_read_deps = {}
    all_write_deps = {}

    from loopy.subst import expand_subst
    kernel = expand_subst(kernel)

    # Invariant for the remainder of this function.
    all_inames = kernel.all_inames()

    for insn in kernel.instructions:
        all_read_deps[insn.id] = read_deps = get_dependencies(insn.expression)
        all_write_deps[insn.id] = write_deps = get_dependencies(insn.assignee)
        deps = read_deps | write_deps

        iname_deps = (
                (deps & all_inames)
                | insn.forced_iname_deps)

        insn_id_to_inames[insn.id] = iname_deps
        insn_assignee_inames[insn.id] = write_deps & all_inames

    temp_var_names = set(kernel.temporary_variables)

    # fixed point iteration until all iname dep sets have converged

    # Why is fixed point iteration necessary here? Consider the following
    # scenario:
    #
    # z = expr(iname)
    # y = expr(z)
    # x = expr(y)
    #
    # x clearly has a dependency on iname, but this is not found until that
    # dependency has propagated all the way up. Doing this recursively is
    # not guaranteed to terminate because of circular dependencies.

    while True:
        did_something = False
        for insn in kernel.instructions:

            # {{{ dependency-based propagation

            # For all variables that insn depends on, find the intersection
            # of iname deps of all writers, and add those to insn's
            # dependencies.

            for tv_name in (all_read_deps[insn.id] & temp_var_names):
                implicit_inames = None

                for writer_id in writer_map[tv_name]:
                    writer_implicit_inames = (
                            insn_id_to_inames[writer_id]
                            - insn_assignee_inames[writer_id])
                    if implicit_inames is None:
                        implicit_inames = writer_implicit_inames
                    else:
                        implicit_inames = (implicit_inames
                                & writer_implicit_inames)

                if implicit_inames is None:
                    # An empty writer list has nothing to propagate; avoid
                    # attempting a set union with None.
                    continue

                inames_old = insn_id_to_inames[insn.id]
                inames_new = (inames_old | implicit_inames) \
                            - insn.reduction_inames()
                insn_id_to_inames[insn.id] = inames_new

                if inames_new != inames_old:
                    did_something = True

            # }}}

            # {{{ domain-based propagation

            # Add all inames occurring in parameters of domains that my current
            # inames refer to.

            inames_old = insn_id_to_inames[insn.id]
            inames_new = set(insn_id_to_inames[insn.id])

            for iname in inames_old:
                home_domain = kernel.domains[kernel.get_home_domain_index(iname)]

                for par in home_domain.get_var_names(dim_type.param):
                    if par in all_inames:
                        inames_new.add(par)

            if inames_new != inames_old:
                did_something = True
                insn_id_to_inames[insn.id] = frozenset(inames_new)

            # }}}

        if not did_something:
            break

    return insn_id_to_inames

# }}}

# {{{ set operation cache

class SetOperationCacheManager:
    """Memoizes the results of operations on set objects.

    Results are bucketed by ``hash(set)``; within a bucket, an entry matches
    if the operation name and arguments are equal and the sets compare equal
    via ``plain_is_equal``.
    """

    def __init__(self):
        # mapping: set hash -> [(set, op_name, args, result)]
        self.cache = {}

    def op(self, set, op_name, op, args):
        """Return ``op(*args)``, reusing a cached result if one exists.

        :arg set: the set the operation belongs to (used as the cache key).
        :arg op_name: a string identifying the operation.
        :arg op: the callable to invoke on a cache miss.
        :arg args: a tuple of positional arguments for *op*.
        """
        hashval = hash(set)
        bucket = self.cache.setdefault(hashval, [])

        for bkt_set, bkt_op_name, bkt_args, result in bucket:
            # Entries store the operation *name*, so that is what must be
            # compared. Comparing the callable against the stored name can
            # never succeed and would turn every lookup into a miss.
            if (op_name == bkt_op_name and args == bkt_args
                    and set.plain_is_equal(bkt_set)):
                return result

        result = op(*args)
        bucket.append((set, op_name, args, result))
        return result

    def dim_min(self, set, *args):
        """Cached version of ``set.dim_min(*args)``."""
        return self.op(set, "dim_min", set.dim_min, args)

    def dim_max(self, set, *args):
        """Cached version of ``set.dim_max(*args)``."""
        return self.op(set, "dim_max", set.dim_max, args)

    def base_index_and_length(self, set, iname, context=None):
        """Return ``(base_index, size)`` expressions for *iname* in *set*.

        *size* is derived from ``upper - lower + 1`` using
        ``static_max_of_pw_aff`` with ``constants_only=True``; *base_index*
        is derived from the lower bound using ``static_value_of_pw_aff`` with
        ``constants_only=False``. *context* is forwarded to both helpers.
        """
        iname_to_dim = set.space.get_var_dict()
        # Entry [1] of the var dict value is what dim_min/dim_max expect.
        lower_bound_pw_aff = self.dim_min(set, iname_to_dim[iname][1])
        upper_bound_pw_aff = self.dim_max(set, iname_to_dim[iname][1])

        from loopy.isl_helpers import static_max_of_pw_aff, static_value_of_pw_aff
        from loopy.symbolic import pw_aff_to_expr

        size = pw_aff_to_expr(static_max_of_pw_aff(
                upper_bound_pw_aff - lower_bound_pw_aff + 1, constants_only=True,
                context=context))
        base_index = pw_aff_to_expr(
            static_value_of_pw_aff(lower_bound_pw_aff, constants_only=False,
                context=context))

        return base_index, size

# }}}

# {{{ domain change helper

class DomainChanger:
    """Helps change the domain responsible for *inames* within a kernel.

    .. note: Does not perform an in-place change!

    After construction, ``domain`` holds the domain to be modified and
    ``leaf_domain_index`` its position in ``kernel.domains`` (or *None* if
    *inames* was empty).
    """

    def __init__(self, kernel, inames):
        """
        :raises RuntimeError: if *inames* map to more than one leaf domain.
        """
        self.kernel = kernel
        if inames:
            ldi = kernel.get_leaf_domain_indices(inames)
            if len(ldi) > 1:
                raise RuntimeError("Inames '%s' require more than one leaf "
                        "domain, which makes the domain change that is part "
                        "of your current operation ambiguous."
                        % ", ".join(inames))

            self.leaf_domain_index, = ldi
            self.domain = kernel.domains[self.leaf_domain_index]

        else:
            self.domain = kernel.combine_domains(())
            self.leaf_domain_index = None

    def get_domains_with(self, replacement):
        """Return a new list of domains with *replacement* put in place.

        The kernel's own list is copied, not modified. If no leaf domain was
        identified at construction time, *replacement* is appended.
        """
        result = self.kernel.domains[:]
        if self.leaf_domain_index is not None:
            result[self.leaf_domain_index] = replacement
        else:
            result.append(replacement)

        return result

# }}}

# {{{ graphviz / dot export

def get_dot_dependency_graph(kernel, iname_cluster=False, iname_edge=True):
    """Return the instruction dependency graph of *kernel* as dot source.

    :arg iname_cluster: if true, add one ``subgraph cluster_<iname>`` per
        iname listing the instructions nested inside it.
    :arg iname_edge: if true, add a dotted edge from each iname to every
        instruction nested inside it.
    :returns: a string containing a ``digraph loopy_deps`` definition.
    """
    lines = []
    for insn in kernel.instructions:
        lines.append("%s [shape=\"box\"];" % insn.id)
        for dep in insn.insn_deps:
            lines.append("%s -> %s;" % (dep, insn.id))

        if iname_edge:
            for iname in kernel.insn_inames(insn):
                lines.append("%s -> %s [style=\"dotted\"];" % (iname, insn.id))

    if iname_cluster:
        for iname in kernel.all_inames():
            lines.append("subgraph cluster_%s { label=\"%s\" %s }" % (iname, iname,
                " ".join(insn.id for insn in kernel.instructions
                    if iname in kernel.insn_inames(insn))))

    return "digraph loopy_deps {\n%s\n}" % "\n".join(lines)

# }}}

# vim: foldmethod=marker
infer_types_of_temporaries(kernel): # {{{ work on type inference queue - from loopy.kernel import TemporaryVariable + from loopy.kernel.data import TemporaryVariable debug = 0 @@ -137,7 +137,7 @@ def infer_types_of_temporaries(kernel): def mark_local_temporaries(kernel): new_temp_vars = {} - from loopy.kernel import LocalIndexTagBase + from loopy.kernel.data import LocalIndexTagBase writers = kernel.writer_map() @@ -246,9 +246,8 @@ def realize_reduction(kernel, insn_id_filter=None): arg_dtype = type_inf_mapper(expr.expr) - from loopy.kernel import Instruction + from loopy.kernel.data import Instruction, TemporaryVariable - from loopy.kernel import TemporaryVariable new_temporary_variables[target_var_name] = TemporaryVariable( name=target_var_name, shape=(), @@ -369,7 +368,7 @@ class ExtraInameIndexInserter(IdentityMapper): def duplicate_private_temporaries_for_ilp(kernel): wmap = kernel.writer_map() - from loopy.kernel import IlpBaseTag + from loopy.kernel.data import IlpBaseTag from loopy.symbolic import get_dependencies var_to_new_ilp_inames = {} @@ -591,7 +590,7 @@ def limit_boostability(kernel): # {{{ rank inames by stride def get_auto_axis_iname_ranking_by_stride(kernel, insn): - from loopy.kernel import ImageArg, ValueArg + from loopy.kernel.data import ImageArg, ValueArg approximate_arg_values = dict( (arg.name, arg.approximately) @@ -629,7 +628,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): # {{{ figure out automatic-axis inames - from loopy.kernel import AutoLocalIndexTagBase + from loopy.kernel.data import AutoLocalIndexTagBase auto_axis_inames = set( iname for iname in kernel.insn_inames(insn) @@ -690,7 +689,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): # {{{ assign automatic axes def assign_automatic_axes(kernel, axis=0, local_size=None): - from loopy.kernel import (AutoLocalIndexTagBase, LocalIndexTag) + from loopy.kernel.data import (AutoLocalIndexTagBase, LocalIndexTag) # Realize that at this point in time, axis lengths 
are already # fixed. So we compute them once and pass them to our recursive diff --git a/loopy/schedule.py b/loopy/schedule.py index dcbc1fec922dc12e577a817ad60a406b676e6089..944eef8d9d3ac752349f54061b16b4d99f21bb6e 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -72,7 +72,7 @@ def gather_schedule_subloop(schedule, start_idx): def get_barrier_needing_dependency(kernel, target, source, unordered=False): - from loopy.kernel import Instruction + from loopy.kernel.data import Instruction if not isinstance(source, Instruction): source = kernel.id_to_insn[source] if not isinstance(target, Instruction): @@ -194,7 +194,7 @@ def loop_nest_map(kernel): iname_to_insns = kernel.iname_to_insns() # examine pairs of all inames--O(n**2), I know. - from loopy.kernel import IlpBaseTag + from loopy.kernel.data import IlpBaseTag for inner_iname in all_inames: result[inner_iname] = set() for outer_iname in kernel.all_inames(): @@ -798,7 +798,7 @@ def generate_loop_schedules(kernel, loop_priority=[], debug_args={}): debug = ScheduleDebugger(**debug_args) - from loopy.kernel import IlpBaseTag, ParallelTag + from loopy.kernel.data import IlpBaseTag, ParallelTag ilp_inames = set( iname for iname in kernel.all_inames() diff --git a/loopy/subst.py b/loopy/subst.py index 31c48dddb371a2d7be318b8c5efb29b46a8f27e1..f45226cf8b0a3a2d5bbfa8c53952f9305c9826c8 100644 --- a/loopy/subst.py +++ b/loopy/subst.py @@ -147,7 +147,7 @@ def extract_subst(kernel, subst_name, template, parameters): new_expr = cbmapper(insn.expression) new_insns.append(insn.copy(expression=new_expr)) - from loopy.kernel import SubstitutionRule + from loopy.kernel.data import SubstitutionRule new_substs = { subst_name: SubstitutionRule( name=subst_name, diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 6ada11f181d9002c7eb55fe7683addcd7998d9a3..460e7b742555790ce5de153e2441e189efcd7f8f 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -420,7 +420,7 @@ class ExpandingIdentityMapper(IdentityMapper): :returns: 
(new_substitutions, subst_renames) """ - from loopy.kernel import SubstitutionRule + from loopy.kernel.data import SubstitutionRule orig_name_histogram = {} for key, (name, orig_name) in self.subst_rule_registry.iteritems():