diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index 8674434084077ba1f791c46123a083346715209e..9138d9a41d7b33db956fd8aba55c0b3b788db064 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -5,8 +5,8 @@ Reference: Loopy's Model of a Kernel .. _domain-tree: -Loop Domain Tree ----------------- +Loop Domain Forest +------------------ .. {{{ @@ -29,10 +29,29 @@ Note that *n* in the example is not an iname. It is a :ref:`domain-parameters` that is passed to the kernel by the user. To accommodate some data-dependent control flow, there is not actually -a single loop domain, but rather a *tree of loop domains*, -allowing more deeply nested domains to depend on inames +a single loop domain, but rather a *forest of loop domains* (a collection +of trees), allowing more deeply nested domains to depend on inames introduced by domains closer to the root. +Here is an example:: + + { [l] : 0 <= l <= 2 } + { [i] : start <= i < end } + { [j] : start <= j < end } + +The i and j domains are "children" of the l domain (visible from indentation). +This is also how :mod:`loopy` prints the domain forest, to make the parent/child +relationship visible. In the example, the parameters start/end might be read +inside of the 'l' loop. + +The idea is that domains form a forest (a collection of trees), and a +"sub-forest" is extracted that covers all the inames for each +instruction. Each individual sub-tree is then checked for branching, +which is ill-formed. It is declared ill-formed because intersecting, in +the above case, the l, i, and j domains could result in restrictions from the +i domain affecting the j domain by way of how i affects l--which would +be counterintuitive to say the least. + .. 
_inames: Inames diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index e8c846fbc491b7049d7820e3ef14d9ed8071ded3..e5305b703a4f03adcc886b03dece75c9273c4ca2 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -615,8 +615,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): # nothin' new continue - domain_parents = [home_domain_index] + ppd[home_domain_index] - current_root = domain_parents[-1] + domain_path_to_root = [home_domain_index] + ppd[home_domain_index] + current_root = domain_path_to_root[-1] previous_leaf = root_to_leaf.get(current_root) if previous_leaf is not None: @@ -626,8 +626,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): # it can introduce artificial restrictions on variables # further up the tree. - prev_parents = set(ppd[previous_leaf]) - if not prev_parents <= set(domain_parents): + prev_path_to_root = set([previous_leaf] + ppd[previous_leaf]) + if not prev_path_to_root <= set(domain_path_to_root): raise CannotBranchDomainTree("iname set '%s' requires " "branch in domain tree (when adding '%s')" % (", ".join(inames), iname)) @@ -636,7 +636,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): pass root_to_leaf[current_root] = home_domain_index - domain_indices.update(domain_parents) + domain_indices.update(domain_path_to_root) return list(root_to_leaf.values()) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 467bc8ee801a090641c7e7f8d7f2e7c12a921232..8f371085e0f1655651397c16873f10a95a799f79 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -363,14 +363,26 @@ class PyOpenCLTarget(OpenCLTarget): raise NotImplementedError("atomics flavor: %s" % self.atomics_flavor) def is_vector_dtype(self, dtype): - from pyopencl.array import vec + try: + import pyopencl.cltypes as cltypes + vec_types = cltypes.vec_types + except ImportError: + from pyopencl.array import vec + vec_types = vec.types + return (isinstance(dtype, NumpyType) - and dtype.numpy_dtype in 
list(vec.types.values())) + and dtype.numpy_dtype in list(vec_types.values())) def vector_dtype(self, base, count): - from pyopencl.array import vec + try: + import pyopencl.cltypes as cltypes + vec_types = cltypes.vec_types + except ImportError: + from pyopencl.array import vec + vec_types = vec.types + return NumpyType( - vec.types[base.numpy_dtype, count], + vec_types[base.numpy_dtype, count], target=self) def alignment_requirement(self, type_decl): diff --git a/loopy/type_inference.py b/loopy/type_inference.py index b8b0cbcbf1236cdf712da998922ac238261a3e6e..78d817ce73724d90a6cc6f380b24290971f6c1e7 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -332,7 +332,20 @@ class TypeInferenceMapper(CombineMapper): if not agg_result: return agg_result - field = agg_result[0].numpy_dtype.fields[expr.name] + numpy_dtype = agg_result[0].numpy_dtype + fields = numpy_dtype.fields + if fields is None: + raise LoopyError("cannot look up attribute '%s' in " + "non-aggregate expression '%s'" + % (expr.aggregate, expr.name)) + + try: + field = fields[expr.name] + except KeyError: + raise LoopyError("cannot look up attribute '%s' in " + "aggregate expression '%s' of dtype '%s'" + % (expr.aggregate, expr.name, numpy_dtype)) + dtype = field[0] return [NumpyType(dtype)]