diff --git a/doc/reference.rst b/doc/reference.rst index 3d78bee36d3e70231fd306344ba2f119529cd494..59ab3c9864d2af4f3a7b885c5c16dcc10e522a08 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -39,6 +39,7 @@ Loopy's expressions are a slight superset of the expressions supported by * duplication of reduction inames * complex-valued arithmetic * tagging of array access and substitution rule use ("$") +* ``indexof``, ``indexof_vec`` .. _types: diff --git a/loopy/expression.py b/loopy/expression.py index 3194ac571b1959dca77f6451c78a29096824ed48..94eaf4448f8489f55c3185cc3e3ec121bbb08993 100644 --- a/loopy/expression.py +++ b/loopy/expression.py @@ -184,6 +184,9 @@ class TypeInferenceMapper(CombineMapper): if isinstance(identifier, Variable): identifier = identifier.name + if identifier in ["indexof", "indexof_vec"]: + return self.kernel.index_dtype + arg_dtypes = tuple(self.rec(par) for par in expr.parameters) mangle_result = self.kernel.mangle_function(identifier, arg_dtypes) diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 9a3c9c0cfbc79e9aba934b7a7c051665b19c19c5..92fb232e92a6e891efcef7022e0dceedcb4f692f 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -306,8 +306,8 @@ def parse_array_dim_tags(dim_tags, use_increasing_target_axes=False): raise LoopyError("may not mix C/F dim_tag specifications with " "explicit specification of layout nesting levels") else: - target_axis_to_has_explicit_nesting_level[parsed_dim_tag.target_axis] = \ - has_explicit_nesting_level + target_axis_to_has_explicit_nesting_level[ + parsed_dim_tag.target_axis] = has_explicit_nesting_level # }}} diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 71844cfbf057c4533669dfbdce71e13f5e0a1ca3..112e7e5b95b3354092a6ced076a9a03140c87105 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -35,6 +35,7 @@ import islpy as isl from loopy.expression import dtype_to_type_context, TypeInferenceMapper +from loopy.diagnostic import LoopyError from loopy.tools import is_integer @@ -74,6 +75,22 @@ class LoopyCCodeMapper(RecursiveMapper): else: return s + def find_array(self, expr): + if expr.aggregate.name in self.kernel.arg_dict: + ary = self.kernel.arg_dict[expr.aggregate.name] + elif expr.aggregate.name in self.kernel.temporary_variables: + ary = self.kernel.temporary_variables[expr.aggregate.name] + else: + raise RuntimeError("nothing known about subscripted variable '%s'" + % expr.aggregate.name) + + from loopy.kernel.array import ArrayBase + if not isinstance(ary, ArrayBase): + raise RuntimeError("subscripted variable '%s' is not an array" + % expr.aggregate.name) + + return ary + def rec(self, expr, prec, type_context=None, needed_dtype=None): if needed_dtype is None: return RecursiveMapper.rec(self, expr, prec, type_context) @@ -150,18 +167,7 @@ class LoopyCCodeMapper(RecursiveMapper): if not isinstance(expr.aggregate, Variable): return base_impl(expr, enclosing_prec, type_context) - if expr.aggregate.name in self.kernel.arg_dict: - ary = self.kernel.arg_dict[expr.aggregate.name] - elif expr.aggregate.name in self.kernel.temporary_variables: - ary = self.kernel.temporary_variables[expr.aggregate.name] - else: - raise RuntimeError("nothing known about subscripted variable '%s'" - % expr.aggregate.name) - - from loopy.kernel.array import ArrayBase - if not isinstance(ary, ArrayBase): - raise RuntimeError("subscripted variable '%s' is not an array" - % expr.aggregate.name) + ary = self.find_array(expr) from loopy.kernel.array import get_access_info from pymbolic import evaluate @@ -367,11 +373,54 @@ class LoopyCCodeMapper(RecursiveMapper): "for constant '%s'" % expr) def map_call(self, expr, enclosing_prec, type_context): - from pymbolic.primitives import Variable + from pymbolic.primitives import Variable, Subscript from pymbolic.mapper.stringifier import PREC_NONE identifier = expr.function + # {{{ implement indexof, indexof_vec + + if identifier.name in ["indexof", "indexof_vec"]: + if len(expr.parameters) != 1: + raise LoopyError("%s takes exactly one argument" % identifier.name) + arg, = expr.parameters + if not isinstance(arg, Subscript): + raise LoopyError( + "argument to %s must be a subscript" % identifier.name) + + ary = self.find_array(arg) + + from loopy.kernel.array import get_access_info + from pymbolic import evaluate + access_info = get_access_info(self.kernel.target, ary, arg.index, + lambda expr: evaluate(expr, self.codegen_state.var_subst_map), + self.codegen_state.vectorization_info) + + from loopy.kernel.data import ImageArg + if isinstance(ary, ImageArg): + raise LoopyError("%s does not support images" % identifier.name) + + if identifier.name == "indexof": + return access_info.subscripts[0] + elif identifier.name == "indexof_vec": + from loopy.kernel.array import VectorArrayDimTag + ivec = None + for iaxis, dim_tag in enumerate(ary.dim_tags): + if isinstance(dim_tag, VectorArrayDimTag): + ivec = iaxis + + if ivec is None: + return access_info.subscripts[0] + else: + return ( + access_info.subscripts[0]*ary.shape[ivec] + + access_info.vector_index) + + else: + raise RuntimeError("should not get here") + + # }}} + c_name = None if isinstance(identifier, Variable): identifier = identifier.name diff --git a/test/test_loopy.py b/test/test_loopy.py index d7d7dc5768f163a84306ef234af08cf620c4066c..c8072108032e82517773709f5f5fd257928d3bd9 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2191,6 +2191,43 @@ def test_variable_size_temporary(): lp.generate_code(k) +def test_indexof(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + knl = lp.make_kernel( + ''' { [i,j]: 0<=i,j<5 } ''', + ''' out[i,j] = indexof(out[i,j])''') + + knl = lp.set_options(knl, write_cl=True) + + (evt, (out,)) = knl(queue) + out = out.get() + + assert np.array_equal(out.ravel(order="C"), np.arange(25)) + + +def test_indexof_vec(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + if ctx.devices[0].platform.name.startswith("Portable"): + # Accurate as of 2015-10-08 + pytest.skip("POCL miscompiles vector code") + + knl = lp.make_kernel( + ''' { [i,j,k]: 0<=i,j,k<4 } ''', + ''' out[i,j,k] = indexof_vec(out[i,j,k])''') + + knl = lp.tag_inames(knl, {"i": "vec"}) + knl = lp.tag_data_axes(knl, "out", "vec,c,c") + knl = lp.set_options(knl, write_cl=True) + + (evt, (out,)) = knl(queue) + #out = out.get() + #assert np.array_equal(out.ravel(order="C"), np.arange(25)) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1])