diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 8f5337533fd9e96c77b56154c1848f5ac419b425..7b089772702dd615005b7b75c38437f661d12b1e 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -317,6 +317,10 @@ class DependencyMapper(DependencyMapperBase):
     def map_type_cast(self, expr, *args, **kwargs):
         return self.rec(expr.child, *args, **kwargs)
 
+    def map_literal(self, expr, *args, **kwargs):
+        # Literals contribute an empty dependency set.
+        return set()
+
 
 class SubstitutionRuleExpander(IdentityMapper):
     def __init__(self, rules):
diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index a6742c225cce42bee5f48d44662f0e515d14b556..082f6a4527323ac74fa5b1229c020238ca4b7bb4 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -121,6 +121,22 @@ class ExpressionToCExpressionMapper(IdentityMapper):
 
     def map_variable(self, expr, type_context):
         def postproc(x):
+            # Cast prefix to emit for each type context that has one.
+            kind_to_c_type = {
+                    'i': "(int)",
+                    'f': "(float)",
+                    'd': "(double)"}
+            # Cast only when the inferred dtype kind differs from the
+            # requested context.
+            # NOTE(review): numpy reports kind 'f' for float32 and float64
+            # alike, so a 'd' context never matches and always casts --
+            # confirm this is intended.
+            # FIXME: type contexts without an entry above (e.g. None) are
+            # passed through uncast. Is this fine?
+            if type_context != self.infer_type(x).numpy_dtype.kind and (
+                    type_context in kind_to_c_type):
+                return var(kind_to_c_type[type_context])(x)
+
             return x
 
         if expr.name in self.codegen_state.var_subst_map:
@@ -697,9 +713,16 @@ class ExpressionToCExpressionMapper(IdentityMapper):
                 elif is_zero(expr.exponent - 2):
                     return self.rec(expr.base*expr.base, type_context)
 
-            return type(expr)(
-                    self.rec(expr.base, type_context),
-                    self.rec(expr.exponent, type_context))
+            # Operands are recursed into without passing type_context.
+            pow_expr = type(expr)(
+                    self.rec(expr.base),
+                    self.rec(expr.exponent))
+            if type_context == 'i':
+                # Round to nearest before the int cast. Adding 0.5 and
+                # truncating would be off by one for negative results.
+                return var("(int)")(var("round")(pow_expr))
+
+            return pow_expr
 
         if not self.allow_complex:
             return base_impl(expr, type_context)
diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py
index 8a6e5284258d864d19d7f1353ec9dfaaa7d72a9b..8f153336c47d3803fd026c9bff39cc98e1f49122 100644
--- a/loopy/target/opencl.py
+++ b/loopy/target/opencl.py
@@ -299,6 +299,32 @@ class ExpressionToOpenCLCExpressionMapper(ExpressionToCExpressionMapper):
     def map_local_hw_index(self, expr, type_context):
         return var("lid")(expr.axis)
 
+    def map_power(self, expr, type_context):
+        """Map a power expression, using ``pown`` for integral exponents.
+
+        For a non-complex base with an integral exponent this emits a
+        call to ``pown``, recursing into an integral base with an 'f'
+        type context. In an 'i' type context the result is rounded and
+        cast to int. All other cases defer to the superclass.
+        """
+        if self.infer_type(expr.exponent).is_integral() and not (
+                self.infer_type(expr.base).is_complex()):
+            # opencl provides special implementation for int exponents
+            base_kind = 'f' if self.infer_type(expr.base).is_integral() else (
+                    None)
+            pown_call = var("pown")(
+                    self.rec(expr.base, base_kind),
+                    self.rec(expr.exponent))
+            if type_context == 'i':
+                # Round to nearest before the int cast. Adding 0.5 and
+                # truncating would be off by one for negative results.
+                return var("(int)")(var("round")(pown_call))
+
+            return pown_call
+
+        return super(ExpressionToOpenCLCExpressionMapper, self).map_power(expr,
+                type_context)
+
 # }}}
 
 
diff --git a/test/test_loopy.py b/test/test_loopy.py
index d506258c250ddf790a7e74b9e01fe0d4967d0850..e3fe54a65cb37c44cb0cd4e719268dd96cb47f52 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -3028,6 +3028,32 @@ def test_non_integral_array_idx_raises():
         print(lp.generate_code_v2(knl).device_code())
 
 
+def test_ipow(ctx_factory):
+    # Uses an integer power as an array index: out[i] = a[2**i-1].
+    knl = lp.make_kernel(
+            "{[i, j]: 0<=i<=4 and 0<=j<16}",
+            """
+            out[j] = 0 {id=init}
+            out[i] = a[2**i-1] {dep=init}
+            """, [lp.GlobalArg('a', np.float64), '...'])
+
+    a = np.random.randn(16)
+    expected_out = np.zeros(16)
+
+    knl = lp.set_options(knl, 'write_cl')
+
+    # Reference result computed on the host.
+    for i in range(5):
+        expected_out[i] = a[2**i-1]
+
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    evt, (out, ) = knl(queue, a=a)
+
+    assert np.allclose(out, expected_out)
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])