diff --git a/loopy/codegen/expression.py b/loopy/codegen/expression.py index 821b9dfd9c4232e8e102fd90a469b7e1b6f33171..1406ad995d6a4a49d5c0b877996743c8874650a8 100644 --- a/loopy/codegen/expression.py +++ b/loopy/codegen/expression.py @@ -34,6 +34,7 @@ from pymbolic.mapper import CombineMapper import islpy as isl import pyopencl as cl import pyopencl.array +from pytools import memoize_method # {{{ type inference @@ -84,16 +85,21 @@ class TypeInferenceMapper(CombineMapper): raise TypeInferenceFailure("integer constant '%s' too large" % expr) dt = np.asarray(expr).dtype - if dt.kind == "f": - # deduce the smaller type by default - return np.dtype(np.float32) + if hasattr(expr, "dtype"): + return expr.dtype + elif isinstance(expr, np.number): + # Numpy types are sized + return np.dtype(type(expr)) elif dt.kind == "f": # deduce the smaller type by default - return np.dtype(np.complex64) - elif hasattr(expr, "dtype"): - return expr.dtype + return np.dtype(np.float32) + elif dt.kind == "c": + # Codegen for complex types depends on exactly correct types. + # Refuse temptation to guess. + raise TypeInferenceFailure("Complex constant '%s' needs to " + "be sized for type inference " % expr) else: - raise TypeInferenceFailure("cannot deduce type of constant '%s'" % expr) + raise TypeInferenceFailure("Cannot deduce type of constant '%s'" % expr) def map_subscript(self, expr): return self.rec(expr.aggregate) @@ -157,17 +163,17 @@ class TypeInferenceMapper(CombineMapper): def map_reduction(self, expr): return expr.operation.result_dtype(self.rec(expr.expr), expr.inames) -# }}} + # {{{ use caching -def perform_cast(ccm, expr, expr_dtype, target_dtype): - # detect widen-to-complex, account for it. - if (ccm.allow_complex - and target_dtype.kind == "c" - and expr_dtype.kind != "c"): - from pymbolic import var - expr = var("%s_fromreal" % ccm.complex_type_name(target_dtype))(expr) + @memoize_method + def __call__(self, expr): + return CombineMapper.__call__(self, expr) - return expr + rec = __call__ + + # }}} + +# }}} # {{{ C code mapper @@ -244,9 +250,10 @@ class LoopyCCodeMapper(RecursiveMapper): self.seen_dtypes.add(result) return result - def join_rec(self, joiner, iterable, prec, type_context): + def join_rec(self, joiner, iterable, prec, type_context, needed_dtype=None): f = joiner.join("%s" for i in iterable) - return f % tuple(self.rec(i, prec, type_context) for i in iterable) + return f % tuple( + self.rec(i, prec, type_context, needed_dtype) for i in iterable) def parenthesize_if_needed(self, s, enclosing_prec, my_prec): if enclosing_prec > my_prec: @@ -254,6 +261,24 @@ class LoopyCCodeMapper(RecursiveMapper): else: return s + def rec(self, expr, prec, type_context, needed_dtype=None): + if needed_dtype is None: + return RecursiveMapper.rec(self, expr, prec, type_context) + + actual_type = self.infer_type(expr) + + if (actual_type.kind == "c" and needed_dtype.kind == "c" + and actual_type != needed_dtype): + result = RecursiveMapper.rec(self, expr, PREC_NONE, type_context) + return "%s_cast(%s)" % (self.complex_type_name(needed_dtype), result) + elif actual_type.kind != "c" and needed_dtype.kind == "c": + result = RecursiveMapper.rec(self, expr, PREC_NONE, type_context) + return "%s_fromreal(%s)" % (self.complex_type_name(needed_dtype), result) + else: + return RecursiveMapper.rec(self, expr, prec, type_context) + + __call__ = rec + # }}} def map_common_subexpression(self, expr, prec, type_context): @@ -473,10 +498,14 @@ class LoopyCCodeMapper(RecursiveMapper): enclosing_prec, PREC_COMPARISON) def map_constant(self, expr, enclosing_prec, type_context): - if isinstance(expr, complex): - cast_type = "cdouble_t" - if type_context == "f": + if isinstance(expr, (complex, np.complexfloating)): + if expr.dtype == np.complex128: + cast_type = "cdouble_t" + elif expr.dtype == np.complex64: cast_type = "cfloat_t" + else: + raise RuntimeError("unsupported complex type in expression " + "generation: %s" % type(expr)) return "(%s) (%s, %s)" % (cast_type, repr(expr.real), repr(expr.imag)) else: @@ -516,9 +545,8 @@ class LoopyCCodeMapper(RecursiveMapper): result_dtype, c_name, arg_tgt_dtypes = mangle_result str_parameters = [ - self.rec( - perform_cast(self, par, par_dtype, tgt_dtype), - PREC_NONE, dtype_to_type_context(tgt_dtype)) + self.rec(par, PREC_NONE, dtype_to_type_context(tgt_dtype), + tgt_dtype) for par, par_dtype, tgt_dtype in zip( expr.parameters, par_dtypes, arg_tgt_dtypes)] else: @@ -573,7 +601,7 @@ class LoopyCCodeMapper(RecursiveMapper): if 'c' == self.infer_type(child).kind] real_sum = self.join_rec(" + ", reals, PREC_SUM, type_context) - complex_sum = self.join_rec(" + ", complexes, PREC_SUM, type_context) + complex_sum = self.join_rec(" + ", complexes, PREC_SUM, type_context, tgt_dtype) if real_sum: result = "%s_fromreal(%s) + %s" % (tgt_name, real_sum, complex_sum) @@ -606,18 +634,19 @@ class LoopyCCodeMapper(RecursiveMapper): complexes = [child for child in expr.children if 'c' == self.infer_type(child).kind] - real_prd = self.join_rec("*", reals, PREC_PRODUCT, type_context) + real_prd = self.join_rec(" * ", reals, PREC_PRODUCT, + type_context) if len(complexes) == 1: myprec = PREC_PRODUCT else: myprec = PREC_NONE - complex_prd = self.rec(complexes[0], myprec, type_context) + complex_prd = self.rec(complexes[0], myprec, type_context, tgt_dtype) for child in complexes[1:]: complex_prd = "%s_mul(%s, %s)" % ( tgt_name, complex_prd, - self.rec(child, PREC_NONE, type_context)) + self.rec(child, PREC_NONE, type_context, tgt_dtype)) if real_prd: # elementwise semantics are correct @@ -628,12 +657,13 @@ class LoopyCCodeMapper(RecursiveMapper): return self.parenthesize_if_needed(result, enclosing_prec, PREC_PRODUCT) def map_quotient(self, expr, enclosing_prec, type_context): - def base_impl(expr, enclosing_prec, type_context): + def base_impl(expr, enclosing_prec, type_context, num_tgt_dtype=None): return self.parenthesize_if_needed( "%s / %s" % ( - # space is necessary--otherwise '/*' becomes + # Space is necessary--otherwise '/*' + # (i.e. divide-dererference) becomes # start-of-comment in C. - self.rec(expr.numerator, PREC_PRODUCT, type_context), + self.rec(expr.numerator, PREC_PRODUCT, type_context, num_tgt_dtype), # analogous to ^{-1} self.rec(expr.denominator, PREC_POWER, type_context)), enclosing_prec, PREC_PRODUCT) @@ -650,17 +680,18 @@ class LoopyCCodeMapper(RecursiveMapper): return base_impl(expr, enclosing_prec, type_context) elif n_complex and not d_complex: # elementwise semantics are correct - return base_impl(expr, enclosing_prec, type_context) + return base_impl(expr, enclosing_prec, type_context, + num_tgt_dtype=tgt_dtype) elif not n_complex and d_complex: return "%s_rdivide(%s, %s)" % ( self.complex_type_name(tgt_dtype), self.rec(expr.numerator, PREC_NONE, type_context), - self.rec(expr.denominator, PREC_NONE, type_context)) + self.rec(expr.denominator, PREC_NONE, type_context, tgt_dtype)) else: return "%s_divide(%s, %s)" % ( self.complex_type_name(tgt_dtype), - self.rec(expr.numerator, PREC_NONE, type_context), - self.rec(expr.denominator, PREC_NONE, type_context)) + self.rec(expr.numerator, PREC_NONE, type_context, tgt_dtype), + self.rec(expr.denominator, PREC_NONE, type_context, tgt_dtype)) def map_remainder(self, expr, enclosing_prec, type_context): tgt_dtype = self.infer_type(expr) @@ -704,22 +735,18 @@ class LoopyCCodeMapper(RecursiveMapper): if b_complex and not e_complex: return "%s_powr(%s, %s)" % ( self.complex_type_name(tgt_dtype), - self.rec(expr.base, PREC_NONE, type_context), + self.rec(expr.base, PREC_NONE, type_context, tgt_dtype), self.rec(expr.exponent, PREC_NONE, type_context)) else: return "%s_pow(%s, %s)" % ( self.complex_type_name(tgt_dtype), - self.rec(expr.base, PREC_NONE, type_context), - self.rec(expr.exponent, PREC_NONE, type_context)) + self.rec(expr.base, PREC_NONE, type_context, tgt_dtype), + self.rec(expr.exponent, PREC_NONE, type_context, tgt_dtype)) return base_impl(self, expr, enclosing_prec, type_context) # }}} - def __call__(self, expr, type_context, prec=PREC_NONE): - from pymbolic.mapper import RecursiveMapper - return RecursiveMapper.__call__(self, expr, prec, type_context) - # }}} # vim: fdm=marker diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index fe808ec364f6ed376cda507e4d3cfa255999c351..0858a67d933c63e05582a02efea9741e5c792f1e 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -60,19 +60,14 @@ def generate_instruction_code(kernel, insn, codegen_state): expr = insn.expression - from loopy.codegen.expression import perform_cast target_dtype = kernel.get_var_descriptor(insn.get_assignee_var_name()).dtype - expr_dtype = ccm.infer_type(expr) - - expr = perform_cast(ccm, expr, - expr_dtype=expr_dtype, - target_dtype=target_dtype) from cgen import Assign from loopy.codegen.expression import dtype_to_type_context insn_code = Assign( ccm(insn.assignee, prec=None, type_context=None), - ccm(expr, prec=None, type_context=dtype_to_type_context(target_dtype))) + ccm(expr, prec=None, type_context=dtype_to_type_context(target_dtype), + needed_dtype=target_dtype)) insn_inames = kernel.insn_inames(insn) insn_code, impl_domain = wrap_in_bounds_checks( diff --git a/test/test_loopy.py b/test/test_loopy.py index 37c9633263713667d640020d09175391e4531dca..e3d2880befba9382288105923271267982344b48 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -344,7 +344,11 @@ def make_random_value(): elif v == 1: return uniform(-10, 10) else: - return uniform(-10, 10) + 1j*uniform(-10, 10) + cval = uniform(-10, 10) + 1j*uniform(-10, 10) + if randrange(0, 2) == 0: + return np.complex128(cval) + else: + return np.complex128(cval) @@ -371,9 +375,15 @@ def make_random_expression(var_values, size): var_values[var_name] = make_random_value() return p.Variable(var_name) elif v < 1250: - return make_random_expression(var_values, size) - make_random_expression(var_values, size) + # Cannot use '-' because that destroys numpy constants. + return p.Sum(( + make_random_expression(var_values, size), + - make_random_expression(var_values, size))) elif v < 1500: - return make_random_expression(var_values, size) / make_random_expression(var_values, size) + # Cannot use '/' because that destroys numpy constants. + return p.Quotient( + make_random_expression(var_values, size), + make_random_expression(var_values, size)) def generate_random_fuzz_examples(count): @@ -388,13 +398,13 @@ def test_fuzz_code_generator(ctx_factory): queue = cl.CommandQueue(ctx) #from expr_fuzz import get_fuzz_examples - for expr, var_values in generate_random_fuzz_examples(20): + for expr, var_values in generate_random_fuzz_examples(50): #for expr, var_values in get_fuzz_examples(): from pymbolic import evaluate true_value = evaluate(expr, var_values) def get_dtype(x): - if isinstance(x, complex): + if isinstance(x, (complex, np.complexfloating)): return np.complex128 else: return np.float64