diff --git a/MEMO b/MEMO index 34286cc941f21483746265df716c3438301fa6b8..b03133efe6e622f6fb09d0cea043d2e25469fc6d 100644 --- a/MEMO +++ b/MEMO @@ -65,7 +65,6 @@ To-do - Scalar insn priority -- What to do about constants in codegen? (...f suffix, complex types) - If finding a maximum proves troublesome, move parameters into the domain @@ -123,6 +122,9 @@ Future ideas Dealt with ^^^^^^^^^^ +- What to do about constants in codegen? (...f suffix, complex types) + -> dealt with by type contexts + - relating to Multi-Domain - Make sure that variables that enter into loop bounds are only written exactly once. [DONE] diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 88a64e7b270f4c1077f645710d5ba9a8111f4357..0237295b2ec60c59d010a3cc23f213c25886ea10 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -173,7 +173,7 @@ def wrap_in_for_from_constraints(ccm, iname, constraint_bset, stmt): from pymbolic import var rhs += iname_coeff*var(iname) end_conds.append("%s >= 0" % - ccm(cfm(rhs))) + ccm(cfm(rhs), 'i')) else: # iname_coeff > 0 kind, bound = solve_constraint_for_bound(cns, iname) assert kind == ">=" @@ -205,7 +205,7 @@ def wrap_in_for_from_constraints(ccm, iname, constraint_bset, stmt): from cgen import For from loopy.codegen import wrap_in return wrap_in(For, - "int %s = %s" % (iname, ccm(start_expr)), + "int %s = %s" % (iname, ccm(start_expr, 'i')), " && ".join(end_conds), "++%s" % iname, stmt) diff --git a/loopy/codegen/expression.py b/loopy/codegen/expression.py index 22f9624d15fffb28ea109e7f64caca3753cc993d..e5001e95fb06b4ab84e2c0d461bb0d2773c666d4 100644 --- a/loopy/codegen/expression.py +++ b/loopy/codegen/expression.py @@ -2,8 +2,9 @@ from __future__ import division import numpy as np -from pymbolic.mapper.c_code import CCodeMapper as CCodeMapper -from pymbolic.mapper.stringifier import PREC_NONE +from pymbolic.mapper import RecursiveMapper +from pymbolic.mapper.stringifier import (PREC_NONE, PREC_CALL, PREC_PRODUCT, + PREC_POWER) from pymbolic.mapper import CombineMapper # {{{ type inference @@ -57,7 +58,7 @@ class TypeInferenceMapper(CombineMapper): if isinstance(identifier, Variable): identifier = identifier.name - arg_dtypes = tuple(self.rec(par) for par in expr.parameters) + arg_dtypes = tuple(self.rec(par, None) for par in expr.parameters) mangle_result = self.kernel.mangle_function(identifier, arg_dtypes) if mangle_result is not None: @@ -118,7 +119,25 @@ def perform_cast(ccm, expr, expr_dtype, target_dtype): # {{{ C code mapper -class LoopyCCodeMapper(CCodeMapper): +# type_context may be: +# - 'i' for integer - +# - 'f' for single-precision floating point +# - 'd' for double-precision floating point +# or None for 'no known context'. + +def dtype_to_type_context(dtype): + dtype = np.dtype(dtype) + + if dtype.kind == 'i': + return 'i' + if dtype in [np.float64, np.complex128]: + return 'd' + if dtype in [np.float32, np.complex64]: + return 'f' + return None + + +class LoopyCCodeMapper(RecursiveMapper): def __init__(self, kernel, seen_dtypes, seen_functions, var_subst_map={}, with_annotation=False, allow_complex=False): """ @@ -127,7 +146,6 @@ class LoopyCCodeMapper(CCodeMapper): functions that were encountered. """ - CCodeMapper.__init__(self) self.kernel = kernel self.seen_dtypes = seen_dtypes self.seen_functions = seen_functions @@ -138,6 +156,8 @@ class LoopyCCodeMapper(CCodeMapper): self.with_annotation = with_annotation self.var_subst_map = var_subst_map.copy() + # {{{ copy helpers + def copy(self, var_subst_map=None): if var_subst_map is None: var_subst_map = self.var_subst_map @@ -146,11 +166,6 @@ class LoopyCCodeMapper(CCodeMapper): with_annotation=self.with_annotation, allow_complex=self.allow_complex) - def infer_type(self, expr): - result = self.type_inf_mapper(expr) - self.seen_dtypes.add(result) - return result - def copy_and_assign(self, name, value): """Make a copy of self with variable *name* fixed to *value*.""" var_subst_map = self.var_subst_map.copy() @@ -164,18 +179,41 @@ class LoopyCCodeMapper(CCodeMapper): var_subst_map.update(assignments) return self.copy(var_subst_map=var_subst_map) - def map_common_subexpression(self, expr, prec): + # }}} + + # {{{ helpers + + def infer_type(self, expr): + result = self.type_inf_mapper(expr) + self.seen_dtypes.add(result) + return result + + def join_rec(self, joiner, iterable, prec, type_context): + f = joiner.join("%s" for i in iterable) + return f % tuple(self.rec(i, prec, type_context) for i in iterable) + + def parenthesize_if_needed(self, s, enclosing_prec, my_prec): + if enclosing_prec > my_prec: + return "(%s)" % s + else: + return s + + # }}} + + def map_common_subexpression(self, expr, prec, type_context): raise RuntimeError("common subexpression should have been eliminated upon " "entry to loopy") - def map_variable(self, expr, prec): + def map_variable(self, expr, enclosing_prec, type_context): if expr.name in self.var_subst_map: if self.with_annotation: return " /* %s */ %s" % ( expr.name, - self.rec(self.var_subst_map[expr.name], prec)) + self.rec(self.var_subst_map[expr.name], + enclosing_prec, type_context)) else: - return str(self.rec(self.var_subst_map[expr.name], prec)) + return str(self.rec(self.var_subst_map[expr.name], + enclosing_prec, type_context)) elif expr.name in self.kernel.arg_dict: arg = self.kernel.arg_dict[expr.name] from loopy.kernel import _ShapedArg @@ -188,15 +226,22 @@ class LoopyCCodeMapper(CCodeMapper): _, c_name = result return c_name - return CCodeMapper.map_variable(self, expr, prec) + return expr.name - def map_tagged_variable(self, expr, enclosing_prec): + def map_tagged_variable(self, expr, enclosing_prec, type_context): return expr.name - def map_subscript(self, expr, enclosing_prec): + def map_subscript(self, expr, enclosing_prec, type_context): + def base_impl(expr, enclosing_prec, type_context): + return self.parenthesize_if_needed( + "%s[%s]" % ( + self.rec(expr.aggregate, PREC_CALL, type_context), + self.rec(expr.index, PREC_NONE, 'i')), + enclosing_prec, PREC_CALL) + from pymbolic.primitives import Variable if not isinstance(expr.aggregate, Variable): - return CCodeMapper.map_subscript(self, expr, enclosing_prec) + return base_impl(expr, enclosing_prec, type_context) if expr.aggregate.name in self.kernel.arg_dict: arg = self.kernel.arg_dict[expr.aggregate.name] @@ -207,7 +252,7 @@ class LoopyCCodeMapper(CCodeMapper): base_access = ("read_imagef(%s, loopy_sampler, (float%d)(%s))" % (arg.name, arg.dimensions, - ", ".join(self.rec(idx, PREC_NONE) + ", ".join(self.rec(idx, PREC_NONE, 'i') for idx in expr.index[::-1]))) if arg.dtype == np.float32: @@ -239,10 +284,11 @@ class LoopyCCodeMapper(CCodeMapper): return "*" + expr.aggregate.name from pymbolic.primitives import Subscript - return CCodeMapper.map_subscript(self, + return base_impl( Subscript(expr.aggregate, arg.offset+sum( stride*expr_i for stride, expr_i in zip( - ary_strides, index_expr))), enclosing_prec) + ary_strides, index_expr))), + enclosing_prec, type_context) elif expr.aggregate.name in self.kernel.temporary_variables: @@ -252,53 +298,68 @@ class LoopyCCodeMapper(CCodeMapper): else: index = (expr.index,) - return (temp_var.name + "".join("[%s]" % self.rec(idx, PREC_NONE) + return (temp_var.name + "".join("[%s]" % self.rec(idx, PREC_NONE, 'i') for idx in index)) else: raise RuntimeError("nothing known about variable '%s'" % expr.aggregate.name) - def map_floor_div(self, expr, prec): + def map_floor_div(self, expr, enclosing_prec, type_context): from loopy.isl_helpers import is_nonnegative num_nonneg = is_nonnegative(expr.numerator, self.kernel.domain) den_nonneg = is_nonnegative(expr.denominator, self.kernel.domain) if den_nonneg: if num_nonneg: - return CCodeMapper.map_floor_div(self, expr, prec) + return self.parenthesize_if_needed( + "%s // %s" % ( + self.rec(expr.numerator, PREC_PRODUCT, type_context), + # analogous to ^{-1} + self.rec(expr.denominator, PREC_POWER, type_context)), + enclosing_prec, PREC_PRODUCT) else: return ("int_floor_div_pos_b(%s, %s)" - % (self.rec(expr.numerator, PREC_NONE), - expr.denominator)) + % (self.rec(expr.numerator, PREC_NONE, 'i'), + self.rec(expr.denominator, PREC_NONE, 'i'))) else: return ("int_floor_div(%s, %s)" - % (self.rec(expr.numerator, PREC_NONE), - self.rec(expr.denominator, PREC_NONE))) + % (self.rec(expr.numerator, PREC_NONE, 'i'), + self.rec(expr.denominator, PREC_NONE, 'i'))) - def map_min(self, expr, prec): + def map_min(self, expr, prec, type_context): what = type(expr).__name__.lower() children = expr.children[:] - result = self.rec(children.pop(), PREC_NONE) + result = self.rec(children.pop(), PREC_NONE, type_context) while children: result = "%s(%s, %s)" % (what, - self.rec(children.pop(), PREC_NONE), + self.rec(children.pop(), PREC_NONE, type_context), result) return result map_max = map_min - def map_constant(self, expr, enclosing_prec): + def map_constant(self, expr, enclosing_prec, type_context): if isinstance(expr, complex): - # FIXME: type-variable - return "(cdouble_t) (%s, %s)" % (repr(expr.real), repr(expr.imag)) + cast_type = "cdouble_t" + if type_context == "f": + cast_type = "cfloat_t" + + return "(%s) (%s, %s)" % (cast_type, repr(expr.real), repr(expr.imag)) else: - # FIXME: type-variable - return repr(float(expr)) + if type_context == "f": + return repr(float(expr))+"f" + elif type_context == "d": + return repr(float(expr)) + elif type_context == "i": + return str(int(expr)) + else: + raise RuntimeError("don't know how to generated code " + "for constant '%s'" % expr) - def map_call(self, expr, enclosing_prec): + def map_call(self, expr, enclosing_prec, type_context): from pymbolic.primitives import Variable from pymbolic.mapper.stringifier import PREC_NONE @@ -311,7 +372,7 @@ class LoopyCCodeMapper(CCodeMapper): par_dtypes = tuple(self.infer_type(par) for par in expr.parameters) - parameters = expr.parameters + str_parameters = None mangle_result = self.kernel.mangle_function(identifier, par_dtypes) if mangle_result is not None: @@ -320,23 +381,28 @@ class LoopyCCodeMapper(CCodeMapper): elif len(mangle_result) == 3: result_dtype, c_name, arg_tgt_dtypes = mangle_result - parameters = [ - perform_cast(self, par, par_dtype, tgt_dtype) + str_parameters = [ + self.rec( + perform_cast(self, par, par_dtype, tgt_dtype), + PREC_NONE, dtype_to_type_context(tgt_dtype)) for par, par_dtype, tgt_dtype in zip( - parameters, par_dtypes, arg_tgt_dtypes)] + expr.parameters, par_dtypes, arg_tgt_dtypes)] else: raise RuntimeError("result of function mangler " "for function '%s' not understood" % identifier) self.seen_functions.add((identifier, c_name, par_dtypes)) + if str_parameters is None: + str_parameters = [ + self.rec(par, PREC_NONE, type_context) + for par in expr.parameters] if c_name is None: raise RuntimeError("unable to find C name for function identifier '%s'" % identifier) - return self.format("%s(%s)", - c_name, self.join_rec(", ", parameters, PREC_NONE)) + return "%s(%s)" % (c_name, ", ".join(str_parameters)) # {{{ deal with complex-valued variables @@ -348,15 +414,22 @@ class LoopyCCodeMapper(CCodeMapper): else: raise RuntimeError - def map_sum(self, expr, enclosing_prec): + def map_sum(self, expr, enclosing_prec, type_context): + from pymbolic.mapper.stringifier import PREC_SUM + + def base_impl(expr, enclosing_prec, type_context): + return self.parenthesize_if_needed( + self.join_rec(" + ", expr.children, PREC_SUM, type_context), + enclosing_prec, PREC_SUM) + if not self.allow_complex: - return CCodeMapper.map_sum(self, expr, enclosing_prec) + return base_impl(expr, enclosing_prec, type_context) tgt_dtype = self.infer_type(expr) is_complex = tgt_dtype.kind == 'c' if not is_complex: - return CCodeMapper.map_sum(self, expr, enclosing_prec) + return base_impl(expr, enclosing_prec, type_context) else: tgt_name = self.complex_type_name(tgt_dtype) @@ -365,9 +438,8 @@ class LoopyCCodeMapper(CCodeMapper): complexes = [child for child in expr.children if 'c' == self.infer_type(child).kind] - from pymbolic.mapper.stringifier import PREC_SUM - real_sum = self.join_rec(" + ", reals, PREC_SUM) - complex_sum = self.join_rec(" + ", complexes, PREC_SUM) + real_sum = self.join_rec(" + ", reals, PREC_SUM, type_context) + complex_sum = self.join_rec(" + ", complexes, PREC_SUM, type_context) if real_sum: result = "%s_fromreal(%s) + %s" % (tgt_name, real_sum, complex_sum) @@ -376,15 +448,22 @@ class LoopyCCodeMapper(CCodeMapper): return self.parenthesize_if_needed(result, enclosing_prec, PREC_SUM) - def map_product(self, expr, enclosing_prec): + def map_product(self, expr, enclosing_prec, type_context): + def base_impl(expr, enclosing_prec, type_context): + # Spaces prevent '**z' (times dereference z), which + # is hard to read. + return self.parenthesize_if_needed( + self.join_rec(" * ", expr.children, PREC_PRODUCT, type_context), + enclosing_prec, PREC_PRODUCT) + if not self.allow_complex: - return CCodeMapper.map_product(self, expr, enclosing_prec) + return base_impl(expr, enclosing_prec, type_context) tgt_dtype = self.infer_type(expr) is_complex = 'c' == tgt_dtype.kind if not is_complex: - return CCodeMapper.map_product(self, expr, enclosing_prec) + return base_impl(expr, enclosing_prec, type_context) else: tgt_name = self.complex_type_name(tgt_dtype) @@ -393,19 +472,18 @@ class LoopyCCodeMapper(CCodeMapper): complexes = [child for child in expr.children if 'c' == self.infer_type(child).kind] - from pymbolic.mapper.stringifier import PREC_PRODUCT - real_prd = self.join_rec("*", reals, PREC_PRODUCT) + real_prd = self.join_rec("*", reals, PREC_PRODUCT, type_context) if len(complexes) == 1: myprec = PREC_PRODUCT else: myprec = PREC_NONE - complex_prd = self.rec(complexes[0], myprec) + complex_prd = self.rec(complexes[0], myprec, type_context) for child in complexes[1:]: complex_prd = "%s_mul(%s, %s)" % ( tgt_name, complex_prd, - self.rec(child, PREC_NONE)) + self.rec(child, PREC_NONE, type_context)) if real_prd: # elementwise semantics are correct @@ -415,9 +493,19 @@ class LoopyCCodeMapper(CCodeMapper): return self.parenthesize_if_needed(result, enclosing_prec, PREC_PRODUCT) - def map_quotient(self, expr, enclosing_prec): + def map_quotient(self, expr, enclosing_prec, type_context): + def base_impl(expr, enclosing_prec, type_context): + return self.parenthesize_if_needed( + "%s / %s" % ( + # space is necessary--otherwise '/*' becomes + # start-of-comment in C. + self.rec(expr.numerator, PREC_PRODUCT, type_context), + # analogous to ^{-1} + self.rec(expr.denominator, PREC_POWER, type_context)), + enclosing_prec, PREC_PRODUCT) + if not self.allow_complex: - return CCodeMapper.map_quotient(self, expr, enclosing_prec) + return base_impl(expr, enclosing_prec, type_context) n_complex = 'c' == self.infer_type(expr.numerator).kind d_complex = 'c' == self.infer_type(expr.denominator).kind @@ -425,36 +513,48 @@ class LoopyCCodeMapper(CCodeMapper): tgt_dtype = self.infer_type(expr) if not (n_complex or d_complex): - return CCodeMapper.map_quotient(self, expr, enclosing_prec) + return base_impl(expr, enclosing_prec, type_context) elif n_complex and not d_complex: # elementwise semnatics are correct - return CCodeMapper.map_quotient(self, expr, enclosing_prec) + return base_impl(expr, enclosing_prec, type_context) elif not n_complex and d_complex: return "%s_rdivide(%s, %s)" % ( self.complex_type_name(tgt_dtype), - self.rec(expr.numerator, PREC_NONE), - self.rec(expr.denominator, PREC_NONE)) + self.rec(expr.numerator, PREC_NONE, type_context), + self.rec(expr.denominator, PREC_NONE, type_context)) else: return "%s_divide(%s, %s)" % ( self.complex_type_name(tgt_dtype), - self.rec(expr.numerator, PREC_NONE), - self.rec(expr.denominator, PREC_NONE)) - - def map_remainder(self, expr, enclosing_prec): - if not self.allow_complex: - return CCodeMapper.map_remainder(self, expr, enclosing_prec) + self.rec(expr.numerator, PREC_NONE, type_context), + self.rec(expr.denominator, PREC_NONE, type_context)) + def map_remainder(self, expr, enclosing_prec, type_context): tgt_dtype = self.infer_type(expr) if 'c' == tgt_dtype.kind: raise RuntimeError("complex remainder not defined") - return CCodeMapper.map_remainder(self, expr, enclosing_prec) + return "(%s %% %s)" % ( + self.rec(expr.numerator, PREC_PRODUCT, type_context), + self.rec(expr.denominator, PREC_POWER, type_context)) # analogous to ^{-1} + + def map_power(self, expr, enclosing_prec, type_context): + def base_impl(expr, enclosing_prec, type_context): + from pymbolic.mapper.stringifier import PREC_NONE + from pymbolic.primitives import is_constant, is_zero + if is_constant(expr.exponent): + if is_zero(expr.exponent): + return "1" + elif is_zero(expr.exponent - 1): + return self.rec(expr.base, enclosing_prec, type_context) + elif is_zero(expr.exponent - 2): + return self.rec(expr.base*expr.base, enclosing_prec, type_context) + + return "pow(%s, %s)" % ( + self.rec(expr.base, PREC_NONE, type_context), + self.rec(expr.exponent, PREC_NONE, type_context)) - def map_power(self, expr, enclosing_prec): if not self.allow_complex: - return CCodeMapper.map_power(self, expr, enclosing_prec) - - from pymbolic.mapper.stringifier import PREC_NONE + return base_impl(expr, enclosing_prec, type_context) tgt_dtype = self.infer_type(expr) if 'c' == tgt_dtype.kind: @@ -462,7 +562,7 @@ class LoopyCCodeMapper(CCodeMapper): value = expr.base for i in range(expr.exponent-1): value = value * expr.base - return self.rec(value, enclosing_prec) + return self.rec(value, enclosing_prec, type_context) else: b_complex = 'c' == self.infer_type(expr.base).kind e_complex = 'c' == self.infer_type(expr.exponent).kind @@ -470,18 +570,22 @@ class LoopyCCodeMapper(CCodeMapper): if b_complex and not e_complex: return "%s_powr(%s, %s)" % ( self.complex_type_name(tgt_dtype), - self.rec(expr.base, PREC_NONE), - self.rec(expr.exponent, PREC_NONE)) + self.rec(expr.base, PREC_NONE, type_context), + self.rec(expr.exponent, PREC_NONE, type_context)) else: return "%s_pow(%s, %s)" % ( self.complex_type_name(tgt_dtype), - self.rec(expr.base, PREC_NONE), - self.rec(expr.exponent, PREC_NONE)) + self.rec(expr.base, PREC_NONE, type_context), + self.rec(expr.exponent, PREC_NONE, type_context)) - return CCodeMapper.map_power(self, expr, enclosing_prec) + return base_impl(self, expr, enclosing_prec, type_context) # }}} + def __call__(self, expr, type_context, prec=PREC_NONE): + from pymbolic.mapper import RecursiveMapper + return RecursiveMapper.__call__(self, expr, prec, type_context) + # }}} # vim: fdm=marker diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 1cf5a8561a287feeb9457d1c4e98a3b6268069d6..80c8a79eafe61f341c5b5b84e7ae2ef54d5c11db 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -12,11 +12,18 @@ def generate_instruction_code(kernel, insn, codegen_state): expr = insn.expression from loopy.codegen.expression import perform_cast - expr = perform_cast(ccm, expr, expr_dtype=ccm.infer_type(expr), - target_dtype=kernel.get_var_descriptor(insn.get_assignee_var_name()).dtype) + target_dtype = kernel.get_var_descriptor(insn.get_assignee_var_name()).dtype + expr_dtype = ccm.infer_type(expr) + + expr = perform_cast(ccm, expr, + expr_dtype=expr_dtype, + target_dtype=target_dtype) from cgen import Assign - insn_code = Assign(ccm(insn.assignee), ccm(expr)) + from loopy.codegen.expression import dtype_to_type_context + insn_code = Assign( + ccm(insn.assignee, prec=None, type_context=None), + ccm(expr, prec=None, type_context=dtype_to_type_context(target_dtype))) from loopy.codegen.bounds import wrap_in_bounds_checks insn_inames = kernel.insn_inames(insn) insn_code, impl_domain = wrap_in_bounds_checks(