diff --git a/MEMO b/MEMO
index 34286cc941f21483746265df716c3438301fa6b8..b03133efe6e622f6fb09d0cea043d2e25469fc6d 100644
--- a/MEMO
+++ b/MEMO
@@ -65,7 +65,6 @@ To-do
 
 - Scalar insn priority
 
-- What to do about constants in codegen? (...f suffix, complex types)
 
 - If finding a maximum proves troublesome, move parameters into the domain
 
@@ -123,6 +122,9 @@ Future ideas
 Dealt with
 ^^^^^^^^^^
 
+- What to do about constants in codegen? (...f suffix, complex types)
+  -> dealt with by type contexts
+
 - relating to Multi-Domain
   - Make sure that variables that enter into loop bounds are only written
     exactly once. [DONE]
diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py
index 88a64e7b270f4c1077f645710d5ba9a8111f4357..0237295b2ec60c59d010a3cc23f213c25886ea10 100644
--- a/loopy/codegen/bounds.py
+++ b/loopy/codegen/bounds.py
@@ -173,7 +173,7 @@ def wrap_in_for_from_constraints(ccm, iname, constraint_bset, stmt):
             from pymbolic import var
             rhs += iname_coeff*var(iname)
             end_conds.append("%s >= 0" %
-                    ccm(cfm(rhs)))
+                    ccm(cfm(rhs), 'i'))
         else: #  iname_coeff > 0
             kind, bound = solve_constraint_for_bound(cns, iname)
             assert kind == ">="
@@ -205,7 +205,7 @@ def wrap_in_for_from_constraints(ccm, iname, constraint_bset, stmt):
         from cgen import For
         from loopy.codegen import wrap_in
         return wrap_in(For,
-                "int %s = %s" % (iname, ccm(start_expr)),
+                "int %s = %s" % (iname, ccm(start_expr, 'i')),
                 " && ".join(end_conds),
                 "++%s" % iname,
                 stmt)
diff --git a/loopy/codegen/expression.py b/loopy/codegen/expression.py
index 22f9624d15fffb28ea109e7f64caca3753cc993d..e5001e95fb06b4ab84e2c0d461bb0d2773c666d4 100644
--- a/loopy/codegen/expression.py
+++ b/loopy/codegen/expression.py
@@ -2,8 +2,9 @@ from __future__ import division
 
 import numpy as np
 
-from pymbolic.mapper.c_code import CCodeMapper as CCodeMapper
-from pymbolic.mapper.stringifier import PREC_NONE
+from pymbolic.mapper import RecursiveMapper
+from pymbolic.mapper.stringifier import (PREC_NONE, PREC_CALL, PREC_PRODUCT,
+        PREC_POWER)
 from pymbolic.mapper import CombineMapper
 
 # {{{ type inference
@@ -57,7 +58,7 @@ class TypeInferenceMapper(CombineMapper):
         if isinstance(identifier, Variable):
             identifier = identifier.name
 
-        arg_dtypes = tuple(self.rec(par) for par in expr.parameters)
+        arg_dtypes = tuple(self.rec(par, None) for par in expr.parameters)
 
         mangle_result = self.kernel.mangle_function(identifier, arg_dtypes)
         if mangle_result is not None:
@@ -118,7 +119,25 @@ def perform_cast(ccm, expr, expr_dtype, target_dtype):
 
 # {{{ C code mapper
 
-class LoopyCCodeMapper(CCodeMapper):
+# type_context may be:
+# - 'i' for integer
+# - 'f' for single-precision floating point
+# - 'd' for double-precision floating point
+# or None for 'no known context'.
+
+def dtype_to_type_context(dtype):
+    """Return the type context ('i', 'd', 'f') for *dtype*, or None."""
+    dtype = np.dtype(dtype)
+    if dtype.kind in ('i', 'u'):  # unsigned integers are integers, too
+        return 'i'
+    if dtype in [np.float64, np.complex128]:
+        return 'd'
+    if dtype in [np.float32, np.complex64]:
+        return 'f'
+    return None
+
+
+class LoopyCCodeMapper(RecursiveMapper):
     def __init__(self, kernel, seen_dtypes, seen_functions, var_subst_map={},
             with_annotation=False, allow_complex=False):
         """
@@ -127,7 +146,6 @@ class LoopyCCodeMapper(CCodeMapper):
             functions that were encountered.
         """
 
-        CCodeMapper.__init__(self)
         self.kernel = kernel
         self.seen_dtypes = seen_dtypes
         self.seen_functions = seen_functions
@@ -138,6 +156,8 @@ class LoopyCCodeMapper(CCodeMapper):
         self.with_annotation = with_annotation
         self.var_subst_map = var_subst_map.copy()
 
+    # {{{ copy helpers
+
     def copy(self, var_subst_map=None):
         if var_subst_map is None:
             var_subst_map = self.var_subst_map
@@ -146,11 +166,6 @@ class LoopyCCodeMapper(CCodeMapper):
                 with_annotation=self.with_annotation,
                 allow_complex=self.allow_complex)
 
-    def infer_type(self, expr):
-        result = self.type_inf_mapper(expr)
-        self.seen_dtypes.add(result)
-        return result
-
     def copy_and_assign(self, name, value):
         """Make a copy of self with variable *name* fixed to *value*."""
         var_subst_map = self.var_subst_map.copy()
@@ -164,18 +179,41 @@ class LoopyCCodeMapper(CCodeMapper):
         var_subst_map.update(assignments)
         return self.copy(var_subst_map=var_subst_map)
 
-    def map_common_subexpression(self, expr, prec):
+    # }}}
+
+    # {{{ helpers
+
+    def infer_type(self, expr):
+        result = self.type_inf_mapper(expr)
+        self.seen_dtypes.add(result)  # record each dtype that inference yields
+        return result
+
+    def join_rec(self, joiner, iterable, prec, type_context):
+        f = joiner.join("%s" for i in iterable)  # one "%s" slot per item
+        return f % tuple(self.rec(i, prec, type_context) for i in iterable)  # 2nd pass over iterable
+
+    def parenthesize_if_needed(self, s, enclosing_prec, my_prec):
+        if enclosing_prec > my_prec:  # wrap only when the enclosing precedence is higher
+            return "(%s)" % s
+        else:
+            return s
+
+    # }}}
+
+    def map_common_subexpression(self, expr, prec, type_context):
         raise RuntimeError("common subexpression should have been eliminated upon "
                 "entry to loopy")
 
-    def map_variable(self, expr, prec):
+    def map_variable(self, expr, enclosing_prec, type_context):
         if expr.name in self.var_subst_map:
             if self.with_annotation:
                 return " /* %s */ %s" % (
                         expr.name,
-                        self.rec(self.var_subst_map[expr.name], prec))
+                        self.rec(self.var_subst_map[expr.name],
+                            enclosing_prec, type_context))
             else:
-                return str(self.rec(self.var_subst_map[expr.name], prec))
+                return str(self.rec(self.var_subst_map[expr.name],
+                    enclosing_prec, type_context))
         elif expr.name in self.kernel.arg_dict:
             arg = self.kernel.arg_dict[expr.name]
             from loopy.kernel import _ShapedArg
@@ -188,15 +226,22 @@ class LoopyCCodeMapper(CCodeMapper):
                 _, c_name = result
                 return c_name
 
-        return CCodeMapper.map_variable(self, expr, prec)
+        return expr.name
 
-    def map_tagged_variable(self, expr, enclosing_prec):
+    def map_tagged_variable(self, expr, enclosing_prec, type_context):
         return expr.name
 
-    def map_subscript(self, expr, enclosing_prec):
+    def map_subscript(self, expr, enclosing_prec, type_context):
+        def base_impl(expr, enclosing_prec, type_context):
+            return self.parenthesize_if_needed(
+                    "%s[%s]" % (
+                        self.rec(expr.aggregate, PREC_CALL, type_context),
+                        self.rec(expr.index, PREC_NONE, 'i')),
+                    enclosing_prec, PREC_CALL)
+
         from pymbolic.primitives import Variable
         if not isinstance(expr.aggregate, Variable):
-            return CCodeMapper.map_subscript(self, expr, enclosing_prec)
+            return base_impl(expr, enclosing_prec, type_context)
 
         if expr.aggregate.name in self.kernel.arg_dict:
             arg = self.kernel.arg_dict[expr.aggregate.name]
@@ -207,7 +252,7 @@ class LoopyCCodeMapper(CCodeMapper):
 
                 base_access = ("read_imagef(%s, loopy_sampler, (float%d)(%s))"
                         % (arg.name, arg.dimensions,
-                            ", ".join(self.rec(idx, PREC_NONE)
+                            ", ".join(self.rec(idx, PREC_NONE, 'i')
                                 for idx in expr.index[::-1])))
 
                 if arg.dtype == np.float32:
@@ -239,10 +284,11 @@ class LoopyCCodeMapper(CCodeMapper):
                     return "*" + expr.aggregate.name
 
                 from pymbolic.primitives import Subscript
-                return CCodeMapper.map_subscript(self,
+                return base_impl(
                         Subscript(expr.aggregate, arg.offset+sum(
                             stride*expr_i for stride, expr_i in zip(
-                                ary_strides, index_expr))), enclosing_prec)
+                                ary_strides, index_expr))),
+                        enclosing_prec, type_context)
 
 
         elif expr.aggregate.name in self.kernel.temporary_variables:
@@ -252,53 +298,68 @@ class LoopyCCodeMapper(CCodeMapper):
             else:
                 index = (expr.index,)
 
-            return (temp_var.name + "".join("[%s]" % self.rec(idx, PREC_NONE)
+            return (temp_var.name + "".join("[%s]" % self.rec(idx, PREC_NONE, 'i')
                 for idx in index))
 
         else:
             raise RuntimeError("nothing known about variable '%s'" % expr.aggregate.name)
 
-    def map_floor_div(self, expr, prec):
+    def map_floor_div(self, expr, enclosing_prec, type_context):
         from loopy.isl_helpers import is_nonnegative
         num_nonneg = is_nonnegative(expr.numerator, self.kernel.domain)
         den_nonneg = is_nonnegative(expr.denominator, self.kernel.domain)
 
         if den_nonneg:
             if num_nonneg:
-                return CCodeMapper.map_floor_div(self, expr, prec)
+                # Emit C '/' ('//' would start a comment) and always
+                # parenthesize: x*(n/d) != x*n/d in integer arithmetic.
+                return "(%s / %s)" % (
+                        self.rec(expr.numerator, PREC_PRODUCT, 'i'),
+                        # analogous to ^{-1}
+                        self.rec(expr.denominator, PREC_POWER, 'i'))
             else:
                 return ("int_floor_div_pos_b(%s, %s)"
-                        % (self.rec(expr.numerator, PREC_NONE),
-                            expr.denominator))
+                        % (self.rec(expr.numerator, PREC_NONE, 'i'),
+                            self.rec(expr.denominator, PREC_NONE, 'i')))
         else:
             return ("int_floor_div(%s, %s)"
-                    % (self.rec(expr.numerator, PREC_NONE),
-                        self.rec(expr.denominator, PREC_NONE)))
+                    % (self.rec(expr.numerator, PREC_NONE, 'i'),
+                        self.rec(expr.denominator, PREC_NONE, 'i')))
 
-    def map_min(self, expr, prec):
+    def map_min(self, expr, prec, type_context):
         what = type(expr).__name__.lower()
 
         children = expr.children[:]
 
-        result = self.rec(children.pop(), PREC_NONE)
+        result = self.rec(children.pop(), PREC_NONE, type_context)
         while children:
             result = "%s(%s, %s)" % (what,
-                        self.rec(children.pop(), PREC_NONE),
+                        self.rec(children.pop(), PREC_NONE, type_context),
                         result)
 
         return result
 
     map_max = map_min
 
-    def map_constant(self, expr, enclosing_prec):
+    def map_constant(self, expr, enclosing_prec, type_context):
         if isinstance(expr, complex):
-            # FIXME: type-variable
-            return "(cdouble_t) (%s, %s)" % (repr(expr.real), repr(expr.imag))
+            cast_type = "cdouble_t"
+            if type_context == "f":
+                cast_type = "cfloat_t"
+
+            return "(%s) (%s, %s)" % (cast_type, repr(expr.real), repr(expr.imag))
         else:
-            # FIXME: type-variable
-            return repr(float(expr))
+            if type_context == "f":
+                return repr(float(expr))+"f"  # 'f' suffix: single-precision literal
+            elif type_context == "d":
+                return repr(float(expr))
+            elif type_context == "i":
+                return str(int(expr))
+            else:  # no known context: refuse to guess the literal's type
+                raise RuntimeError("don't know how to generate code "
+                        "for constant '%s'" % expr)
 
-    def map_call(self, expr, enclosing_prec):
+    def map_call(self, expr, enclosing_prec, type_context):
         from pymbolic.primitives import Variable
         from pymbolic.mapper.stringifier import PREC_NONE
 
@@ -311,7 +372,7 @@ class LoopyCCodeMapper(CCodeMapper):
 
         par_dtypes = tuple(self.infer_type(par) for par in expr.parameters)
 
-        parameters = expr.parameters
+        str_parameters = None
 
         mangle_result = self.kernel.mangle_function(identifier, par_dtypes)
         if mangle_result is not None:
@@ -320,23 +381,28 @@ class LoopyCCodeMapper(CCodeMapper):
             elif len(mangle_result) == 3:
                 result_dtype, c_name, arg_tgt_dtypes = mangle_result
 
-                parameters = [
-                        perform_cast(self, par, par_dtype, tgt_dtype)
+                str_parameters = [
+                        self.rec(
+                            perform_cast(self, par, par_dtype, tgt_dtype),
+                            PREC_NONE, dtype_to_type_context(tgt_dtype))
                         for par, par_dtype, tgt_dtype in zip(
-                            parameters, par_dtypes, arg_tgt_dtypes)]
+                            expr.parameters, par_dtypes, arg_tgt_dtypes)]
             else:
                 raise RuntimeError("result of function mangler "
                         "for function '%s' not understood"
                         % identifier)
 
         self.seen_functions.add((identifier, c_name, par_dtypes))
+        if str_parameters is None:
+            str_parameters = [
+                    self.rec(par, PREC_NONE, type_context)
+                    for par in expr.parameters]
 
         if c_name is None:
             raise RuntimeError("unable to find C name for function identifier '%s'"
                     % identifier)
 
-        return self.format("%s(%s)",
-                c_name, self.join_rec(", ", parameters, PREC_NONE))
+        return "%s(%s)" % (c_name, ", ".join(str_parameters))
 
     # {{{ deal with complex-valued variables
 
@@ -348,15 +414,22 @@ class LoopyCCodeMapper(CCodeMapper):
         else:
             raise RuntimeError
 
-    def map_sum(self, expr, enclosing_prec):
+    def map_sum(self, expr, enclosing_prec, type_context):
+        from pymbolic.mapper.stringifier import PREC_SUM
+
+        def base_impl(expr, enclosing_prec, type_context):
+            return self.parenthesize_if_needed(
+                    self.join_rec(" + ", expr.children, PREC_SUM, type_context),
+                    enclosing_prec, PREC_SUM)
+
         if not self.allow_complex:
-            return CCodeMapper.map_sum(self, expr, enclosing_prec)
+            return base_impl(expr, enclosing_prec, type_context)
 
         tgt_dtype = self.infer_type(expr)
         is_complex = tgt_dtype.kind == 'c'
 
         if not is_complex:
-            return CCodeMapper.map_sum(self, expr, enclosing_prec)
+            return base_impl(expr, enclosing_prec, type_context)
         else:
             tgt_name = self.complex_type_name(tgt_dtype)
 
@@ -365,9 +438,8 @@ class LoopyCCodeMapper(CCodeMapper):
             complexes = [child for child in expr.children
                     if 'c' == self.infer_type(child).kind]
 
-            from pymbolic.mapper.stringifier import PREC_SUM
-            real_sum = self.join_rec(" + ", reals, PREC_SUM)
-            complex_sum = self.join_rec(" + ", complexes, PREC_SUM)
+            real_sum = self.join_rec(" + ", reals, PREC_SUM, type_context)
+            complex_sum = self.join_rec(" + ", complexes, PREC_SUM, type_context)
 
             if real_sum:
                 result = "%s_fromreal(%s) + %s" % (tgt_name, real_sum, complex_sum)
@@ -376,15 +448,22 @@ class LoopyCCodeMapper(CCodeMapper):
 
             return self.parenthesize_if_needed(result, enclosing_prec, PREC_SUM)
 
-    def map_product(self, expr, enclosing_prec):
+    def map_product(self, expr, enclosing_prec, type_context):
+        def base_impl(expr, enclosing_prec, type_context):
+            # Spaces prevent '**z' (times dereference z), which
+            # is hard to read.
+            return self.parenthesize_if_needed(
+                    self.join_rec(" * ", expr.children, PREC_PRODUCT, type_context),
+                    enclosing_prec, PREC_PRODUCT)
+
         if not self.allow_complex:
-            return CCodeMapper.map_product(self, expr, enclosing_prec)
+            return base_impl(expr, enclosing_prec, type_context)
 
         tgt_dtype = self.infer_type(expr)
         is_complex = 'c' == tgt_dtype.kind
 
         if not is_complex:
-            return CCodeMapper.map_product(self, expr, enclosing_prec)
+            return base_impl(expr, enclosing_prec, type_context)
         else:
             tgt_name = self.complex_type_name(tgt_dtype)
 
@@ -393,19 +472,18 @@ class LoopyCCodeMapper(CCodeMapper):
             complexes = [child for child in expr.children
                     if 'c' == self.infer_type(child).kind]
 
-            from pymbolic.mapper.stringifier import PREC_PRODUCT
-            real_prd = self.join_rec("*", reals, PREC_PRODUCT)
+            real_prd = self.join_rec("*", reals, PREC_PRODUCT, type_context)
 
             if len(complexes) == 1:
                 myprec = PREC_PRODUCT
             else:
                 myprec = PREC_NONE
 
-            complex_prd = self.rec(complexes[0], myprec)
+            complex_prd = self.rec(complexes[0], myprec, type_context)
             for child in complexes[1:]:
                 complex_prd = "%s_mul(%s, %s)" % (
                         tgt_name, complex_prd,
-                        self.rec(child, PREC_NONE))
+                        self.rec(child, PREC_NONE, type_context))
 
             if real_prd:
                 # elementwise semantics are correct
@@ -415,9 +493,19 @@ class LoopyCCodeMapper(CCodeMapper):
 
             return self.parenthesize_if_needed(result, enclosing_prec, PREC_PRODUCT)
 
-    def map_quotient(self, expr, enclosing_prec):
+    def map_quotient(self, expr, enclosing_prec, type_context):
+        def base_impl(expr, enclosing_prec, type_context):
+            return self.parenthesize_if_needed(
+                    "%s / %s" % (
+                        # space is necessary--otherwise '/*' becomes
+                        # start-of-comment in C.
+                        self.rec(expr.numerator, PREC_PRODUCT, type_context),
+                        # analogous to ^{-1}
+                        self.rec(expr.denominator, PREC_POWER, type_context)),
+                    enclosing_prec, PREC_PRODUCT)
+
         if not self.allow_complex:
-            return CCodeMapper.map_quotient(self, expr, enclosing_prec)
+            return base_impl(expr, enclosing_prec, type_context)
 
         n_complex = 'c' == self.infer_type(expr.numerator).kind
         d_complex = 'c' == self.infer_type(expr.denominator).kind
@@ -425,36 +513,48 @@ class LoopyCCodeMapper(CCodeMapper):
         tgt_dtype = self.infer_type(expr)
 
         if not (n_complex or d_complex):
-            return CCodeMapper.map_quotient(self, expr, enclosing_prec)
+            return base_impl(expr, enclosing_prec, type_context)
         elif n_complex and not d_complex:
             # elementwise semnatics are correct
-            return CCodeMapper.map_quotient(self, expr, enclosing_prec)
+            return base_impl(expr, enclosing_prec, type_context)
         elif not n_complex and d_complex:
             return "%s_rdivide(%s, %s)" % (
                     self.complex_type_name(tgt_dtype),
-                    self.rec(expr.numerator, PREC_NONE),
-                    self.rec(expr.denominator, PREC_NONE))
+                    self.rec(expr.numerator, PREC_NONE, type_context),
+                    self.rec(expr.denominator, PREC_NONE, type_context))
         else:
             return "%s_divide(%s, %s)" % (
                     self.complex_type_name(tgt_dtype),
-                    self.rec(expr.numerator, PREC_NONE),
-                    self.rec(expr.denominator, PREC_NONE))
-
-    def map_remainder(self, expr, enclosing_prec):
-        if not self.allow_complex:
-            return CCodeMapper.map_remainder(self, expr, enclosing_prec)
+                    self.rec(expr.numerator, PREC_NONE, type_context),
+                    self.rec(expr.denominator, PREC_NONE, type_context))
 
+    def map_remainder(self, expr, enclosing_prec, type_context):
         tgt_dtype = self.infer_type(expr)
         if 'c' == tgt_dtype.kind:
             raise RuntimeError("complex remainder not defined")
 
-        return CCodeMapper.map_remainder(self, expr, enclosing_prec)
+        return "(%s %% %s)" % (
+                    self.rec(expr.numerator, PREC_PRODUCT, type_context),
+                    self.rec(expr.denominator, PREC_POWER, type_context)) # analogous to ^{-1}
+
+    def map_power(self, expr, enclosing_prec, type_context):
+        def base_impl(expr, enclosing_prec, type_context):
+            from pymbolic.mapper.stringifier import PREC_NONE
+            from pymbolic.primitives import is_constant, is_zero
+            if is_constant(expr.exponent):
+                if is_zero(expr.exponent):
+                    return "1"
+                elif is_zero(expr.exponent - 1):
+                    return self.rec(expr.base, enclosing_prec, type_context)
+                elif is_zero(expr.exponent - 2):
+                    return self.rec(expr.base*expr.base, enclosing_prec, type_context)
+
+            return "pow(%s, %s)" % (
+                    self.rec(expr.base, PREC_NONE, type_context),
+                    self.rec(expr.exponent, PREC_NONE, type_context))
 
-    def map_power(self, expr, enclosing_prec):
         if not self.allow_complex:
-            return CCodeMapper.map_power(self, expr, enclosing_prec)
-
-        from pymbolic.mapper.stringifier import PREC_NONE
+            return base_impl(expr, enclosing_prec, type_context)
 
         tgt_dtype = self.infer_type(expr)
         if 'c' == tgt_dtype.kind:
@@ -462,7 +562,7 @@ class LoopyCCodeMapper(CCodeMapper):
                 value = expr.base
                 for i in range(expr.exponent-1):
                     value = value * expr.base
-                return self.rec(value, enclosing_prec)
+                return self.rec(value, enclosing_prec, type_context)
             else:
                 b_complex = 'c' == self.infer_type(expr.base).kind
                 e_complex = 'c' == self.infer_type(expr.exponent).kind
@@ -470,18 +570,22 @@ class LoopyCCodeMapper(CCodeMapper):
                 if b_complex and not e_complex:
                     return "%s_powr(%s, %s)" % (
                             self.complex_type_name(tgt_dtype),
-                            self.rec(expr.base, PREC_NONE),
-                            self.rec(expr.exponent, PREC_NONE))
+                            self.rec(expr.base, PREC_NONE, type_context),
+                            self.rec(expr.exponent, PREC_NONE, type_context))
                 else:
                     return "%s_pow(%s, %s)" % (
                             self.complex_type_name(tgt_dtype),
-                            self.rec(expr.base, PREC_NONE),
-                            self.rec(expr.exponent, PREC_NONE))
+                            self.rec(expr.base, PREC_NONE, type_context),
+                            self.rec(expr.exponent, PREC_NONE, type_context))
 
-        return CCodeMapper.map_power(self, expr, enclosing_prec)
+        return base_impl(expr, enclosing_prec, type_context)  # closure: no 'self' argument
 
     # }}}
 
+    def __call__(self, expr, type_context, prec=PREC_NONE):
+        # The map_* methods take (expr, prec, type_context); reorder here.
+        return RecursiveMapper.__call__(self, expr, prec, type_context)
+
 # }}}
 
 # vim: fdm=marker
diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py
index 1cf5a8561a287feeb9457d1c4e98a3b6268069d6..80c8a79eafe61f341c5b5b84e7ae2ef54d5c11db 100644
--- a/loopy/codegen/instruction.py
+++ b/loopy/codegen/instruction.py
@@ -12,11 +12,18 @@ def generate_instruction_code(kernel, insn, codegen_state):
     expr = insn.expression
 
     from loopy.codegen.expression import perform_cast
-    expr = perform_cast(ccm, expr, expr_dtype=ccm.infer_type(expr),
-            target_dtype=kernel.get_var_descriptor(insn.get_assignee_var_name()).dtype)
+    target_dtype = kernel.get_var_descriptor(insn.get_assignee_var_name()).dtype
+    expr_dtype = ccm.infer_type(expr)
+
+    expr = perform_cast(ccm, expr,
+            expr_dtype=expr_dtype,
+            target_dtype=target_dtype)
 
     from cgen import Assign
-    insn_code = Assign(ccm(insn.assignee), ccm(expr))
+    from loopy.codegen.expression import dtype_to_type_context
+    insn_code = Assign(  # prec is left at its PREC_NONE default; None is no precedence
+            ccm(insn.assignee, type_context=None),
+            ccm(expr, type_context=dtype_to_type_context(target_dtype)))
     from loopy.codegen.bounds import wrap_in_bounds_checks
     insn_inames = kernel.insn_inames(insn)
     insn_code, impl_domain = wrap_in_bounds_checks(