diff --git a/loopy/creation.py b/loopy/creation.py
index 71806998f22d063d4a6d4ab3380084e6483998ce..0908498ce013d76e538ad16076595e12e491e787 100644
--- a/loopy/creation.py
+++ b/loopy/creation.py
@@ -51,7 +51,8 @@ def expand_cses(knl):
         newly_created_vars.add(new_var_name)
 
         if dtype is None:
-            dtype = tim(expr)
+            from loopy import infer_type
+            dtype = infer_type
 
         from loopy.kernel import TemporaryVariable
         new_temp_vars[new_var_name] = TemporaryVariable(
@@ -78,9 +79,6 @@ def expand_cses(knl):
     newly_created_insn_ids = set()
     new_temp_vars = knl.temporary_variables.copy()
 
-    from loopy.codegen.expression import TypeInferenceMapper
-    tim = TypeInferenceMapper(knl, new_temp_vars)
-
     for insn in knl.instructions:
         new_insns.append(insn.copy(expression=cseam(insn.expression)))
 
@@ -96,9 +94,6 @@ def create_temporaries(knl):
     new_insns = []
     new_temp_vars = knl.temporary_variables.copy()
 
-    from loopy.codegen.expression import TypeInferenceMapper
-    tim = TypeInferenceMapper(knl, new_temp_vars)
-
     for insn in knl.instructions:
         from loopy.kernel import (
                 find_var_base_indices_and_shape_from_inames,
@@ -107,13 +102,6 @@ def create_temporaries(knl):
         if insn.temp_var_type is not None:
             assignee_name = insn.get_assignee_var_name()
 
-            temp_var_type = insn.temp_var_type
-            from loopy import infer_type
-            if temp_var_type is infer_type:
-                # FIXME dependencies among type-inferred variables
-                # are not allowed yet.
-                temp_var_type = tim(insn.expression)
-
             assignee_indices = []
             from pymbolic.primitives import Variable
             for index_expr in insn.get_assignee_indices():
@@ -133,7 +121,7 @@ def create_temporaries(knl):
 
             new_temp_vars[assignee_name] = TemporaryVariable(
                     name=assignee_name,
-                    dtype=temp_var_type,
+                    dtype=insn.temp_var_type,
                     is_local=None,
                     base_indices=base_indices,
                     shape=shape)
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index f479c2e48cae5575988e4a4f871c19ed19121238..0030fba0633ce996b0e075c4b50ddc8bb2fae953 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -6,6 +6,59 @@ import pyopencl.characterize as cl_char
 
 
 
+# {{{ infer types of temporaries
+
+def infer_types_of_temporaries(kernel):
+    """Return a copy of *kernel* in which each temporary whose dtype is
+    :data:`loopy.infer_type` has the dtype inferred from the expressions of
+    its writers. Raises :exc:`RuntimeError` if that fails or is ambiguous."""
+    from loopy import infer_type
+    from loopy.codegen.expression import (
+            TypeInferenceMapper, TypeInferenceFailure)
+    from pytools import is_single_valued
+
+    new_temp_vars = {}
+    queue = []
+    for tv in kernel.temporary_variables.itervalues():
+        if tv.dtype is infer_type:
+            queue.append(tv)
+        else:
+            new_temp_vars[tv.name] = tv
+
+    # NOTE(review): the mapper only gets *kernel*, not new_temp_vars; confirm
+    # that types inferred below are visible to inferences that depend on them.
+    tim = TypeInferenceMapper(kernel)
+    writer_map = kernel.writer_map()
+
+    first_failure = None
+    while queue:
+        tv = queue.pop(0)
+        try:
+            dtypes = [tim(kernel.id_to_insn[writer].expression)
+                    for writer in writer_map[tv.name]]
+        except TypeInferenceFailure:
+            if tv is first_failure:
+                # failed twice with no progress in between, give up.
+                raise RuntimeError("could not determine type of '%s'"
+                        % tv.name)
+            if first_failure is None:
+                # remember the first failure for this round through the queue
+                first_failure = tv
+            # can't infer type yet: requeue, and skip the code below, which
+            # would otherwise see a stale or unbound 'dtypes'.
+            queue.append(tv)
+            continue
+
+        if not is_single_valued(dtypes):
+            raise RuntimeError("ambiguous type inference for '%s'"
+                    % tv.name)
+        new_temp_vars[tv.name] = tv.copy(dtype=dtypes[0])
+        first_failure = None  # we've made progress, reset failure flag.
+
+    return kernel.copy(temporary_variables=new_temp_vars)
+
+# }}}
+
 # {{{ transform ilp into lower-level constructs
 
 def realize_ilp(kernel):
@@ -776,6 +829,11 @@ def adjust_local_temp_var_storage(kernel):
 
 
 def preprocess_kernel(kernel):
+    # All type inference must happen *after* this point, because only then
+    # are all the function return dtype getters available.
+
+    kernel = infer_types_of_temporaries(kernel)
+
     from loopy.subst import expand_subst
     kernel = expand_subst(kernel)