diff --git a/contrib/floopy-highlighting/floopy.vim b/contrib/floopy-highlighting/floopy.vim
index 59c5b15a431e3c28072c96afd824b6937f973b56..57c09a652c0cb9141d6764d300ebb3618577b05d 100644
--- a/contrib/floopy-highlighting/floopy.vim
+++ b/contrib/floopy-highlighting/floopy.vim
@@ -7,7 +7,7 @@
 " :set filetype=floopy
 "
 " You may also include a line
-" vim: filetype=pyopencl.python
+" vim: filetype=floopy.python
 " at the end of your file to set the file type automatically.
 "
 " Another option is to include the following in your .vimrc
@@ -16,24 +16,7 @@
 runtime! syntax/fortran.vim
 
 unlet b:current_syntax
-try
-  syntax include @clCode syntax/opencl.vim
-catch
-  syntax include @clCode syntax/c.vim
-endtry
-
-if exists('b:current_syntax')
-  let s:current_syntax=b:current_syntax
-  " Remove current syntax definition, as some syntax files (e.g. cpp.vim)
-  " do nothing if b:current_syntax is defined.
-  unlet b:current_syntax
-endif
-
 syntax include @LoopyPython syntax/python.vim
-try
-  syntax include @LoopyPython after/syntax/python.vim
-catch
-endtry
 
 if exists('s:current_syntax')
   let b:current_syntax=s:current_syntax
@@ -43,6 +26,6 @@ endif
 
 syntax region textSnipLoopyPython
 \ matchgroup=Comment
-\ start='$loopy begin transform' end='$loopy end transform'
+\ start='$loopy begin' end='$loopy end'
 \ containedin=ALL
 \ contains=@LoopyPython
diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index 1f64e59e6c9e8ab6d599ea0cc61937df5a60ffcb..f930df60c93b1be4df80bcfd2789c60dca1e3654 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -902,43 +902,54 @@ class LoopKernel(RecordWithoutPickling):
     def __str__(self):
         lines = []
 
+        from loopy.preprocess import add_default_dependencies
+        kernel = add_default_dependencies(self)
+
         sep = 75*"-"
         lines.append(sep)
-        lines.append("KERNEL: " + self.name)
+        lines.append("KERNEL: " + kernel.name)
         lines.append(sep)
         lines.append("ARGUMENTS:")
-        for arg_name in sorted(self.arg_dict):
-            lines.append(str(self.arg_dict[arg_name]))
+        for arg_name in sorted(kernel.arg_dict):
+            lines.append(str(kernel.arg_dict[arg_name]))
         lines.append(sep)
         lines.append("DOMAINS:")
-        for dom, parents in zip(self.domains, self.all_parents_per_domain()):
+        for dom, parents in zip(kernel.domains, kernel.all_parents_per_domain()):
             lines.append(len(parents)*"  " + str(dom))
 
         lines.append(sep)
         lines.append("INAME IMPLEMENTATION TAGS:")
-        for iname in sorted(self.all_inames()):
-            line = "%s: %s" % (iname, self.iname_to_tag.get(iname))
+        for iname in sorted(kernel.all_inames()):
+            line = "%s: %s" % (iname, kernel.iname_to_tag.get(iname))
             lines.append(line)
 
-        if self.temporary_variables:
+        if kernel.temporary_variables:
             lines.append(sep)
             lines.append("TEMPORARIES:")
-            for tv in sorted(six.itervalues(self.temporary_variables),
+            for tv in sorted(six.itervalues(kernel.temporary_variables),
                     key=lambda tv: tv.name):
                 lines.append(str(tv))
 
-        if self.substitutions:
+        if kernel.substitutions:
             lines.append(sep)
             lines.append("SUBSTIUTION RULES:")
-            for rule_name in sorted(six.iterkeys(self.substitutions)):
-                lines.append(str(self.substitutions[rule_name]))
+            for rule_name in sorted(six.iterkeys(kernel.substitutions)):
+                lines.append(str(kernel.substitutions[rule_name]))
 
         lines.append(sep)
         lines.append("INSTRUCTIONS:")
         loop_list_width = 35
 
-        import loopy as lp
-        for insn in self.instructions:
+        printed_insn_ids = set()
+
+        def print_insn(insn):
+            if insn.id in printed_insn_ids:
+                return
+            printed_insn_ids.add(insn.id)
+
+            for dep_id in insn.insn_deps:
+                print_insn(kernel.id_to_insn[dep_id])
+
             if isinstance(insn, lp.ExpressionInstruction):
                 lhs = str(insn.assignee)
                 rhs = str(insn.expression)
@@ -952,7 +963,7 @@ class LoopKernel(RecordWithoutPickling):
 
                 trailing = ["    "+l for l in insn.code.split("\n")]
 
-            loop_list = ",".join(sorted(self.insn_inames(insn)))
+            loop_list = ",".join(sorted(kernel.insn_inames(insn)))
 
             options = [insn.id]
             if insn.priority:
@@ -975,8 +986,12 @@ class LoopKernel(RecordWithoutPickling):
             if insn.predicates:
                 lines.append(10*" " + "if (%s)" % " && ".join(insn.predicates))
 
+        import loopy as lp
+        for insn in kernel.instructions:
+            print_insn(insn)
+
         dep_lines = []
-        for insn in self.instructions:
+        for insn in kernel.instructions:
             if insn.insn_deps:
                 dep_lines.append("%s : %s" % (insn.id, ",".join(insn.insn_deps)))
         if dep_lines:
@@ -987,10 +1002,10 @@ class LoopKernel(RecordWithoutPickling):
 
         lines.append(sep)
 
-        if self.schedule is not None:
+        if kernel.schedule is not None:
             lines.append("SCHEDULE:")
             from loopy.schedule import dump_schedule
-            lines.append(dump_schedule(self.schedule))
+            lines.append(dump_schedule(kernel.schedule))
             lines.append(sep)
 
         return "\n".join(lines)
diff --git a/loopy/precompute.py b/loopy/precompute.py
index 935d6d44040cf56b3ca3bdbbec26e22ec750a05d..726cc0786d44c59d5ba6179e6e69cc837a0b2992 100644
--- a/loopy/precompute.py
+++ b/loopy/precompute.py
@@ -132,7 +132,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper):
             access_descriptors, array_base_map,
             storage_axis_names, storage_axis_sources,
             non1_storage_axis_names,
-            target_var_name):
+            temporary_name):
         super(RuleInvocationReplacer, self).__init__(rule_mapping_context)
 
         self.subst_name = subst_name
@@ -146,7 +146,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper):
         self.storage_axis_sources = storage_axis_sources
         self.non1_storage_axis_names = non1_storage_axis_names
 
-        self.target_var_name = target_var_name
+        self.temporary_name = temporary_name
 
     def map_substitution(self, name, tag, arguments, expn_state):
         if not (
@@ -196,7 +196,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper):
             ax_index = simplify_via_aff(ax_index - sax_base_idx)
             stor_subscript.append(ax_index)
 
-        new_outer_expr = var(self.target_var_name)
+        new_outer_expr = var(self.temporary_name)
         if stor_subscript:
             new_outer_expr = new_outer_expr.index(tuple(stor_subscript))
 
@@ -210,9 +210,9 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper):
 
 
 def precompute(kernel, subst_use, sweep_inames=[], within=None,
-        storage_axes=None, precompute_inames=None, storage_axis_to_tag={},
-        default_tag="l.auto", dtype=None, fetch_bounding_box=False,
-        temporary_is_local=None):
+        storage_axes=None, temporary_name=None, precompute_inames=None,
+        storage_axis_to_tag={}, default_tag="l.auto", dtype=None,
+        fetch_bounding_box=False, temporary_is_local=None):
     """Precompute the expression described in the substitution rule determined by
     *subst_use* and store it in a temporary array. A precomputation needs two
     things to operate, a list of *sweep_inames* (order irrelevant) and an
@@ -263,6 +263,11 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
         May also equivalently be a comma-separated string.
     :arg within: a stack match as understood by
         :func:`loopy.context_matching.parse_stack_match`.
+    :arg temporary_name:
+        The temporary variable name to use for storing the precomputed data.
+        If it does not exist, it will be created. If it does exist, its properties
+        (such as size, type) are checked (and updated, if possible) to match
+        its use.
     :arg precompute_inames:
         If the specified inames do not already exist, they will be
         created. If they do already exist, their loop domain is verified
@@ -584,8 +589,10 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
 
     # {{{ set up compute insn
 
-    target_var_name = var_name_gen(based_on=c_subst_name)
-    assignee = var(target_var_name)
+    if temporary_name is None:
+        temporary_name = var_name_gen(based_on=c_subst_name)
+
+    assignee = var(temporary_name)
 
     if non1_storage_axis_names:
         assignee = assignee.index(
@@ -633,7 +640,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
             access_descriptors, abm,
             storage_axis_names, storage_axis_sources,
             non1_storage_axis_names,
-            target_var_name)
+            temporary_name)
 
     kernel = invr.map_kernel(kernel)
     kernel = kernel.copy(
@@ -655,15 +662,69 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
     if temporary_is_local is None:
         temporary_is_local = lp.auto
 
+    new_temp_shape = tuple(abm.non1_storage_shape)
+
     new_temporary_variables = kernel.temporary_variables.copy()
-    temp_var = lp.TemporaryVariable(
-            name=target_var_name,
-            dtype=dtype,
-            base_indices=(0,)*len(abm.non1_storage_shape),
-            shape=tuple(abm.non1_storage_shape),
-            is_local=temporary_is_local)
-
-    new_temporary_variables[target_var_name] = temp_var
+    if temporary_name not in new_temporary_variables:
+        temp_var = lp.TemporaryVariable(
+                name=temporary_name,
+                dtype=dtype,
+                base_indices=(0,)*len(new_temp_shape),
+                shape=tuple(abm.non1_storage_shape),
+                is_local=temporary_is_local)
+
+    else:
+        temp_var = new_temporary_variables[temporary_name]
+
+        # {{{ check and adapt existing temporary
+
+        if temp_var.dtype is lp.auto:
+            pass
+        elif temp_var.dtype is not lp.auto and dtype is lp.auto:
+            dtype = temp_var.dtype
+        elif temp_var.dtype is not lp.auto and dtype is not lp.auto:
+            if temp_var.dtype != dtype:
+                raise LoopyError("Existing and new dtype of temporary '%s' "
+                        "do not match (existing: %s, new: %s)"
+                        % (temporary_name, temp_var.dtype, dtype))
+
+        temp_var = temp_var.copy(dtype=dtype)
+
+        if len(temp_var.shape) != len(new_temp_shape):
+            raise LoopyError("Existing and new temporary '%s' do not "
+                    "have matching number of dimensions (existing: %d, new: %d)"
+                    % (temporary_name,
+                        len(temp_var.shape), len(new_temp_shape)))
+
+        if temp_var.base_indices != (0,) * len(new_temp_shape):
+            raise LoopyError("Existing temporary '%s' must have all-zero "
+                    "base indices to be used as a precompute target "
+                    "(found: %s)"
+                    % (temporary_name, temp_var.base_indices))
+
+        new_temp_shape = tuple(
+                max(i, ex_i)
+                for i, ex_i in zip(new_temp_shape, temp_var.shape))
+
+        temp_var = temp_var.copy(shape=new_temp_shape)
+
+        if temporary_is_local == temp_var.is_local:
+            pass
+        elif temporary_is_local is lp.auto:
+            temporary_is_local = temp_var.is_local
+        elif temp_var.is_local is lp.auto:
+            pass
+        else:
+            raise LoopyError("Existing and new temporary '%s' do not have "
+                    "matching values of 'is_local' (existing: %s, new: %s)"
+                    % (temporary_name,
+                        temp_var.is_local, temporary_is_local))
+
+        temp_var = temp_var.copy(is_local=temporary_is_local)
+
+        # }}}
+
+    new_temporary_variables[temporary_name] = temp_var
 
     kernel = kernel.copy(
             temporary_variables=new_temporary_variables)