diff --git a/loopy/__init__.py b/loopy/__init__.py
index 450aa4ac45272a580afd7dc5fb933355b5a8dcfd..3bc1d587c897d41b27c1f56e5a938881f58b0f03 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -1016,7 +1016,7 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
                     kernel,  rule_name, sweep_inames,
                     footprint_subscripts, arg)
     new_kernel = precompute(kernel, subst_use, sweep_inames,
-            new_storage_axis_names=dim_arg_names,
+            precompute_inames=dim_arg_names,
             default_tag=default_tag, dtype=arg.dtype,
             fetch_bounding_box=fetch_bounding_box)
 
diff --git a/loopy/array_buffer_map.py b/loopy/array_buffer_map.py
index 9189421271ea28c95be861c785243aabbd7bd934..0be16ce5b3d9b7af477f7ee849d0dbe8e6b90c7c 100644
--- a/loopy/array_buffer_map.py
+++ b/loopy/array_buffer_map.py
@@ -303,8 +303,7 @@ class ArrayToBufferMap(object):
             boxify_sweep=False):
 
         renamed_aug_domain = self.aug_domain
-        first_storage_index = (
-                renamed_aug_domain.dim(dim_type.set)
+        first_storage_index = (renamed_aug_domain.dim(dim_type.set)
                 - len(self.non1_storage_shape))
 
         inon1 = 0
@@ -323,7 +322,10 @@ class ArrayToBufferMap(object):
 
             inon1 += 1
 
-        domain, renamed_aug_domain = isl.align_two(domain, renamed_aug_domain)
+        # Order of arguments to align_two matters--'domain' should be the
+        # 'guiding' ordering.
+        renamed_aug_domain, domain = isl.align_two(renamed_aug_domain, domain)
+
         domain = domain & renamed_aug_domain
 
         from loopy.isl_helpers import convexify, boxify
diff --git a/loopy/precompute.py b/loopy/precompute.py
index e516dde139d370f60b05fe3d6740d6fbd9d833e8..5675537f1f3915dcce8b68b2d36200bdcaef3f5a 100644
--- a/loopy/precompute.py
+++ b/loopy/precompute.py
@@ -28,6 +28,7 @@ THE SOFTWARE.
 from loopy.symbolic import (get_dependencies,
         RuleAwareIdentityMapper, RuleAwareSubstitutionMapper,
         SubstitutionRuleMappingContext)
+from loopy.diagnostic import LoopyError
 from pymbolic.mapper.substitutor import make_subst_func
 import numpy as np
 
@@ -209,7 +210,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper):
 
 
 def precompute(kernel, subst_use, sweep_inames=[], within=None,
-        storage_axes=None, new_storage_axis_names=None, storage_axis_to_tag={},
+        storage_axes=None, precompute_inames=None, storage_axis_to_tag={},
         default_tag="l.auto", dtype=None, fetch_bounding_box=False,
         temporary_is_local=None):
     """Precompute the expression described in the substitution rule determined by
@@ -262,6 +263,10 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
         May also equivalently be a comma-separated string.
     :arg within: a stack match as understood by
         :func:`loopy.context_matching.parse_stack_match`.
+    :arg precompute_inames:
+        If the specified inames do not already exist, they will be
+        created. If they do already exist, their loop domain is verified
+        against the one required for this precomputation.
 
     If `storage_axes` is not specified, it defaults to the arrangement
     `<direct sweep axes><arguments>` with the direct sweep axes being the
@@ -274,7 +279,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
     # {{{ check, standardize arguments
 
     if isinstance(sweep_inames, str):
-        sweep_inames = sweep_inames.split(",")
+        sweep_inames = [iname.strip() for iname in sweep_inames.split(",")]
 
     for iname in sweep_inames:
         if iname not in kernel.all_inames():
@@ -285,7 +290,10 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
     sweep_inames_set = frozenset(sweep_inames)
 
     if isinstance(storage_axes, str):
-        storage_axes = storage_axes.split(",")
+        storage_axes = [ax.strip() for ax in storage_axes.split(",")]
+
+    if isinstance(precompute_inames, str):
+        precompute_inames = [iname.strip() for iname in precompute_inames.split(",")]
 
     if isinstance(subst_use, str):
         subst_use = [subst_use]
@@ -424,6 +432,29 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
     storage_axis_names = []
     storage_axis_sources = []  # number for arg#, or iname
 
+    # {{{ check for pre-existing precompute_inames
+
+    if precompute_inames is not None:
+        preexisting_precompute_inames = (
+                set(precompute_inames) & kernel.all_inames())
+
+        if (
+                preexisting_precompute_inames
+                and
+                len(preexisting_precompute_inames) < len(precompute_inames)):
+            raise LoopyError("some (but not all) of the inames in "
+                    "precompute_inames already exist. existing: %s non-existing: %s"
+                    % (
+                        preexisting_precompute_inames,
+                        set(precompute_inames) - preexisting_precompute_inames))
+
+        precompute_inames_already_exist = bool(preexisting_precompute_inames)
+
+    else:
+        precompute_inames_already_exist = False
+
+    # }}}
+
     for i, saxis in enumerate(storage_axes):
         tag_lookup_saxis = saxis
 
@@ -439,14 +470,15 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
             old_name = saxis
             name = "%s_%s" % (c_subst_name, old_name)
 
-        if new_storage_axis_names is not None and i < len(new_storage_axis_names):
-            name = new_storage_axis_names[i]
+        if precompute_inames is not None and i < len(precompute_inames):
+            name = precompute_inames[i]
             tag_lookup_saxis = name
-            if var_name_gen.is_name_conflicting(name):
+            if not precompute_inames_already_exist and var_name_gen.is_name_conflicting(name):
                 raise RuntimeError("new storage axis name '%s' "
                         "conflicts with existing name" % name)
 
-        name = var_name_gen(name)
+        if not precompute_inames_already_exist:
+            name = var_name_gen(name)
 
         storage_axis_names.append(name)
         new_iname_to_tag[name] = storage_axis_to_tag.get(
@@ -456,7 +488,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
 
     del storage_axis_to_tag
     del storage_axes
-    del new_storage_axis_names
+    del precompute_inames
 
     # }}}
 
@@ -476,8 +508,12 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
     if storage_axis_names:
         # {{{ find domain to be changed
 
+        change_inames = expanding_inames
+        if precompute_inames_already_exist:
+            change_inames = change_inames | preexisting_precompute_inames
+
         from loopy.kernel.tools import DomainChanger
-        domch = DomainChanger(kernel, expanding_inames)
+        domch = DomainChanger(kernel, change_inames)
 
         if domch.leaf_domain_index is not None:
             # If the sweep inames are at home in parent domains, then we'll add
@@ -501,10 +537,14 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
             else:
                 del new_iname_to_tag[saxis]
 
-        new_kernel_domains = domch.get_domains_with(
-                abm.augment_domain_with_sweep(
-                    domch.domain, non1_storage_axis_names,
-                    boxify_sweep=fetch_bounding_box))
+        if not precompute_inames_already_exist:
+            new_kernel_domains = domch.get_domains_with(
+                    abm.augment_domain_with_sweep(
+                        domch.domain, non1_storage_axis_names,
+                        boxify_sweep=fetch_bounding_box))
+        else:
+            new_kernel_domains = domch.get_domains_with(domch.domain)
+
 
     else:
         # leave kernel domains unchanged