diff --git a/MEMO b/MEMO
index 38f88de62a33bc0ef8b83e1483780ada4bc377c6..170019de51d8514a051de9c13a5b55e00b50a709 100644
--- a/MEMO
+++ b/MEMO
@@ -53,12 +53,8 @@ To-do
 - rename IndexTag -> InameTag
 
 - Data implementation tags
-  TODO initial bringup:
-  - Adapt padding
-  - Adapt automatic padding of temp variables
-  - turn base_indices into offset
-
   TODO further:
+  - turn base_indices into offset
   - vectorization
   - automatic copies
   - write_image()
diff --git a/loopy/cse.py b/loopy/cse.py
index 8d8c7477baabe2b8f65c50d33330452c1ae0e592..6d9cc491308d220faee337fe4344799b51754a33 100644
--- a/loopy/cse.py
+++ b/loopy/cse.py
@@ -864,7 +864,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
             name=target_var_name,
             dtype=dtype,
             base_indices=(0,)*len(non1_storage_shape),
-            shape=non1_storage_shape,
+            shape=tuple(non1_storage_shape),
             is_local=None)
 
     new_temporary_variables[target_var_name] = temp_var
diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py
index beb2b6d9f8e072489c9d2290d0af981defe32a13..831996ba210c7b6ae8104afbb9dd31029e5898aa 100644
--- a/loopy/kernel/array.py
+++ b/loopy/kernel/array.py
@@ -285,6 +285,7 @@ def _parse_shape_or_strides(x):
     x = _pymbolic_parse_if_necessary(x)
     if isinstance(x, lp.auto):
         return x
+    assert not isinstance(x, list)
     if not isinstance(x, tuple):
         assert x is not lp.auto
         x = (x,)
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 6d8e27c4f1528c74bc85e6f1b86154346b477a3f..60cba66f0eb808e281970f250096b5c94572a376 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -262,8 +262,9 @@ class TemporaryVariable(ArrayBase):
         # FIXME take into account storage_shape, or something like it
         storage_shape = self.shape
 
-        for l in storage_shape:
-            temp_var_decl = ArrayOf(temp_var_decl, l)
+        if storage_shape:
+            temp_var_decl = ArrayOf(temp_var_decl,
+                    " * ".join(str(s) for s in storage_shape))
 
         if self.is_local:
             temp_var_decl = CLLocal(temp_var_decl)
diff --git a/loopy/padding.py b/loopy/padding.py
index 65d90a8f48c86579e0d444306e8de3e6d10dfa85..ee43f2590c915c582ab11d5891e2a23d0fdbf266 100644
--- a/loopy/padding.py
+++ b/loopy/padding.py
@@ -108,24 +108,34 @@ def split_arg_axis(kernel, args_and_axes, count):
 
         # }}}
 
-        # {{{ adjust strides
+        # {{{ adjust dim tags
 
-        new_strides = list(arg.strides)
-        old_stride = new_strides[axis]
+        if arg.dim_tags is None:
+            raise RuntimeError("dim_tags of '%s' are not known" % arg.name)
+        new_dim_tags = list(arg.dim_tags)
+
+        old_dim_tag = arg.dim_tags[axis]
+
+        from loopy.kernel.array import FixedStrideArrayDimTag
+        if not isinstance(old_dim_tag, FixedStrideArrayDimTag):
+            raise RuntimeError("axis %d of '%s' is not tagged fixed-stride"
+                    % (axis, arg.name))
+
+        old_stride = old_dim_tag.stride
         outer_stride = count*old_stride
 
         if order == "F":
-            new_strides.insert(axis+1, outer_stride)
+            new_dim_tags.insert(axis+1, FixedStrideArrayDimTag(outer_stride))
         elif order == "C":
-            new_strides.insert(axis, outer_stride)
+            new_dim_tags.insert(axis, FixedStrideArrayDimTag(outer_stride))
         else:
             raise RuntimeError("order '%s' not understood" % order)
 
-        new_strides = tuple(new_strides)
+        new_dim_tags = tuple(new_dim_tags)
 
         # }}}
 
-        new_args[arg_idx] = arg.copy(shape=new_shape, strides=new_strides)
+        new_args[arg_idx] = arg.copy(shape=new_shape, dim_tags=new_dim_tags)
 
     # }}}
 
@@ -187,9 +197,22 @@ def split_arg_axis(kernel, args_and_axes, count):
 def find_padding_multiple(kernel, variable, axis, align_bytes, allowed_waste=0.1):
     arg = kernel.arg_dict[variable]
 
-    stride = arg.strides[axis]
+    if arg.dim_tags is None:
+        raise RuntimeError("cannot find padding multiple--dim_tags of '%s' "
+                "are not known" % variable)
+
+    dim_tag = arg.dim_tags[axis]
+
+    from loopy.kernel.array import FixedStrideArrayDimTag
+    if not isinstance(dim_tag, FixedStrideArrayDimTag):
+        raise RuntimeError("cannot find padding multiple--"
+                "axis %d of '%s' is not tagged fixed-stride"
+                % (axis, variable))
+
+    stride = dim_tag.stride
+
     if not isinstance(stride, int):
-        raise RuntimeError("cannot find padding multi--stride is not a "
+        raise RuntimeError("cannot find padding multiple--stride is not a "
                 "known integer")
 
     from pytools import div_ceil
@@ -212,21 +235,31 @@ def add_padding(kernel, variable, axis, align_bytes):
     new_args = kernel.args[:]
     arg = new_args[arg_idx]
 
-    new_strides = list(arg.strides)
-    stride = new_strides[axis]
+    if arg.dim_tags is None:
+        raise RuntimeError("cannot add padding--dim_tags of '%s' "
+                "are not known" % variable)
+
+    new_dim_tags = list(arg.dim_tags)
+    dim_tag = new_dim_tags[axis]
+
+    from loopy.kernel.array import FixedStrideArrayDimTag
+    if not isinstance(dim_tag, FixedStrideArrayDimTag):
+        raise RuntimeError("cannot add padding--"
+                "axis %d of '%s' is not tagged fixed-stride"
+                % (axis, variable))
+
+    stride = dim_tag.stride
     if not isinstance(stride, int):
         raise RuntimeError("cannot find split granularity--stride is not a "
                 "known integer")
+
     from pytools import div_ceil
-    new_strides[axis] = div_ceil(stride, align_bytes) * align_bytes
+    new_dim_tags[axis] = FixedStrideArrayDimTag(
+            div_ceil(stride, align_bytes) * align_bytes)
 
-    new_args[arg_idx] = arg.copy(strides=tuple(new_strides))
+    new_args[arg_idx] = arg.copy(dim_tags=tuple(new_dim_tags))
 
     return kernel.copy(args=new_args)
 
 
-
-
-
-
 # vim: foldmethod=marker
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index 09adb1278fb45bfafd91a2603c5253a163f421ff..dc32e01454e361b96d9bb5c16d1e994d4bf7bdde 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -710,14 +710,24 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn):
         ary_name = aae.aggregate.name
         arg = kernel.arg_dict.get(ary_name)
 
-        ary_strides = arg.strides
-        if ary_strides is None and len(index_expr) == 1:
-            ary_strides = (1,)
+        if arg.dim_tags is None:
+            from warnings import warn
+            warn("Strides for '%s' are not known. Local axis assignment "
+                    "is likely suboptimal." % arg.name)
+            ary_strides = [1] * len(index_expr)
+        else:
+            from loopy.kernel.array import FixedStrideArrayDimTag
+            # None keeps entries aligned with index_expr for other tag kinds.
+            ary_strides = [dim_tag.stride
+                    if isinstance(dim_tag, FixedStrideArrayDimTag) else None
+                    for dim_tag in arg.dim_tags]
 
         # {{{ construct iname_to_stride_expr
 
         iname_to_stride_expr = {}
         for iexpr_i, stride in zip(index_expr, ary_strides):
+            if stride is None:
+                continue
             coeffs = CoefficientCollector()(iexpr_i)
             for var_name, coeff in coeffs.iteritems():
                 if var_name in auto_axis_inames:  # excludes '1', i.e.  the constant