diff --git a/MEMO b/MEMO
index 5336b8f48c6da1556f88c079723a47211d0f0048..75d421f5353d795200432acb85198e568f16e293 100644
--- a/MEMO
+++ b/MEMO
@@ -38,6 +38,8 @@ Things to consider
 
 - Loopy is semi-interactive.
 
+- Limitation: base index for parallel axes is 0.
+
 To-do
 ^^^^^
 
@@ -84,8 +86,6 @@ Future ideas
 
 - Generate automatic test against sequential code.
 
-- Automatically verify that all array access is within bounds.
-
 - Reason about generated code, give user feedback on potential
   improvements.
 
@@ -105,6 +105,8 @@ Future ideas
 Dealt with
 ^^^^^^^^^^
 
+- Automatically verify that all array access is within bounds.
+
 - : (as in, Matlab full-slice) in prefetches
 
 - Add dependencies after the fact
diff --git a/loopy/check.py b/loopy/check.py
index e200dab85ddc4d637c795cbcf6e7983b3a6d6b9a..f1f178000a0ec7a53abca7f094d6305ab56a5fb2 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -1,6 +1,7 @@
 from __future__ import division
 from islpy import dim_type
 import islpy as isl
+from loopy.symbolic import WalkMapper
 
 
 
@@ -195,6 +196,97 @@ def check_for_data_dependent_parallel_bounds(kernel):
                         "inames '%s'. This is not allowed (for now)."
                         % (i, par, ", ".join(par_inames)))
 
+class _AccessCheckMapper(WalkMapper):
+    def __init__(self, kernel, domain, insn_id):
+        self.kernel = kernel
+        self.domain = domain
+        self.insn_id = insn_id
+
+    def map_subscript(self, expr):
+        from pymbolic.primitives import Variable
+        assert isinstance(expr.aggregate, Variable)
+
+        shape = None
+        var_name = expr.aggregate.name
+        if var_name in self.kernel.arg_dict:
+            arg = self.kernel.arg_dict[var_name]
+            shape = arg.shape
+        elif var_name in self.kernel.temporary_variables:
+            tv = self.kernel.temporary_variables[var_name]
+            shape = tv.shape
+
+        if shape is not None:
+            index = expr.index
+
+            if not isinstance(index, tuple):
+                index = (index,)
+
+            from loopy.symbolic import get_dependencies, aff_from_expr
+            available_vars = set(self.domain.get_var_dict())
+            if (get_dependencies(index) <= available_vars
+                    and get_dependencies(shape) <= available_vars):
+
+                dims = len(index)
+
+                # we build access_map as a set because (idiocy!) Affs
+                # cannot live on maps.
+
+                # dims: [domain](dn)[storage]
+                access_map = self.domain
+
+                if isinstance(access_map, isl.BasicSet):
+                    access_map = isl.Set.from_basic_set(access_map)
+
+                dn = access_map.dim(dim_type.set)
+                access_map = access_map.insert_dims(dim_type.set, dn, dims)
+
+                for idim in xrange(dims):
+                    idx_aff = aff_from_expr(access_map.get_space(),
+                            index[idim])
+                    idx_aff = idx_aff.set_coefficient(
+                            dim_type.in_, dn+idim, -1)
+
+                    access_map = access_map.add_constraint(
+                            isl.Constraint.equality_from_aff(idx_aff))
+
+                access_map_as_map = isl.Map.universe(access_map.get_space())
+                access_map_as_map = access_map_as_map.intersect_range(access_map)
+                access_map = access_map_as_map.move_dims(
+                        dim_type.in_, 0,
+                        dim_type.out, 0, dn)
+                del access_map_as_map
+
+                access_range = access_map.range()
+
+                shape_domain = isl.BasicSet.universe(access_range.get_space())
+                for idim in xrange(dims):
+                    from loopy.isl_helpers import make_slab
+                    slab = make_slab(
+                            shape_domain.get_space(), (dim_type.in_, idim),
+                            0, shape[idim])
+
+                    shape_domain = shape_domain.intersect(slab)
+
+                if not access_range.is_subset(shape_domain):
+                    raise RuntimeError("'%s' in instruction '%s' "
+                            "accesses out-of-bounds array element"
+                            % (expr, self.insn_id))
+
+        WalkMapper.map_subscript(self, expr)
+
+def check_bounds(kernel):
+    temp_var_names = set(kernel.temporary_variables)
+    for insn in kernel.instructions:
+        domain = kernel.get_inames_domain(kernel.insn_inames(insn))
+
+        # data-dependent bounds? can't do much
+        if set(domain.get_var_names(dim_type.param)) & temp_var_names:
+            continue
+
+        acm = _AccessCheckMapper(kernel, domain, insn.id)
+        acm(insn.expression)
+        acm(insn.assignee)
+
 # }}}
 
 def run_automatic_checks(kernel):
@@ -204,6 +296,7 @@ def run_automatic_checks(kernel):
     check_for_inactive_iname_access(kernel)
     check_for_write_races(kernel)
     check_for_data_dependent_parallel_bounds(kernel)
+    check_bounds(kernel)
 
 # {{{ sanity-check for implemented domains of each instruction
 
diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py
index dab7648af2a4b407cfd693c447b49da9cbf85846..c0e0b970807f82006bac6119b50c7177eacb7178 100644
--- a/loopy/isl_helpers.py
+++ b/loopy/isl_helpers.py
@@ -110,13 +110,26 @@ def make_slab(space, iname, start, stop):
     if isinstance(stop, (isl.Aff, isl.PwAff)):
         stop, zero = isl.align_two(pw_aff_to_aff(stop), zero)
 
+    space = zero.get_domain_space()
+
+    from pymbolic.primitives import Expression
+    from loopy.symbolic import aff_from_expr
+    if isinstance(start, Expression):
+        start = aff_from_expr(space, start)
+    if isinstance(stop, Expression):
+        stop = aff_from_expr(space, stop)
+
     if isinstance(start, int): start = zero + start
     if isinstance(stop, int): stop = zero + stop
 
-    iname_dt, iname_idx = zero.get_space().get_var_dict()[iname]
+    if isinstance(iname, str):
+        iname_dt, iname_idx = zero.get_space().get_var_dict()[iname]
+    else:
+        iname_dt, iname_idx = iname
+
     iname_aff = zero.add_coefficient(iname_dt, iname_idx, 1)
 
-    result = (isl.Set.universe(space)
+    result = (isl.BasicSet.universe(space)
             # start <= iname
             .add_constraint(isl.Constraint.inequality_from_aff(
                 iname_aff - start))
diff --git a/loopy/kernel.py b/loopy/kernel.py
index 3067041d935fa19e2957dce39569339c535ba15e..b50ed89a630487bf9fe6283060df56efe1597607 100644
--- a/loopy/kernel.py
+++ b/loopy/kernel.py
@@ -245,6 +245,9 @@ class TemporaryVariable(Record):
         if base_indices is None:
             base_indices = (0,) * len(shape)
 
+        if shape is not None and not isinstance(shape, tuple):
+            shape = tuple(shape)
+
         Record.__init__(self, name=name, dtype=dtype, shape=shape, is_local=is_local,
                 base_indices=base_indices,
                 storage_shape=storage_shape)
@@ -603,7 +606,7 @@ def _parse_domains(ctx, args_and_vars, domains, defines):
                 dom = "[%s] -> %s" % (",".join(parameters), dom)
 
             try:
-                dom = isl.Set.read_from_str(ctx, dom)
+                dom = isl.BasicSet.read_from_str(ctx, dom)
             except:
                 print "failed to parse domain '%s'" % dom
                 raise
diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 29b2512ece0f22665d3f5ab36a75fdf6117babf9..5d6a6c5f496cfcb1095a7a80c496c0f43156897e 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -128,8 +128,16 @@ class IdentityMapper(IdentityMapperBase, IdentityMapperMixin):
 
 class WalkMapper(WalkMapperBase):
     def map_reduction(self, expr):
+        if not self.visit(expr):
+            return
+
         self.rec(expr.expr)
 
+    map_tagged_variable = WalkMapperBase.map_variable
+
+    def map_loopy_function_identifier(self, expr):
+        self.visit(expr)
+
 class CallbackMapper(CallbackMapperBase, IdentityMapper):
     map_reduction = CallbackMapperBase.map_constant
 
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 3b2cad4dc1e84906e4f3c4a6e1cfc43db4d10efe..da16cdbda200945da3c5cc4aabab02969c18a112 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -241,9 +241,9 @@ def test_variable_size_matrix_mul(ctx_factory):
                 "c[i, j] = sum(k, a[i, k]*b[k, j]) {id=labl}"
                 ],
             [
-                lp.GlobalArg("a", dtype, shape=(n, n), order=order),
-                lp.GlobalArg("b", dtype, shape=(n, n), order=order),
-                lp.GlobalArg("c", dtype, shape=(n, n), order=order),
+                lp.GlobalArg("a", dtype, shape="n, n", order=order),
+                lp.GlobalArg("b", dtype, shape="n, n", order=order),
+                lp.GlobalArg("c", dtype, shape="n, n", order=order),
                 lp.ValueArg("n", np.int32, approximately=n),
                 ],
             name="matmul", assumptions="n >= 16")
@@ -283,9 +283,9 @@ def test_rank_one(ctx_factory):
                 "c[i, j] = a[i]*b[j] {id=mylabel, priority =5}"
                 ],
             [
-                lp.GlobalArg("a", dtype, shape=(n,), order=order),
-                lp.GlobalArg("b", dtype, shape=(n,), order=order),
-                lp.GlobalArg("c", dtype, shape=(n, n), order=order),
+                lp.GlobalArg("a", dtype, shape=("n",), order=order),
+                lp.GlobalArg("b", dtype, shape=("n",), order=order),
+                lp.GlobalArg("c", dtype, shape=("n, n"), order=order),
                 lp.ValueArg("n", np.int32, approximately=n),
                 ],
             name="rank_one", assumptions="n >= 16")
@@ -338,7 +338,7 @@ def test_rank_one(ctx_factory):
     seq_knl = knl
 
     for variant in [variant_1, variant_2, variant_3, variant_4]:
-    #for variant in [variant_4]:
+    #for variant in [variant_1]:
         kernel_gen = lp.generate_loop_schedules(variant(knl))
         kernel_gen = lp.check_kernels(kernel_gen, dict(n=n))
 
diff --git a/test/test_loopy.py b/test/test_loopy.py
index db2b5ca9df9e4094829c89488e001898ba23c097..8dfd64537f51bbd82714a2a98c9791cd31047967 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -289,7 +289,7 @@ def test_empty_reduction(ctx_factory):
     knl = lp.make_kernel(ctx.devices[0],
             [
                 "{[i]: 0<=i<20}",
-                "{[j]: 0<=j<0}"
+                "[i] -> {[j]: 0<=j<0}"
                 ],
             [
                 "a[i] = sum(j, j)",