diff --git a/test/test_linalg.py b/test/test_linalg.py
index aee835112134b09efc984a269cd74a021eeec0da..52bdd7b75a87477c6a78804aca2cbbac4ea1535b 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -565,6 +565,39 @@ def test_image_matrix_mul_ilp(ctx_factory):
             op_count=[2*n**3/1e9], op_label=["GFlops"],
             parameters={})
 
+def test_image_matrix_mul_ilp_simplified(ctx_factory):
+    """Run a split and prefetched ImageArg matmul through auto_test_vs_ref."""
+    elem_type = np.float32
+    cl_ctx = ctx_factory()
+    layout = "C"
+    n = 9  # fixed size instead of get_suitable_size(ctx)
+
+    device = cl_ctx.devices[0]
+    domain = "{[i,j,k]: 0<=i,j,k<%d}" % n
+    instructions = ["c[i, j] = sum(k, a[i, k]*b[k, j])"]
+    kernel_args = [
+        lp.ImageArg("a", elem_type, shape=(n, n)),
+        lp.ImageArg("b", elem_type, shape=(n, n)),
+        lp.GlobalArg("c", elem_type, shape=(n, n), order=layout),
+        ]
+
+    # The untransformed kernel is the reference for the comparison below.
+    ref_knl = lp.make_kernel(
+            device, domain, instructions, kernel_args, name="matmul")
+
+    # Split j (inner tag l.0) and k, then prefetch b over j_inner/k_inner.
+    opt_knl = lp.split_dimension(ref_knl, "j", 4, inner_tag="l.0")
+    opt_knl = lp.split_dimension(opt_knl, "k", 2)
+    opt_knl = lp.add_prefetch(opt_knl, "b", ["j_inner", "k_inner"])
+
+    schedules = lp.check_kernels(
+            lp.generate_loop_schedules(opt_knl), dict(n=n))
+
+    lp.auto_test_vs_ref(ref_knl, cl_ctx, schedules,
+            op_count=[2*n**3/1e9], op_label=["GFlops"],
+            parameters={}, options=["-g"])
+
+