From 3f5585ea911f9dbae354a275eb7fbce8a4ea4c09 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 25 Oct 2017 16:58:58 +0200
Subject: [PATCH 1/6] Placate flake8 re: usage of ell

---
 test/test_fortran.py    |  22 ++--
 test/test_linalg.py     |   8 +-
 test/test_reduction.py  |  10 +-
 test/test_statistics.py | 231 ++++++++++++++++++++--------------------
 4 files changed, 138 insertions(+), 133 deletions(-)
diff --git a/test/test_fortran.py b/test/test_fortran.py
index 6e05aa6ad..aa5a7e6e2 100644
--- a/test/test_fortran.py
+++ b/test/test_fortran.py
@@ -278,14 +278,14 @@ def test_matmul(ctx_factory, buffer_inames):
     logging.basicConfig(level=logging.INFO)
 
     fortran_src = """
-        subroutine dgemm(m,n,l,a,b,c)
+        subroutine dgemm(m,n,ell,a,b,c)
           implicit none
-          real*8 a(m,l),b(l,n),c(m,n)
-          integer m,n,k,i,j,l
+          real*8 a(m,ell),b(ell,n),c(m,n)
+          integer m,n,k,i,j,ell
 
           do j = 1,n
             do i = 1,m
-              do k = 1,l
+              do k = 1,ell
                 c(i,j) = c(i,j) + b(k,j)*a(i,k)
               end do
             end do
@@ -317,7 +317,7 @@ def test_matmul(ctx_factory, buffer_inames):
             init_expression="0", store_expression="base+buffer")
 
     ctx = ctx_factory()
-    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=128, m=128, l=128))
+    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=128, m=128, ell=128))
 
 
 @pytest.mark.xfail
@@ -457,14 +457,14 @@ def test_parse_and_fuse_two_kernels():
 
 def test_precompute_some_exist(ctx_factory):
     fortran_src = """
-        subroutine dgemm(m,n,l,a,b,c)
+        subroutine dgemm(m,n,ell,a,b,c)
           implicit none
-          real*8 a(m,l),b(l,n),c(m,n)
-          integer m,n,k,i,j,l
+          real*8 a(m,ell),b(ell,n),c(m,n)
+          integer m,n,k,i,j,ell
 
           do j = 1,n
             do i = 1,m
-              do k = 1,l
+              do k = 1,ell
                 c(i,j) = c(i,j) + b(k,j)*a(i,k)
               end do
             end do
@@ -483,7 +483,7 @@ def test_precompute_some_exist(ctx_factory):
     knl = lp.split_iname(knl, "k", 8)
     knl = lp.assume(knl, "n mod 8 = 0")
     knl = lp.assume(knl, "m mod 8 = 0")
-    knl = lp.assume(knl, "l mod 8 = 0")
+    knl = lp.assume(knl, "ell mod 8 = 0")
 
     knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
     knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
@@ -495,7 +495,7 @@ def test_precompute_some_exist(ctx_factory):
     ref_knl = knl
 
     ctx = ctx_factory()
-    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=128, m=128, l=128))
+    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=128, m=128, ell=128))
 
 
 if __name__ == "__main__":
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 772d536d1..3d422f1d8 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -230,14 +230,14 @@ def test_funny_shape_matrix_mul(ctx_factory):
 
     n = get_suitable_size(ctx)
     m = n+12
-    l = m+12
+    ell = m+12
 
     knl = lp.make_kernel(
-            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 "c[i, j] = sum(k, a[i, k]*b[k, j])"
                 ],
-            name="matmul", assumptions="n,m,l >= 1")
+            name="matmul", assumptions="n,m,ell >= 1")
 
     knl = lp.add_dtypes(knl, {
         "a": np.float32,
@@ -261,7 +261,7 @@ def test_funny_shape_matrix_mul(ctx_factory):
 
     lp.auto_test_vs_ref(ref_knl, ctx, knl,
             op_count=[2*n**3/1e9], op_label=["GFlops"],
-            parameters={"n": n, "m": m, "l": l})
+            parameters={"n": n, "m": m, "ell": ell})
 
 
 def test_rank_one(ctx_factory):
diff --git a/test/test_reduction.py b/test/test_reduction.py
index 555b8c0cc..0c37d2228 100644
--- a/test/test_reduction.py
+++ b/test/test_reduction.py
@@ -97,22 +97,22 @@ def test_nested_dependent_reduction(ctx_factory):
                 "{[j]: 0<=j<i+sumlen}"
                 ],
             [
-                "<> sumlen = l[i]",
+                "<> sumlen = ell[i]",
                 "a[i] = sum(j, j)",
                 ],
             [
                 lp.ValueArg("n", np.int32),
                 lp.GlobalArg("a", dtype, ("n",)),
-                lp.GlobalArg("l", np.int32, ("n",)),
+                lp.GlobalArg("ell", np.int32, ("n",)),
                 ])
 
     cknl = lp.CompiledKernel(ctx, knl)
 
     n = 330
-    l = np.arange(n, dtype=np.int32)
-    evt, (a,) = cknl(queue, l=l, n=n, out_host=True)
+    ell = np.arange(n, dtype=np.int32)
+    evt, (a,) = cknl(queue, ell=ell, n=n, out_host=True)
 
-    tgt_result = (2*l-1)*2*l/2
+    tgt_result = (2*ell-1)*2*ell/2
     assert (a == tgt_result).all()
 
 
diff --git a/test/test_statistics.py b/test/test_statistics.py
index cf86539ef..3b33a8eff 100644
--- a/test/test_statistics.py
+++ b/test/test_statistics.py
@@ -37,14 +37,14 @@ from pymbolic.primitives import Variable
 def test_op_counter_basic():
 
     knl = lp.make_kernel(
-            "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "[n,m,ell] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
                 c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0+a[i,j,k]
                 e[i, k+1] = -g[i,k]*h[i,k+1]
                 """
             ],
-            name="basic", assumptions="n,m,l >= 1")
+            name="basic", assumptions="n,m,ell >= 1")
 
     knl = lp.add_and_infer_dtypes(knl,
                                   dict(a=np.float32, b=np.float32,
@@ -52,14 +52,14 @@ def test_op_counter_basic():
     op_map = lp.get_op_map(knl, count_redundant_work=True)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     f32add = op_map[lp.Op(np.float32, 'add')].eval_with_dict(params)
     f32mul = op_map[lp.Op(np.float32, 'mul')].eval_with_dict(params)
     f32div = op_map[lp.Op(np.float32, 'div')].eval_with_dict(params)
     f64mul = op_map[lp.Op(np.dtype(np.float64), 'mul')].eval_with_dict(params)
     i32add = op_map[lp.Op(np.dtype(np.int32), 'add')].eval_with_dict(params)
-    assert f32add == f32mul == f32div == n*m*l
+    assert f32add == f32mul == f32div == n*m*ell
     assert f64mul == n*m
     assert i32add == n*m*2
 
@@ -67,21 +67,21 @@ def test_op_counter_basic():
 def test_op_counter_reduction():
 
     knl = lp.make_kernel(
-            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 "c[i, j] = sum(k, a[i, k]*b[k, j])"
             ],
-            name="matmul_serial", assumptions="n,m,l >= 1")
+            name="matmul_serial", assumptions="n,m,ell >= 1")
 
     knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32, b=np.float32))
     op_map = lp.get_op_map(knl, count_redundant_work=True)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     f32add = op_map[lp.Op(np.float32, 'add')].eval_with_dict(params)
     f32mul = op_map[lp.Op(np.dtype(np.float32), 'mul')].eval_with_dict(params)
-    assert f32add == f32mul == n*m*l
+    assert f32add == f32mul == n*m*ell
 
     op_map_dtype = op_map.group_by('dtype')
     f32 = op_map_dtype[lp.Op(dtype=np.float32)].eval_with_dict(params)
@@ -91,20 +91,23 @@ def test_op_counter_reduction():
 def test_op_counter_logic():
 
     knl = lp.make_kernel(
-            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
-                e[i,k] = if(not(k<l-2) and k>6 or k/2==l, g[i,k]*2, g[i,k]+h[i,k]/2)
+                e[i,k] = if(
+                        not(k<ell-2) and k>6 or k/2==ell,
+                        g[i,k]*2,
+                        g[i,k]+h[i,k]/2)
                 """
             ],
-            name="logic", assumptions="n,m,l >= 1")
+            name="logic", assumptions="n,m,ell >= 1")
 
     knl = lp.add_and_infer_dtypes(knl, dict(g=np.float32, h=np.float64))
     op_map = lp.get_op_map(knl, count_redundant_work=True)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     f32mul = op_map[lp.Op(np.float32, 'mul')].eval_with_dict(params)
     f64add = op_map[lp.Op(np.float64, 'add')].eval_with_dict(params)
     f64div = op_map[lp.Op(np.dtype(np.float64), 'div')].eval_with_dict(params)
@@ -118,14 +121,14 @@ def test_op_counter_logic():
 def test_op_counter_specialops():
 
     knl = lp.make_kernel(
-            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
                 c[i, j, k] = (2*a[i,j,k])%(2+b[i,j,k]/3.0)
                 e[i, k] = (1+g[i,k])**(1+h[i,k+1])+rsqrt(g[i,k])*sin(g[i,k])
                 """
             ],
-            name="specialops", assumptions="n,m,l >= 1")
+            name="specialops", assumptions="n,m,ell >= 1")
 
     knl = lp.add_and_infer_dtypes(knl,
                                   dict(a=np.float32, b=np.float32,
@@ -133,8 +136,8 @@ def test_op_counter_specialops():
     op_map = lp.get_op_map(knl, count_redundant_work=True)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     f32mul = op_map[lp.Op(np.float32, 'mul')].eval_with_dict(params)
     f32div = op_map[lp.Op(np.float32, 'div')].eval_with_dict(params)
     f32add = op_map[lp.Op(np.float32, 'add')].eval_with_dict(params)
@@ -143,8 +146,8 @@ def test_op_counter_specialops():
     i32add = op_map[lp.Op(np.dtype(np.int32), 'add')].eval_with_dict(params)
     f64rsq = op_map[lp.Op(np.dtype(np.float64), 'func:rsqrt')].eval_with_dict(params)
     f64sin = op_map[lp.Op(np.dtype(np.float64), 'func:sin')].eval_with_dict(params)
-    assert f32div == 2*n*m*l
-    assert f32mul == f32add == n*m*l
+    assert f32div == 2*n*m*ell
+    assert f32mul == f32add == n*m*ell
     assert f64add == 3*n*m
     assert f64pow == i32add == f64rsq == f64sin == n*m
 
@@ -152,14 +155,14 @@ def test_op_counter_specialops():
 def test_op_counter_bitwise():
 
     knl = lp.make_kernel(
-            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
                 c[i, j, k] = (a[i,j,k] | 1) + (b[i,j,k] & 1)
                 e[i, k] = (g[i,k] ^ k)*(~h[i,k+1]) + (g[i, k] << (h[i,k] >> k))
                 """
             ],
-            name="bitwise", assumptions="n,m,l >= 1")
+            name="bitwise", assumptions="n,m,ell >= 1")
 
     knl = lp.add_and_infer_dtypes(
             knl, dict(
@@ -169,16 +172,16 @@ def test_op_counter_bitwise():
     op_map = lp.get_op_map(knl, count_redundant_work=True)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     i32add = op_map[lp.Op(np.int32, 'add')].eval_with_dict(params)
     i32bw = op_map[lp.Op(np.int32, 'bw')].eval_with_dict(params)
     i64bw = op_map[lp.Op(np.dtype(np.int64), 'bw')].eval_with_dict(params)
     i64mul = op_map[lp.Op(np.dtype(np.int64), 'mul')].eval_with_dict(params)
     i64add = op_map[lp.Op(np.dtype(np.int64), 'add')].eval_with_dict(params)
     i64shift = op_map[lp.Op(np.dtype(np.int64), 'shift')].eval_with_dict(params)
-    assert i32add == n*m+n*m*l
-    assert i32bw == 2*n*m*l
+    assert i32add == n*m+n*m*ell
+    assert i32bw == 2*n*m*ell
     assert i64bw == 2*n*m
     assert i64add == i64mul == n*m
     assert i64shift == 2*n*m
@@ -218,22 +221,22 @@ def test_op_counter_triangular_domain():
 def test_mem_access_counter_basic():
 
     knl = lp.make_kernel(
-            "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "[n,m,ell] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
                 c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0+a[i,j,k]
                 e[i, k] = g[i,k]*h[i,k+1]
                 """
             ],
-            name="basic", assumptions="n,m,l >= 1")
+            name="basic", assumptions="n,m,ell >= 1")
 
     knl = lp.add_and_infer_dtypes(knl,
                         dict(a=np.float32, b=np.float32, g=np.float64, h=np.float64))
     mem_map = lp.get_mem_access_map(knl, count_redundant_work=True)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     f32l = mem_map[lp.MemAccess('global', np.float32,
                          stride=0, direction='load', variable='a')
                    ].eval_with_dict(params)
@@ -246,7 +249,7 @@ def test_mem_access_counter_basic():
     f64l += mem_map[lp.MemAccess('global', np.float64,
                           stride=0, direction='load', variable='h')
                     ].eval_with_dict(params)
-    assert f32l == 3*n*m*l
+    assert f32l == 3*n*m*ell
     assert f64l == 2*n*m
 
     f32s = mem_map[lp.MemAccess('global', np.dtype(np.float32),
@@ -255,37 +258,37 @@ def test_mem_access_counter_basic():
     f64s = mem_map[lp.MemAccess('global', np.dtype(np.float64),
                          stride=0, direction='store', variable='e')
                    ].eval_with_dict(params)
-    assert f32s == n*m*l
+    assert f32s == n*m*ell
     assert f64s == n*m
 
 
 def test_mem_access_counter_reduction():
 
     knl = lp.make_kernel(
-            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 "c[i, j] = sum(k, a[i, k]*b[k, j])"
             ],
-            name="matmul", assumptions="n,m,l >= 1")
+            name="matmul", assumptions="n,m,ell >= 1")
 
     knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32, b=np.float32))
     mem_map = lp.get_mem_access_map(knl, count_redundant_work=True)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     f32l = mem_map[lp.MemAccess('global', np.float32,
                          stride=0, direction='load', variable='a')
                    ].eval_with_dict(params)
     f32l += mem_map[lp.MemAccess('global', np.float32,
                           stride=0, direction='load', variable='b')
                     ].eval_with_dict(params)
-    assert f32l == 2*n*m*l
+    assert f32l == 2*n*m*ell
 
     f32s = mem_map[lp.MemAccess('global', np.dtype(np.float32),
                          stride=0, direction='store', variable='c')
                    ].eval_with_dict(params)
-    assert f32s == n*l
+    assert f32s == n*ell
 
     ld_bytes = mem_map.filter_by(mtype=['global'], direction=['load']
                                  ).to_bytes().eval_and_sum(params)
@@ -298,20 +301,22 @@ def test_mem_access_counter_reduction():
 def test_mem_access_counter_logic():
 
     knl = lp.make_kernel(
-            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
-                e[i,k] = if(not(k<l-2) and k>6 or k/2==l, g[i,k]*2, g[i,k]+h[i,k]/2)
+                e[i,k] = if(not(k<ell-2) and k>6 or k/2==ell,
+                    g[i,k]*2,
+                    g[i,k]+h[i,k]/2)
                 """
             ],
-            name="logic", assumptions="n,m,l >= 1")
+            name="logic", assumptions="n,m,ell >= 1")
 
     knl = lp.add_and_infer_dtypes(knl, dict(g=np.float32, h=np.float64))
     mem_map = lp.get_mem_access_map(knl, count_redundant_work=True)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
 
     reduced_map = mem_map.group_by('mtype', 'dtype', 'direction')
 
@@ -332,22 +337,22 @@ def test_mem_access_counter_logic():
 def test_mem_access_counter_specialops():
 
     knl = lp.make_kernel(
-            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
                 c[i, j, k] = (2*a[i,j,k])%(2+b[i,j,k]/3.0)
                 e[i, k] = (1+g[i,k])**(1+h[i,k+1])
                 """
             ],
-            name="specialops", assumptions="n,m,l >= 1")
+            name="specialops", assumptions="n,m,ell >= 1")
 
     knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32, b=np.float32,
                                             g=np.float64, h=np.float64))
     mem_map = lp.get_mem_access_map(knl, count_redundant_work=True)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     f32 = mem_map[lp.MemAccess('global', np.float32,
                          stride=0, direction='load', variable='a')
                   ].eval_with_dict(params)
@@ -360,7 +365,7 @@ def test_mem_access_counter_specialops():
     f64 += mem_map[lp.MemAccess('global', np.dtype(np.float64),
                           stride=0, direction='load', variable='h')
                    ].eval_with_dict(params)
-    assert f32 == 2*n*m*l
+    assert f32 == 2*n*m*ell
     assert f64 == 2*n*m
 
     f32 = mem_map[lp.MemAccess('global', np.float32,
@@ -369,26 +374,26 @@ def test_mem_access_counter_specialops():
     f64 = mem_map[lp.MemAccess('global', np.float64,
                          stride=0, direction='store', variable='e')
                   ].eval_with_dict(params)
-    assert f32 == n*m*l
+    assert f32 == n*m*ell
     assert f64 == n*m
 
     filtered_map = mem_map.filter_by(direction=['load'], variable=['a', 'g'])
     #tot = lp.eval_and_sum_polys(filtered_map, params)
     tot = filtered_map.eval_and_sum(params)
-    assert tot == n*m*l + n*m
+    assert tot == n*m*ell + n*m
 
 
 def test_mem_access_counter_bitwise():
 
     knl = lp.make_kernel(
-            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
                 c[i, j, k] = (a[i,j,k] | 1) + (b[i,j,k] & 1)
                 e[i, k] = (g[i,k] ^ k)*(~h[i,k+1]) + (g[i, k] << (h[i,k] >> k))
                 """
             ],
-            name="bitwise", assumptions="n,m,l >= 1")
+            name="bitwise", assumptions="n,m,ell >= 1")
 
     knl = lp.add_and_infer_dtypes(
             knl, dict(
@@ -398,8 +403,8 @@ def test_mem_access_counter_bitwise():
     mem_map = lp.get_mem_access_map(knl, count_redundant_work=True)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     i32 = mem_map[lp.MemAccess('global', np.int32,
                          stride=0, direction='load', variable='a')
                   ].eval_with_dict(params)
@@ -412,7 +417,7 @@ def test_mem_access_counter_bitwise():
     i32 += mem_map[lp.MemAccess('global', np.dtype(np.int32),
                           stride=0, direction='load', variable='h')
                    ].eval_with_dict(params)
-    assert i32 == 4*n*m+2*n*m*l
+    assert i32 == 4*n*m+2*n*m*ell
 
     i32 = mem_map[lp.MemAccess('global', np.int32,
                          stride=0, direction='store', variable='c')
@@ -420,20 +425,20 @@ def test_mem_access_counter_bitwise():
     i32 += mem_map[lp.MemAccess('global', np.int32,
                           stride=0, direction='store', variable='e')
                    ].eval_with_dict(params)
-    assert i32 == n*m+n*m*l
+    assert i32 == n*m+n*m*ell
 
 
 def test_mem_access_counter_mixed():
 
     knl = lp.make_kernel(
-            "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "[n,m,ell] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
             c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0+a[i,j,k]+x[i,k]
             e[i, k] = g[i,k]*(2+h[i,k])
             """
             ],
-            name="mixed", assumptions="n,m,l >= 1")
+            name="mixed", assumptions="n,m,ell >= 1")
     knl = lp.add_and_infer_dtypes(knl, dict(
                 a=np.float32, b=np.float32, g=np.float64, h=np.float64,
                 x=np.float32))
@@ -444,8 +449,8 @@ def test_mem_access_counter_mixed():
     mem_map = lp.get_mem_access_map(knl, count_redundant_work=True)  # noqa
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     f64uniform = mem_map[lp.MemAccess('global', np.float64,
                                 stride=0, direction='load', variable='g')
                          ].eval_with_dict(params)
@@ -463,9 +468,9 @@ def test_mem_access_counter_mixed():
                                    stride=Variable('m'), direction='load',
                                    variable='b')
                             ].eval_with_dict(params)
-    assert f64uniform == 2*n*m*l/bsize
-    assert f32uniform == n*m*l/bsize
-    assert f32nonconsec == 3*n*m*l
+    assert f64uniform == 2*n*m*ell/bsize
+    assert f32uniform == n*m*ell/bsize
+    assert f32nonconsec == 3*n*m*ell
 
     f64uniform = mem_map[lp.MemAccess('global', np.float64,
                                 stride=0, direction='store', variable='e')
@@ -474,21 +479,21 @@ def test_mem_access_counter_mixed():
                                   stride=Variable('m'), direction='store',
                                   variable='c')
                            ].eval_with_dict(params)
-    assert f64uniform == n*m*l/bsize
-    assert f32nonconsec == n*m*l
+    assert f64uniform == n*m*ell/bsize
+    assert f32nonconsec == n*m*ell
 
 
 def test_mem_access_counter_nonconsec():
 
     knl = lp.make_kernel(
-            "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "[n,m,ell] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
             c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0+a[i,j,k]
             e[i, k] = g[i,k]*(2+h[i,k])
             """
             ],
-            name="nonconsec", assumptions="n,m,l >= 1")
+            name="nonconsec", assumptions="n,m,ell >= 1")
     knl = lp.add_and_infer_dtypes(knl, dict(
                 a=np.float32, b=np.float32, g=np.float64, h=np.float64))
     knl = lp.split_iname(knl, "i", 16)
@@ -497,8 +502,8 @@ def test_mem_access_counter_nonconsec():
     mem_map = lp.get_mem_access_map(knl, count_redundant_work=True)  # noqa
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     f64nonconsec = mem_map[lp.MemAccess('global', np.float64,
                                   stride=Variable('m'), direction='load',
                                   variable='g')
@@ -508,39 +513,39 @@ def test_mem_access_counter_nonconsec():
                                    variable='h')
                             ].eval_with_dict(params)
     f32nonconsec = mem_map[lp.MemAccess('global', np.dtype(np.float32),
-                                  stride=Variable('m')*Variable('l'),
+                                  stride=Variable('m')*Variable('ell'),
                                   direction='load', variable='a')
                            ].eval_with_dict(params)
     f32nonconsec += mem_map[lp.MemAccess('global', np.dtype(np.float32),
-                                   stride=Variable('m')*Variable('l'),
+                                   stride=Variable('m')*Variable('ell'),
                                    direction='load', variable='b')
                             ].eval_with_dict(params)
     assert f64nonconsec == 2*n*m
-    assert f32nonconsec == 3*n*m*l
+    assert f32nonconsec == 3*n*m*ell
 
     f64nonconsec = mem_map[lp.MemAccess('global', np.float64,
                                   stride=Variable('m'), direction='store',
                                   variable='e')
                            ].eval_with_dict(params)
     f32nonconsec = mem_map[lp.MemAccess('global', np.float32,
-                                  stride=Variable('m')*Variable('l'),
+                                  stride=Variable('m')*Variable('ell'),
                                   direction='store', variable='c')
                            ].eval_with_dict(params)
     assert f64nonconsec == n*m
-    assert f32nonconsec == n*m*l
+    assert f32nonconsec == n*m*ell
 
 
 def test_mem_access_counter_consec():
 
     knl = lp.make_kernel(
-            "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "[n,m,ell] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
             c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0+a[i,j,k]
             e[i, k] = g[i,k]*(2+h[i,k])
             """
             ],
-            name="consec", assumptions="n,m,l >= 1")
+            name="consec", assumptions="n,m,ell >= 1")
     knl = lp.add_and_infer_dtypes(knl, dict(
                 a=np.float32, b=np.float32, g=np.float64, h=np.float64))
     knl = lp.tag_inames(knl, {"k": "l.0", "i": "g.0", "j": "g.1"})
@@ -548,8 +553,8 @@ def test_mem_access_counter_consec():
     mem_map = lp.get_mem_access_map(knl, count_redundant_work=True)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
 
     f64consec = mem_map[lp.MemAccess('global', np.float64,
                         stride=1, direction='load', variable='g')
@@ -563,8 +568,8 @@ def test_mem_access_counter_consec():
     f32consec += mem_map[lp.MemAccess('global', np.dtype(np.float32),
                         stride=1, direction='load', variable='b')
                          ].eval_with_dict(params)
-    assert f64consec == 2*n*m*l
-    assert f32consec == 3*n*m*l
+    assert f64consec == 2*n*m*ell
+    assert f32consec == 3*n*m*ell
 
     f64consec = mem_map[lp.MemAccess('global', np.float64,
                         stride=1, direction='store', variable='e')
@@ -572,29 +577,29 @@ def test_mem_access_counter_consec():
     f32consec = mem_map[lp.MemAccess('global', np.float32,
                         stride=1, direction='store', variable='c')
                         ].eval_with_dict(params)
-    assert f64consec == n*m*l
-    assert f32consec == n*m*l
+    assert f64consec == n*m*ell
+    assert f32consec == n*m*ell
 
 
 def test_barrier_counter_nobarriers():
 
     knl = lp.make_kernel(
-            "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "[n,m,ell] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
                 c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0+a[i,j,k]
                 e[i, k] = g[i,k]*h[i,k+1]
                 """
             ],
-            name="basic", assumptions="n,m,l >= 1")
+            name="basic", assumptions="n,m,ell >= 1")
 
     knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32, b=np.float32,
                                             g=np.float64, h=np.float64))
     sync_map = lp.get_synchronization_map(knl)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     assert len(sync_map) == 1
     assert sync_map["kernel_launch"].eval_with_dict(params) == 1
 
@@ -602,7 +607,7 @@ def test_barrier_counter_nobarriers():
 def test_barrier_counter_barriers():
 
     knl = lp.make_kernel(
-            "[n,m,l] -> {[i,k,j]: 0<=i<50 and 1<=k<98 and 0<=j<10}",
+            "[n,m,ell] -> {[i,k,j]: 0<=i<50 and 1<=k<98 and 0<=j<10}",
             [
                 """
             c[i,j,k] = 2*a[i,j,k] {id=first}
@@ -620,8 +625,8 @@ def test_barrier_counter_barriers():
     print(sync_map)
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
     barrier_count = sync_map["barrier_local"].eval_with_dict(params)
     assert barrier_count == 50*10*2
 
@@ -630,13 +635,13 @@ def test_all_counters_parallel_matmul():
 
     bsize = 16
     knl = lp.make_kernel(
-            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 "c[i, j] = sum(k, a[i, k]*b[k, j])"
             ],
-            name="matmul", assumptions="n,m,l >= 1")
+            name="matmul", assumptions="n,m,ell >= 1")
     knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32, b=np.float32))
-    knl = lp.split_iname(knl, "i", bsize, outer_tag="g.0", inner_tag="l.1")
+    knl = lp.split_iname(knl, "i", bsize, outer_tag="g.0", inner_tag="l.1")
     knl = lp.split_iname(knl, "j", bsize, outer_tag="g.1", inner_tag="l.0")
     knl = lp.split_iname(knl, "k", bsize)
     knl = lp.add_prefetch(knl, "a", ["k_inner", "i_inner"])
@@ -644,8 +649,8 @@ def test_all_counters_parallel_matmul():
 
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
 
     sync_map = lp.get_synchronization_map(knl)
     assert len(sync_map) == 2
@@ -666,7 +671,7 @@ def test_all_counters_parallel_matmul():
                         lp.Op(np.dtype(np.int32), 'mul')
                         ].eval_with_dict(params)
 
-    assert f32mul+f32add == n*m*l*2
+    assert f32mul+f32add == n*m*ell*2
 
     op_map = lp.get_mem_access_map(knl, count_redundant_work=True)
 
@@ -677,21 +682,21 @@ def test_all_counters_parallel_matmul():
                      stride=1, direction='load', variable='a')
                      ].eval_with_dict(params)
 
-    assert f32s1lb == n*m*l/bsize
-    assert f32s1la == n*m*l/bsize
+    assert f32s1lb == n*m*ell/bsize
+    assert f32s1la == n*m*ell/bsize
 
     f32coal = op_map[lp.MemAccess('global', np.float32,
                      stride=1, direction='store', variable='c')
                      ].eval_with_dict(params)
 
-    assert f32coal == n*l
+    assert f32coal == n*ell
 
     local_mem_map = lp.get_mem_access_map(knl,
                         count_redundant_work=True).filter_by(mtype=['local'])
     local_mem_l = local_mem_map[lp.MemAccess('local', np.dtype(np.float32),
                                              direction='load')
                                 ].eval_with_dict(params)
-    assert local_mem_l == n*m*l*2
+    assert local_mem_l == n*m*ell*2
 
 
 def test_gather_access_footprint():
@@ -729,38 +734,38 @@ def test_gather_access_footprint_2():
 def test_summations_and_filters():
 
     knl = lp.make_kernel(
-            "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
+            "[n,m,ell] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
             [
                 """
                 c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0+a[i,j,k]
                 e[i, k+1] = -g[i,k]*h[i,k+1]
                 """
             ],
-            name="basic", assumptions="n,m,l >= 1")
+            name="basic", assumptions="n,m,ell >= 1")
 
     knl = lp.add_and_infer_dtypes(knl,
                         dict(a=np.float32, b=np.float32, g=np.float64, h=np.float64))
     n = 512
     m = 256
-    l = 128
-    params = {'n': n, 'm': m, 'l': l}
+    ell = 128
+    params = {'n': n, 'm': m, 'ell': ell}
 
     mem_map = lp.get_mem_access_map(knl, count_redundant_work=True)
 
     loads_a = mem_map.filter_by(direction=['load'], variable=['a']
                                 ).eval_and_sum(params)
-    assert loads_a == 2*n*m*l
+    assert loads_a == 2*n*m*ell
 
     global_stores = mem_map.filter_by(mtype=['global'], direction=['store']
                                       ).eval_and_sum(params)
-    assert global_stores == n*m*l + n*m
+    assert global_stores == n*m*ell + n*m
 
     ld_bytes = mem_map.filter_by(mtype=['global'], direction=['load']
                                  ).to_bytes().eval_and_sum(params)
     st_bytes = mem_map.filter_by(mtype=['global'], direction=['store']
                                  ).to_bytes().eval_and_sum(params)
-    assert ld_bytes == 4*n*m*l*3 + 8*n*m*2
-    assert st_bytes == 4*n*m*l + 8*n*m
+    assert ld_bytes == 4*n*m*ell*3 + 8*n*m*2
+    assert st_bytes == 4*n*m*ell + 8*n*m
 
     # ignore stride and variable names in this map
     reduced_map = mem_map.group_by('mtype', 'dtype', 'direction')
@@ -768,7 +773,7 @@ def test_summations_and_filters():
                           ].eval_with_dict(params)
     f64lall = reduced_map[lp.MemAccess('global', np.float64, direction='load')
                           ].eval_with_dict(params)
-    assert f32lall == 3*n*m*l
+    assert f32lall == 3*n*m*ell
     assert f64lall == 2*n*m
 
     op_map = lp.get_op_map(knl, count_redundant_work=True)
@@ -779,14 +784,14 @@ def test_summations_and_filters():
     f32 = op_map_dtype[lp.Op(dtype=np.float32)].eval_with_dict(params)
     f64 = op_map_dtype[lp.Op(dtype=np.float64)].eval_with_dict(params)
     i32 = op_map_dtype[lp.Op(dtype=np.int32)].eval_with_dict(params)
-    assert f32 == n*m*l*3
+    assert f32 == n*m*ell*3
     assert f64 == n*m
     assert i32 == n*m*2
 
     addsub_all = op_map.filter_by(name=['add', 'sub']).eval_and_sum(params)
     f32ops_all = op_map.filter_by(dtype=[np.float32]).eval_and_sum(params)
-    assert addsub_all == n*m*l + n*m*2
-    assert f32ops_all == n*m*l*3
+    assert addsub_all == n*m*ell + n*m*2
+    assert f32ops_all == n*m*ell*3
 
     non_field = op_map.filter_by(xxx=[np.float32]).eval_and_sum(params)
     assert non_field == 0
@@ -795,7 +800,7 @@ def test_summations_and_filters():
     ops_noname = op_map.group_by('dtype')
     mul_all = ops_nodtype[lp.Op(name='mul')].eval_with_dict(params)
     f64ops_all = ops_noname[lp.Op(dtype=np.float64)].eval_with_dict(params)
-    assert mul_all == n*m*l + n*m
+    assert mul_all == n*m*ell + n*m
     assert f64ops_all == n*m
 
     def func_filter(key):
-- 
GitLab


From 848bbac09b4ce88c42adffbae774868ded97a76d Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 25 Oct 2017 18:14:00 +0200
Subject: [PATCH 2/6] More flake8 fixery

---
 loopy/check.py                      |  4 ++--
 loopy/isl_helpers.py                |  2 +-
 loopy/kernel/creation.py            | 10 +++++-----
 loopy/kernel/tools.py               |  6 +++---
 loopy/transform/array_buffer_map.py |  6 +++---
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/loopy/check.py b/loopy/check.py
index 11e045d86..6bac36838 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -401,7 +401,7 @@ def pre_schedule_checks(kernel):
         logger.debug("%s: pre-schedule check: done" % kernel.name)
     except KeyboardInterrupt:
         raise
-    except:
+    except Exception:
         print(75*"=")
         print("failing kernel during pre-schedule check:")
         print(75*"=")
@@ -659,7 +659,7 @@ def pre_codegen_checks(kernel):
         check_that_shapes_and_strides_are_arguments(kernel)
 
         logger.debug("pre-codegen check %s: done" % kernel.name)
-    except:
+    except Exception:
         print(75*"=")
         print("failing kernel during pre-schedule check:")
         print(75*"=")
diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py
index f7ce5d9fc..49ab3fd68 100644
--- a/loopy/isl_helpers.py
+++ b/loopy/isl_helpers.py
@@ -329,7 +329,7 @@ def is_nonnegative(expr, over_set):
     from loopy.symbolic import aff_from_expr
     try:
         aff = aff_from_expr(space, -expr-1)
-    except:
+    except Exception:
         return None
     expr_neg_set = isl.BasicSet.universe(space).add_constraint(
             isl.Constraint.inequality_from_aff(aff))
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index c6618d62f..d5f14bcac 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -434,7 +434,7 @@ def parse_insn(groups, insn_options):
     if "lhs" in groups:
         try:
             lhs = parse(groups["lhs"])
-        except:
+        except Exception:
             print("While parsing left hand side '%s', "
                     "the following error occurred:" % groups["lhs"])
             raise
@@ -443,7 +443,7 @@ def parse_insn(groups, insn_options):
 
     try:
         rhs = parse(groups["rhs"])
-    except:
+    except Exception:
         print("While parsing right hand side '%s', "
                 "the following error occurred:" % groups["rhs"])
         raise
@@ -517,14 +517,14 @@ def parse_subst_rule(groups):
     from loopy.symbolic import parse
     try:
         lhs = parse(groups["lhs"])
-    except:
+    except Exception:
         print("While parsing left hand side '%s', "
                 "the following error occurred:" % groups["lhs"])
         raise
 
     try:
         rhs = parse(groups["rhs"])
-    except:
+    except Exception:
         print("While parsing right hand side '%s', "
                 "the following error occurred:" % groups["rhs"])
         raise
@@ -991,7 +991,7 @@ def parse_domains(domains, defines):
 
             try:
                 dom = isl.BasicSet.read_from_str(isl.DEFAULT_CONTEXT, dom)
-            except:
+            except Exception:
                 print("failed to parse domain '%s'" % dom)
                 raise
         else:
diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py
index ad1e71e59..02df0f2b4 100644
--- a/loopy/kernel/tools.py
+++ b/loopy/kernel/tools.py
@@ -620,11 +620,11 @@ class DomainParameterFinder(object):
                         if dep.name in param_names:
                             from pymbolic.algorithm import solve_affine_equations_for
                             try:
-                                # friggin' overkill :)
+                                # overkill :)
                                 param_expr = solve_affine_equations_for(
                                         [dep.name], [(shape_i, var("shape_i"))]
                                         )[dep.name]
-                            except:
+                            except Exception:
                                 # went wrong? oh well
                                 pass
                             else:
@@ -1092,7 +1092,7 @@ def guess_var_shape(kernel, var_name):
                             kernel.cache_manager.dim_max(
                                 armap.access_range, i) + 1,
                             constants_only=False)))
-            except:
+            except Exception:
                 print("While trying to find shape axis %d of "
                         "variable '%s', the following "
                         "exception occurred:" % (i, var_name),
diff --git a/loopy/transform/array_buffer_map.py b/loopy/transform/array_buffer_map.py
index f4e6526a7..618e36f20 100644
--- a/loopy/transform/array_buffer_map.py
+++ b/loopy/transform/array_buffer_map.py
@@ -239,14 +239,14 @@ class ArrayToBufferMap(object):
         non1_storage_axis_flags = []
         non1_storage_shape = []
 
-        for saxis, bi, l in zip(
+        for saxis, bi, saxis_len in zip(
                 storage_axis_names, storage_base_indices, storage_shape):
-            has_length_non1 = l != 1
+            has_length_non1 = saxis_len != 1
 
             non1_storage_axis_flags.append(has_length_non1)
 
             if has_length_non1:
-                non1_storage_shape.append(l)
+                non1_storage_shape.append(saxis_len)
 
         # }}}
 
-- 
GitLab


From 15ade2e9e26224da61723e0aec3ceb1de18cd4c0 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 25 Oct 2017 18:22:00 +0200
Subject: [PATCH 3/6] More flake8 placating

---
 loopy/preprocess.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index ae70a0d6c..ac7ac1988 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -610,7 +610,7 @@ def _try_infer_scan_stride(kernel, scan_iname, sweep_iname, sweep_lower_bound):
     if len(coeffs) == 0:
         try:
             scan_iname_aff.get_constant_val()
-        except:
+        except Exception:
             raise ValueError("range for aff isn't constant: '%s'" % scan_iname_aff)
 
         # If this point is reached we're assuming the domain is of the form
-- 
GitLab


From 43b0479f55894f63f49b20219c286237f8bea16a Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 25 Oct 2017 19:48:14 +0200
Subject: [PATCH 4/6] loopy kernel eq comparison for assumptions: fall back to
 is_equal

---
 loopy/kernel/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index bdef1133e..cad11fc78 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -1374,7 +1374,9 @@ class LoopKernel(ImmutableRecordWithoutPickling):
                         return False
 
             elif field_name == "assumptions":
-                if not self.assumptions.plain_is_equal(other.assumptions):
+                if not (
+                        self.assumptions.plain_is_equal(other.assumptions)
+                        or self.assumptions.is_equal(other.assumptions)):
                     return False
 
             elif getattr(self, field_name) != getattr(other, field_name):
-- 
GitLab


From cd933cc7ce6e7a107f335633dd4b136ebad2ce92 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 25 Oct 2017 19:48:34 +0200
Subject: [PATCH 5/6] Mop up wreckage from l -> ell change

---
 test/test_fortran.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_fortran.py b/test/test_fortran.py
index aa5a7e6e2..d461c21a5 100644
--- a/test/test_fortran.py
+++ b/test/test_fortran.py
@@ -459,7 +459,7 @@ def test_precompute_some_exist(ctx_factory):
     fortran_src = """
         subroutine dgemm(m,n,ell,a,b,c)
           implicit none
-          real*8 a(m,ell),b(,n),c(m,n)
+          real*8 a(m,ell),b(ell,n),c(m,n)
           integer m,n,k,i,j,ell
 
           do j = 1,n
-- 
GitLab


From ddedb3d83cedaa96d58f055409ad6e9629a1debf Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 25 Oct 2017 19:52:06 +0200
Subject: [PATCH 6/6] Mop up wreckage from l -> ell change

---
 test/test_fortran.py    | 4 ++--
 test/test_statistics.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/test_fortran.py b/test/test_fortran.py
index d461c21a5..842a0127e 100644
--- a/test/test_fortran.py
+++ b/test/test_fortran.py
@@ -280,7 +280,7 @@ def test_matmul(ctx_factory, buffer_inames):
     fortran_src = """
         subroutine dgemm(m,n,ell,a,b,c)
           implicit none
-          real*8 a(m,ell),b(l,n),c(m,n)
+          real*8 a(m,ell),b(ell,n),c(m,n)
           integer m,n,k,i,j,ell
 
           do j = 1,n
@@ -306,7 +306,7 @@ def test_matmul(ctx_factory, buffer_inames):
     knl = lp.split_iname(knl, "k", 32)
     knl = lp.assume(knl, "n mod 32 = 0")
     knl = lp.assume(knl, "m mod 32 = 0")
-    knl = lp.assume(knl, "l mod 16 = 0")
+    knl = lp.assume(knl, "ell mod 16 = 0")
 
     knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
     knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
diff --git a/test/test_statistics.py b/test/test_statistics.py
index 3b33a8eff..eeb4a5a28 100644
--- a/test/test_statistics.py
+++ b/test/test_statistics.py
@@ -641,7 +641,7 @@ def test_all_counters_parallel_matmul():
             ],
             name="matmul", assumptions="n,m,ell >= 1")
     knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32, b=np.float32))
-    knl = lp.split_iname(knl, "i", bsize, outer_tag="g.0", inner_tag="ell.1")
+    knl = lp.split_iname(knl, "i", bsize, outer_tag="g.0", inner_tag="l.1")
     knl = lp.split_iname(knl, "j", bsize, outer_tag="g.1", inner_tag="l.0")
     knl = lp.split_iname(knl, "k", bsize)
     knl = lp.add_prefetch(knl, "a", ["k_inner", "i_inner"])
-- 
GitLab