diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a865134add02fd7527bf336d0c3596e2777aa232..7925564bcb41e500ba4d14ea5344f23dc75fabe7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: - name: "Main Script" run: | curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/prepare-and-run-flake8.sh - . ./prepare-and-run-flake8.sh "$(basename $GITHUB_REPOSITORY)" ./test examples + . ./prepare-and-run-flake8.sh "$(basename $GITHUB_REPOSITORY)" ./test examples proto-tests contrib pylint: name: Pylint diff --git a/contrib/c-integer-semantics.py b/contrib/c-integer-semantics.py index 23c7cb319177b762e83583e7bb5ea3eecd1d46da..1cc1a142d9b76725b9b4a700052c877fa2861fbe 100644 --- a/contrib/c-integer-semantics.py +++ b/contrib/c-integer-semantics.py @@ -80,7 +80,7 @@ def main(): with open("int-experiments.c", "w") as outf: outf.write(C_SRC) - system('gcc -Wall -shared int-experiments.c -o int-experiments.so') + system("gcc -Wall -shared int-experiments.c -o int-experiments.so") int_exp = ctypes.CDLL("int-experiments.so") for func in [ diff --git a/contrib/mem-pattern-explorer/pattern_vis.py b/contrib/mem-pattern-explorer/pattern_vis.py index 557efe33229997f756c31248a7a381bac4928ffe..54609acd7361182e36a9b81d45dd985dfea58478 100644 --- a/contrib/mem-pattern-explorer/pattern_vis.py +++ b/contrib/mem-pattern-explorer/pattern_vis.py @@ -26,7 +26,7 @@ class ArrayAccessPatternContext: self.arrays = [] - def l(self, index): # noqa: E743 + def l(self, index): # noqa: E741,E743 subscript = [np.newaxis] * self.ind_length subscript[len(self.gsize) + index] = slice(None) @@ -125,7 +125,7 @@ class Array: lin_index = np.array(lin_index)[subscript] self.array[lin_index, 0] = self.ctx.timestamp - for i, glength in enumerate(self.ctx.gsize): + for i, _glength in enumerate(self.ctx.gsize): if lin_index.shape[i] > 1: self.array[lin_index, 2+i] = self.ctx.g(i) diff --git a/doc/conf.py b/doc/conf.py index ce500cbecd1d15ab97c6bfa95934b4034d0bf17c..3ae7fa1cd05c35819a87df0bcb08b6693ba8df9c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,7 +1,7 @@ import os from urllib.request import urlopen -_conf_url = "https://raw.githubusercontent.com/inducer/sphinxconfig/main/sphinxconfig.py" +_conf_url = "https://raw.githubusercontent.com/inducer/sphinxconfig/main/sphinxconfig.py" # noqa with urlopen(_conf_url) as _inf: exec(compile(_inf.read(), _conf_url, "exec"), globals()) diff --git a/proto-tests/test_fem_assembly.py b/proto-tests/test_fem_assembly.py index dde093d53be125c2b1eaf13022d51b3300b61314..2bee025f11c5d2e4149a9a6738f2c88a2ebb67ab 100644 --- a/proto-tests/test_fem_assembly.py +++ b/proto-tests/test_fem_assembly.py @@ -1,11 +1,9 @@ import numpy as np -import pyopencl as cl +import pyopencl as cl # noqa import loopy as lp from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests - - + as pytest_generate_tests # noqa def test_laplacian_stiffness(ctx_factory): @@ -13,22 +11,22 @@ def test_laplacian_stiffness(ctx_factory): ctx = ctx_factory() order = "C" - dim = 2 # (baked into code) + dim = 2 # (baked into code) - Nq = 40 # num. quadrature points (baked into code) - Nb = 20 # num. basis functions (baked into code) - Nc = 100 # num. cells (run-time symbolic) + Nq = 40 # num. quadrature points (baked into code) # noqa + Nb = 20 # num. basis functions (baked into code) # noqa + Nc = 100 # num. cells (run-time symbolic) # noqa from pymbolic import var - Nc_sym = var("Nc") + Nc_sym = var("Nc") # noqa knl = lp.make_kernel(ctx.devices[0], - "[Nc] -> {[K,i,j,q, dx_axis, ax_b]: 0<=K {[K,i,j,q, dx_axis, ax_b]: 0<=K 1: @@ -141,4 +137,3 @@ if __name__ == "__main__": else: from py.test.cmdline import main main([__file__]) - diff --git a/proto-tests/test_sem.py b/proto-tests/test_sem.py index b84d072d0546270e6d21702f7b0f5b6354f7a238..56a0d5e2569ed9722f09c4536d1ad60958badf67 100644 --- a/proto-tests/test_sem.py +++ b/proto-tests/test_sem.py @@ -1,17 +1,15 @@ import numpy as np -import pyopencl as cl +import pyopencl as cl # noqa import loopy as lp from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests + as pytest_generate_tests # noqa +1/0 # not ready - -1/0 # not ready - def test_laplacian(ctx_factory): - 1/0 # not adapted to new language + 1/0 # not adapted to new language dtype = np.float32 ctx = ctx_factory() @@ -20,29 +18,29 @@ def test_laplacian(ctx_factory): n = 8 from pymbolic import var - K_sym = var("K") + K_sym = var("K") # noqa field_shape = (K_sym, n, n, n) # load: 1+6 fields + 1/N D entry # store: 1 fields # perform: N*2*6 + 3*5 flops - # ratio: (12*N+15)/8 flops per 4 bytes on bus + # ratio: (12*N+15)/8 flops per 4 bytes on bus # ~ 14 FLOPS per 4 bytes at N=8 # ~ 525 GFLOPS max on a 150GB/s device at N=8 if done perfectly # K - run-time symbolic knl = lp.make_kernel(ctx.devices[0], - "[K] -> {[i,j,k,e,m,o1,o2,o3,gi]: 0<=i,j,k,m,o1,o2,o3<%d and 0<=e {[i,j,k,e,m,o1,o2,o3,gi]: 0<=i,j,k,m,o1,o2,o3<%d and 0<=e {[i,ip,j,jp,k,kp,m,e]: 0<=i,j,k,m<%d AND 0<=o,ip,jp,kp<%d 0<=e {[i,ip,j,jp,k,kp,m,e]: 0<=i,j,k,m<%d AND 0<=o,ip,jp,kp<%d 0<=e {[i,ip,j,jp,k,kp,e]: 0<=i,j,k<%d AND 0<=ip,jp,kp<%d 0<=e {[i,ip,j,jp,k,kp,e]: 0<=i,j,k<%d AND 0<=ip,jp,kp<%d 0<=e u1[i ,jp,kp,e] = sum_float32(ip, I[i,ip]*u [ip,jp,kp,e])", - "[|i,j ,kp] u2[i ,j ,kp,e] = sum_float32(jp, I[j,jp]*u1[i ,jp,kp,e])", - "[|i,j ,k ] u3[i ,j ,k ,e] = sum_float32(kp, I[k,kp]*u2[i ,j ,kp,e])", + "[|i,jp,kp] u1[i ,jp,kp,e] = sum_float32(ip, I[i,ip]*u [ip,jp,kp,e])", # noqa + "[|i,j ,kp] u2[i ,j ,kp,e] = sum_float32(jp, I[j,jp]*u1[i ,jp,kp,e])", # noqa + "[|i,j ,k ] u3[i ,j ,k ,e] = sum_float32(kp, I[k,kp]*u2[i ,j ,kp,e])", # noqa "[|i,j ,k ] Pu[i ,j ,k ,e] = P[i,j,k,e]*u3[i,j,k,e]", - "[|i,j ,kp] Pu3[i ,j ,kp,e] = sum_float32(k, V[kp,k]*Pu[i ,j , k,e])", - "[|i,jp,kp] Pu2[i ,jp,kp,e] = sum_float32(j, V[jp,j]*Pu[i ,j ,kp,e])", + "[|i,j ,kp] Pu3[i ,j ,kp,e] = sum_float32(k, V[kp,k]*Pu[i ,j , k,e])", # noqa + "[|i,jp,kp] Pu2[i ,jp,kp,e] = sum_float32(j, V[jp,j]*Pu[i ,j ,kp,e])", # noqa "Pu[ip,jp,kp,e] = sum_float32(i, V[ip,i]*Pu[i ,jp,kp,e])", ], [ @@ -522,7 +503,7 @@ def test_interp_diff(ctx_factory): print(knl) 1/0 - knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1)) + knl = lp.split_iname(knl, "e", 16, outer_tag="g.0") # , slabs=(0, 1)) knl = lp.tag_inames(knl, dict(i="l.0", j="l.1")) @@ -532,14 +513,13 @@ def test_interp_diff(ctx_factory): kernel_gen = lp.generate_loop_schedules(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000), kill_level_min=5) - lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen, + K = 1000 # noqa + lp.auto_test_vs_ref(knl, ctx, kernel_gen, op_count=0, op_label="GFlops", parameters={"K": K}, print_seq_code=True,) - - if __name__ == "__main__": import sys if len(sys.argv) > 1: diff --git a/proto-tests/test_sem_tim.py b/proto-tests/test_sem_tim.py index 9d8dfcfa680fc484f20c9511b34210b15af8d635..a4af60a958e8a3f7ce4d02d2896ebfeb0229b9aa 100644 --- a/proto-tests/test_sem_tim.py +++ b/proto-tests/test_sem_tim.py @@ -1,17 +1,15 @@ import numpy as np -import pyopencl as cl +import pyopencl as cl # noqa import loopy as lp from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests - -1/0 # inspect me - + as pytest_generate_tests # noqa +1/0 # inspect me def test_laplacian(ctx_factory): - 1/0 # not adapted to new language + 1/0 # not adapted to new language dtype = np.float32 ctx = ctx_factory() @@ -20,29 +18,29 @@ def test_laplacian(ctx_factory): n = 8 from pymbolic import var - K_sym = var("K") + K_sym = var("K") # noqa field_shape = (K_sym, n, n, n) # load: 1+6 fields + 1/N D entry # store: 1 fields # perform: N*2*6 + 3*5 flops - # ratio: (12*N+15)/8 flops per 4 bytes on bus + # ratio: (12*N+15)/8 flops per 4 bytes on bus # ~ 14 FLOPS per 4 bytes at N=8 # ~ 525 GFLOPS max on a 150GB/s device at N=8 if done perfectly # K - run-time symbolic knl = lp.make_kernel(ctx.devices[0], - "[K] -> {[i,j,k,e,m,o1,o2,o3,gi]: 0<=i,j,k,m,o1,o2,o3<%d and 0<=e {[i,j,k,e,m,o1,o2,o3,gi]: 0<=i,j,k,m,o1,o2,o3<%d and 0<=e {[i,ip,j,jp,k,kp,m,e]: 0<=i,j,k,m<%d AND 0<=o,ip,jp,kp<%d 0<=e {[i,ip,j,jp,k,kp,m,e]: 0<=i,j,k,m<%d AND 0<=o,ip,jp,kp<%d 0<=e {[i,ip,j,jp,k,kp,e]: 0<=i,j,k<%d AND 0<=ip,jp,kp<%d 0<=e {[i,ip,j,jp,k,kp,e]: 0<=i,j,k<%d AND 0<=ip,jp,kp<%d 0<=e u1[i ,jp,kp,e] = sum_float32(ip, I[i,ip]*u [ip,jp,kp,e])", - "[|i,j ,kp] u2[i ,j ,kp,e] = sum_float32(jp, I[j,jp]*u1[i ,jp,kp,e])", - "[|i,j ,k ] u3[i ,j ,k ,e] = sum_float32(kp, I[k,kp]*u2[i ,j ,kp,e])", + "[|i,jp,kp] u1[i ,jp,kp,e] = sum_float32(ip, I[i,ip]*u [ip,jp,kp,e])", # noqa + "[|i,j ,kp] u2[i ,j ,kp,e] = sum_float32(jp, I[j,jp]*u1[i ,jp,kp,e])", # noqa + "[|i,j ,k ] u3[i ,j ,k ,e] = sum_float32(kp, I[k,kp]*u2[i ,j ,kp,e])", # noqa "[|i,j ,k ] Pu[i ,j ,k ,e] = P[i,j,k,e]*u3[i,j,k,e]", - "[|i,j ,kp] Pu3[i ,j ,kp,e] = sum_float32(k, V[kp,k]*Pu[i ,j , k,e])", - "[|i,jp,kp] Pu2[i ,jp,kp,e] = sum_float32(j, V[jp,j]*Pu[i ,j ,kp,e])", + "[|i,j ,kp] Pu3[i ,j ,kp,e] = sum_float32(k, V[kp,k]*Pu[i ,j , k,e])", # noqa + "[|i,jp,kp] Pu2[i ,jp,kp,e] = sum_float32(j, V[jp,j]*Pu[i ,j ,kp,e])", # noqa "Pu[ip,jp,kp,e] = sum_float32(i, V[ip,i]*Pu[i ,jp,kp,e])", ], [ @@ -528,7 +511,7 @@ def test_interp_diff(ctx_factory): print(knl) 1/0 - knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1)) + knl = lp.split_iname(knl, "e", 16, outer_tag="g.0") # , slabs=(0, 1)) knl = lp.tag_inames(knl, dict(i="l.0", j="l.1")) @@ -538,14 +521,14 @@ def test_interp_diff(ctx_factory): kernel_gen = lp.generate_loop_schedules(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000), kill_level_min=5) - lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen, + K = 1000 # noqa + + lp.auto_test_vs_ref(knl, ctx, kernel_gen, op_count=0, op_label="GFlops", parameters={"K": K}, print_seq_code=True,) - - if __name__ == "__main__": import sys if len(sys.argv) > 1: diff --git a/proto-tests/test_tim.py b/proto-tests/test_tim.py index 773821dce08adb758f49da6e8a6102011005beec..04b11e3177976a874d7d6cb66faf4e72e1920c08 100644 --- a/proto-tests/test_tim.py +++ b/proto-tests/test_tim.py @@ -1,12 +1,11 @@ import numpy as np -import pyopencl as cl +import pyopencl as cl # noqa import loopy as lp from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests - -1/0 # see sem_reagan? + as pytest_generate_tests # noqa +1/0 # see sem_reagan? def test_tim2d(ctx_factory): @@ -17,7 +16,7 @@ def test_tim2d(ctx_factory): n = 8 from pymbolic import var - K_sym = var("K") + K_sym = var("K") # noqa field_shape = (K_sym, n, n) @@ -37,41 +36,40 @@ def test_tim2d(ctx_factory): lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), lp.ArrayArg("G", dtype, shape=(3,)+field_shape, order=order), -# lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), + # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), -# lp.ImageArg("D", dtype, shape=(n, n)), + # lp.ImageArg("D", dtype, shape=(n, n)), lp.ValueArg("K", np.int32, approximately=1000), ], - name="semlap2D", assumptions="K>=1") + name="semlap2D", assumptions="K>=1") - unroll = 32 + unroll = 32 # noqa seq_knl = knl - knl = lp.add_prefetch(knl, "D", ["m", "j", "i","o"], default_tag="l.auto") + knl = lp.add_prefetch(knl, "D", ["m", "j", "i", "o"], default_tag="l.auto") knl = lp.add_prefetch(knl, "u", ["i", "j", "o"], default_tag="l.auto") knl = lp.precompute(knl, "ur", np.float32, ["a", "b"], default_tag="l.auto") knl = lp.precompute(knl, "us", np.float32, ["a", "b"], default_tag="l.auto") - knl = lp.split_iname(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1)) + knl = lp.split_iname(knl, "e", 1, outer_tag="g.0") # , slabs=(0, 1)) knl = lp.tag_inames(knl, dict(i="l.0", j="l.1")) knl = lp.tag_inames(knl, dict(o="unr")) knl = lp.tag_inames(knl, dict(m="unr")) -# knl = lp.add_prefetch(knl, "G", [2,3], default_tag=None) # axis/argument indices on G - knl = lp.add_prefetch(knl, "G", [2,3], default_tag="l.auto") # axis/argument indices on G +# knl = lp.add_prefetch(knl, "G", [2,3], default_tag=None) # axis/argument indices on G # noqa + knl = lp.add_prefetch(knl, "G", [2, 3], default_tag="l.auto") # axis/argument indices on G # noqa kernel_gen = lp.generate_loop_schedules(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) - K = 1000 + K = 1000 # noqa lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen, op_count=K*(n*n*n*2*2 + n*n*2*3 + n**3 * 2*2)/1e9, op_label="GFlops", parameters={"K": K}) -#TW: ^^^^^^^^^^^^^^^ TypeError: auto_test_vs_ref() got an unexpected keyword argument 'print_seq_code' - +#TW: ^^^^^^^^^^^^^^^ TypeError: auto_test_vs_ref() got an unexpected keyword argument 'print_seq_code' # noqa def test_red2d(ctx_factory): @@ -82,7 +80,7 @@ def test_red2d(ctx_factory): n = 16 from pymbolic import var - K_sym = var("K") + K_sym = var("K") # noqa field_shape = (K_sym, n, n) @@ -104,12 +102,12 @@ def test_red2d(ctx_factory): lp.ArrayArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], - name="semlap2D", assumptions="K>=1") + name="semlap2D", assumptions="K>=1") - unroll = 32 + unroll = 32 # noqa seq_knl = knl - knl = lp.add_prefetch(knl, "D", ["m", "j", "i","o"], default_tag="l.auto") + knl = lp.add_prefetch(knl, "D", ["m", "j", "i", "o"], default_tag="l.auto") knl = lp.add_prefetch(knl, "u", ["i", "j", "o"], default_tag="l.auto") knl = lp.precompute(knl, "ue", np.float32, ["a", "b", "m"], default_tag="l.auto") @@ -118,26 +116,24 @@ def test_red2d(ctx_factory): knl = lp.precompute(knl, "us", np.float32, ["a", "b"], default_tag="l.auto") knl = lp.split_iname(knl, "e", 2, outer_tag="g.0") - knl = lp.split_iname(knl, "j", n, inner_tag="l.0")#, slabs=(0, 1)) - knl = lp.split_iname(knl, "i", n, inner_tag="l.1")#, slabs=(0, 1)) + knl = lp.split_iname(knl, "j", n, inner_tag="l.0") # , slabs=(0, 1)) + knl = lp.split_iname(knl, "i", n, inner_tag="l.1") # , slabs=(0, 1)) knl = lp.tag_inames(knl, dict(o="unr")) knl = lp.tag_inames(knl, dict(m="unr")) - - knl = lp.add_prefetch(knl, "G", [2,3], default_tag="l.auto") # axis/argument indices on G + knl = lp.add_prefetch(knl, "G", [2, 3], default_tag="l.auto") # axis/argument indices on G # noqa kernel_gen = lp.generate_loop_schedules(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) - K = 1000 + K = 1000 # noqa lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen, op_count=K*((n**3)*2*2 + n*n*2*3 + (n**3)*2*2)/1e9, op_label="GFlops", parameters={"K": K}) -#TW: ^^^^^^^^^^^^^^^ TypeError: auto_test_vs_ref() got an unexpected keyword argument 'print_seq_code' - +#TW: ^^^^^^^^^^^^^^^ TypeError: auto_test_vs_ref() got an unexpected keyword argument 'print_seq_code' # noqa def test_tim3d(ctx_factory): @@ -148,7 +144,7 @@ def test_tim3d(ctx_factory): n = 8 from pymbolic import var - K_sym = var("K") + K_sym = var("K") # noqa field_shape = (K_sym, n, n, n) @@ -161,24 +157,24 @@ def test_tim3d(ctx_factory): "ut(a,b,c) := sum_float32(@o, D[c,o]*u[e,a,b,o])", "lap[e,i,j,k] = " - " sum_float32(m, D[m,i]*(G[0,e,m,j,k]*ur(m,j,k) + G[1,e,m,j,k]*us(m,j,k) + G[2,e,m,j,k]*ut(m,j,k)))" - " + sum_float32(m, D[m,j]*(G[1,e,i,m,k]*ur(i,m,k) + G[3,e,i,m,k]*us(i,m,k) + G[4,e,i,m,k]*ut(i,m,k)))" - " + sum_float32(m, D[m,k]*(G[2,e,i,j,m]*ur(i,j,m) + G[4,e,i,j,m]*us(i,j,m) + G[5,e,i,j,m]*ut(i,j,m)))" + " sum_float32(m, D[m,i]*(G[0,e,m,j,k]*ur(m,j,k) + G[1,e,m,j,k]*us(m,j,k) + G[2,e,m,j,k]*ut(m,j,k)))" # noqa + " + sum_float32(m, D[m,j]*(G[1,e,i,m,k]*ur(i,m,k) + G[3,e,i,m,k]*us(i,m,k) + G[4,e,i,m,k]*ut(i,m,k)))" # noqa + " + sum_float32(m, D[m,k]*(G[2,e,i,j,m]*ur(i,j,m) + G[4,e,i,j,m]*us(i,j,m) + G[5,e,i,j,m]*ut(i,j,m)))" # noqa ], [ lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), -# lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), + # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), -# lp.ImageArg("D", dtype, shape=(n, n)), + # lp.ImageArg("D", dtype, shape=(n, n)), lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap3D", assumptions="K>=1") seq_knl = knl - knl = lp.add_prefetch(knl, "D", ["m", "j", "i", "k","o"], default_tag="l.auto") + knl = lp.add_prefetch(knl, "D", ["m", "j", "i", "k", "o"], default_tag="l.auto") knl = lp.add_prefetch(knl, "u", ["i", "j", "o", "k"], default_tag="l.auto") knl = lp.precompute(knl, "ur", np.float32, ["a", "b", "c"], default_tag="l.auto") @@ -186,10 +182,10 @@ def test_tim3d(ctx_factory): default_tag="l.auto") knl = lp.precompute(knl, "ut", np.float32, ["a", "b", "c"], default_tag="l.auto") - knl = lp.split_iname(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1)) - knl = lp.split_iname(knl, "k", n, inner_tag="l.2")#, slabs=(0, 1)) - knl = lp.split_iname(knl, "j", n, inner_tag="l.1")#, slabs=(0, 1)) - knl = lp.split_iname(knl, "i", n, inner_tag="l.0")#, slabs=(0, 1)) + knl = lp.split_iname(knl, "e", 1, outer_tag="g.0") # , slabs=(0, 1)) + knl = lp.split_iname(knl, "j", n, inner_tag="l.1") # , slabs=(0, 1)) + knl = lp.split_iname(knl, "k", n, inner_tag="l.2") # , slabs=(0, 1)) + knl = lp.split_iname(knl, "i", n, inner_tag="l.0") # , slabs=(0, 1)) # knl = lp.tag_inames(knl, dict(k_nner="unr")) @@ -197,19 +193,18 @@ def test_tim3d(ctx_factory): knl = lp.tag_inames(knl, dict(m="unr")) # knl = lp.tag_inames(knl, dict(i="unr")) - knl = lp.add_prefetch(knl, "G", [2,3,4], default_tag="l.auto") # axis/argument indices on G + knl = lp.add_prefetch(knl, "G", [2, 3, 4], default_tag="l.auto") # axis/argument indices on G # noqa kernel_gen = lp.generate_loop_schedules(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) - K = 4000 + K = 4000 # noqa lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen, op_count=K*((n**4)*3*2 + (n**3)*5*3 + (n**4)*3*2)/1e9, op_label="GFlops", parameters={"K": K}) -#TW: ^^^^^^^^^^^^^^^ TypeError: auto_test_vs_ref() got an unexpected keyword argument 'print_seq_code' - +#TW: ^^^^^^^^^^^^^^^ TypeError: auto_test_vs_ref() got an unexpected keyword argument 'print_seq_code' # noqa if __name__ == "__main__":