diff --git a/examples/sem_reagan.py b/examples/sem_reagan.py index 88dde57a6c5927ecb6f13bd0a6ce213008438fc9..42c3dcfec32907dd66cb125a0a51776636fcc720 100644 --- a/examples/sem_reagan.py +++ b/examples/sem_reagan.py @@ -35,129 +35,6 @@ from pyopencl.tools import pytest_generate_tests_for_pyopencl \ -def test_tim2d(ctx_factory): - dtype = np.float32 - ctx = ctx_factory() - order = "C" - - n = 8 - - from pymbolic import var - K_sym = var("K") - - field_shape = (K_sym, n, n) - - # K - run-time symbolic - knl = lp.make_kernel(ctx.devices[0], - "[K] -> {[i,j,e,m,o,gi]: 0<=i,j,m,o<%d and 0<=e<K and 0<=gi<3}" % n, - [ - "ur(a,b) := sum(@o, D[a,o]*u[e,o,b])", - "us(a,b) := sum(@o, D[b,o]*u[e,a,o])", - - #"Gu(mat_entry,a,b) := G[mat_entry,e,m,j]*ur(m,j)", - - "Gux(a,b) := G$x[0,e,a,b]*ur(a,b)+G$x[1,e,a,b]*us(a,b)", - "Guy(a,b) := G$y[1,e,a,b]*ur(a,b)+G$y[2,e,a,b]*us(a,b)", - "lap[e,i,j] = " - " sum(m, D[m,i]*Gux(m,j))" - "+ sum(m, D[m,j]*Guy(i,m))" - - ], - [ - lp.GlobalArg("u", dtype, shape=field_shape, order=order), - lp.GlobalArg("lap", dtype, shape=field_shape, order=order), - lp.GlobalArg("G", dtype, shape=(3,)+field_shape, order=order), - # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), - lp.GlobalArg("D", dtype, shape=(n, n), order=order), - # lp.ImageArg("D", dtype, shape=(n, n)), - lp.ValueArg("K", np.int32, approximately=1000), - ], - name="semlap2D", assumptions="K>=1") - - seq_knl = knl - - def variant_orig(knl): - knl = lp.tag_inames(knl, dict(i="l.0", j="l.1", e="g.0")) - - knl = lp.add_prefetch(knl, "D[:,:]") - knl = lp.add_prefetch(knl, "u[e, :, :]") - - knl = lp.precompute(knl, "ur(m,j)", np.float32, ["m", "j"]) - knl = lp.precompute(knl, "us(i,m)", np.float32, ["i", "m"]) - - knl = lp.precompute(knl, "Gux(m,j)", np.float32, ["m", "j"]) - knl = lp.precompute(knl, "Guy(i,m)", np.float32, ["i", "m"]) - - knl = lp.add_prefetch(knl, "G$x[:,e,:,:]") - knl = lp.add_prefetch(knl, "G$y[:,e,:,:]") - - knl = lp.tag_inames(knl, dict(o="unr")) - knl = lp.tag_inames(knl, dict(m="unr")) - - knl = lp.set_instruction_priority(knl, "D_fetch", 5) - print knl - - return knl - - for variant in [variant_orig]: - kernel_gen = lp.generate_loop_schedules(variant(knl)) - kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) - - K = 1000 - lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen, - op_count=[K*(n*n*n*2*2 + n*n*2*3 + n**3 * 2*2)/1e9], - op_label=["GFlops"], - parameters={"K": K}) - - - - -def make_me_a_test_test_tim3d_slab(ctx_factory): - dtype = np.float32 - ctx = ctx_factory() - order = "C" - - n = 8 - - from pymbolic import var - - # K - run-time symbolic - knl = lp.make_kernel(ctx.devices[0], - "[E] -> {[i,j,k, o, e]: 0<=i,j,k,o < n and 0<=e<E }", - """ - <> ur[i,j,k] = sum(@o, D[i,o]*u[e,o,j,k]) - <> us[i,j,k] = sum(@o, D[j,o]*u[e,i,o,k]) - <> ut[i,j,k] = sum(@o, D[k,o]*u[e,i,j,o]) - """, - [ - lp.GlobalArg("u", dtype, shape="E,n,n,n", order=order), - # lp.GlobalArg("G", dtype, shape=(3,)+field_shape, order=order), - # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), - lp.GlobalArg("D", dtype, shape=(n, n), order=order), - # lp.ImageArg("D", dtype, shape=(n, n)), - lp.ValueArg("E", np.int32, approximately=1000), - ], - name="semdiff3D", assumptions="E>=1", - defines={"n": n}) - - seq_knl = knl - - def variant_orig(knl): - return knl - - for variant in [variant_orig]: - kernel_gen = lp.generate_loop_schedules(variant(knl)) - kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) - - E = 1000 - lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen, - op_count=[E*(n*n*n*2*2 + n*n*2*3 + n**3 * 2*2)/1e9], - op_label=["GFlops"], - parameters={"E": E}) - - - - def test_tim3d_slab(ctx_factory): dtype = np.float32 ctx = ctx_factory() @@ -165,9 +42,6 @@ def test_tim3d_slab(ctx_factory): Nq = 8 - from pymbolic import var - - # K - run-time symbolic knl = lp.make_kernel(ctx.devices[0], "[E] -> {[i,j, k, o,m, e]: 0<=i,j,k,o,m < Nq and 0<=e<E }", """ @@ -179,9 +53,9 @@ def test_tim3d_slab(ctx_factory): Gus(a,b,c) := G[1,e,a,b,c]*ur(a,b,c)+G[3,e,a,b,c]*us(a,b,c)+G[4,e,a,b,c]*ut(a,b,c) Gut(a,b,c) := G[2,e,a,b,c]*ur(a,b,c)+G[4,e,a,b,c]*us(a,b,c)+G[5,e,a,b,c]*ut(a,b,c) - lapr(a,b,c):= sum(m, D[m,a]*Gur(m,b,c)) - laps(a,b,c):= sum(m, D[m,b]*Gus(a,m,c)) - lapt(a,b,c):= sum(m, D[m,c]*Gut(a,b,m)) + lapr(a,b,c):= sum(m, D[m,a]*Gur(m,b,c)) + laps(a,b,c):= sum(m, D[m,b]*Gus(a,m,c)) + lapt(a,b,c):= sum(m, D[m,c]*Gut(a,b,m)) lap[e,i,j,k] = lapr(i,j,k) + laps(i,j,k) + lapt(i,j,k) """, @@ -194,7 +68,7 @@ def test_tim3d_slab(ctx_factory): lp.ValueArg("E", np.int32, approximately=1000), ], name="semdiff3D", assumptions="E>=1", - defines={"Nq": Nq}) + defines={"Nq": Nq}) for derivative in "rst": knl = lp.duplicate_inames(knl, "o", within="... < lap"+derivative, suffix="_"+derivative) @@ -203,7 +77,7 @@ def test_tim3d_slab(ctx_factory): def variant_orig(knl): return knl - knl = lp.tag_inames(knl, dict(e="g.0", i="l.0", j="l.1"), ) + knl = lp.tag_inames(knl, dict(e="g.0", i="l.0", j="l.1"), ) for derivative, par_names in [ ("r", ["j", "k"]), @@ -214,7 +88,7 @@ def test_tim3d_slab(ctx_factory): print knl - return knl + return knl #print lp.preprocess_kernel(knl) #1/0