diff --git a/test/test_fem_assembly.py b/test/test_fem_assembly.py index 2c70c2693355f307010e84279487175551179334..4a580725e3bceae5158d058a21ea3ee0e32d5335 100644 --- a/test/test_fem_assembly.py +++ b/test/test_fem_assembly.py @@ -30,7 +30,6 @@ def test_laplacian_stiffness(ctx_factory): from pymbolic import var Nc_sym = var("Nc") - print "[Nc] -> {[K,i,j,q]: 0<=K<Nc and 0<=i,j<%(Nb)d and 0<=q<%(Nq)d}" % dict(Nb=Nb, Nq=Nq), knl = lp.make_kernel(ctx.devices[0], "[Nc] -> {[K,i,j,q]: 0<=K<Nc and 0<=i,j<%(Nb)d and 0<=q<%(Nq)d}" % dict(Nb=Nb, Nq=Nq), @@ -42,11 +41,11 @@ def test_laplacian_stiffness(ctx_factory): ], [ - lp.ArrayArg("jacInv", dtype, shape=(Nc, Nq, dim, dim), order=order), + lp.ArrayArg("jacInv", dtype, shape=(Nc_sym, Nq, dim, dim), order=order), lp.ConstantArrayArg("DPsi", dtype, shape=(Nb, Nq, dim), order=order), - lp.ArrayArg("jacDet", dtype, shape=(Nc, Nq), order=order), + lp.ArrayArg("jacDet", dtype, shape=(Nc_sym, Nq), order=order), lp.ConstantArrayArg("w", dtype, shape=(Nq, dim), order=order), - lp.ArrayArg("A", dtype, shape=(Nc, Nb, Nb), order=order), + lp.ArrayArg("A", dtype, shape=(Nc_sym, Nb, Nb), order=order), lp.ScalarArg("Nc", np.int32, approximately=1000), ], name="semlap", assumptions="Nc>=1") @@ -56,17 +55,16 @@ def test_laplacian_stiffness(ctx_factory): knl = lp.split_dimension(knl, "K", 16, outer_tag="g.0", slabs=(0,1)) knl = lp.split_dimension(knl, "K_inner", 4, inner_tag="ilp") knl = lp.tag_dimensions(knl, {"i": "l.0", "j": "l.1"}) - knl = lp.add_prefetch(knl, 'jacInv', ["K_inner_outer", "K_inner_inner", "q"], - uni_template="jacInv[x,y,z,u]") + knl = lp.add_prefetch(knl, 'jacInv', ["Kii", "Kio", "q", "x", "y"], + uni_template="jacInv[Kii + 4*Kio +16*Ko,q,x,y]") kernel_gen = lp.generate_loop_schedules(knl, loop_priority=["K", "i", "j"]) - kernel_gen = lp.check_kernels(kernel_gen, dict(Nc=1000)) + kernel_gen = lp.check_kernels(kernel_gen, dict(Nc=Nc)) - Nc = 1000 lp.auto_test_vs_seq(seq_knl, ctx, kernel_gen, op_count=0, op_label="GFlops", - parameters={"Nc": 1000}, print_seq_code=True, + parameters={"Nc": Nc}, print_seq_code=True, timing_rounds=30) diff --git a/test/test_linalg.py b/test/test_linalg.py index 329384071e98471202c7da7bb78e8fbdcbd060ae..07fcde54bc5771f2396e8b6c01ed5c95201977a6 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -132,8 +132,8 @@ def test_axpy(ctx_factory): #y = cl_array.to_device(queue, np.random.rand(n).astype(dtype)) x = cl_random.rand(queue, n, dtype=dtype, luxury=2) y = cl_random.rand(queue, n, dtype=dtype, luxury=2) - print np.isnan(x.get()).any() - 1/0 + #print np.isnan(x.get()).any() + #1/0 z = cl_array.zeros_like(x) refsol = (2*x+3*y).get() @@ -161,8 +161,6 @@ def test_transpose(ctx_factory): dtype = np.dtype(np.float32) ctx = ctx_factory() order = "C" - queue = cl.CommandQueue(ctx, - properties=cl.command_queue_properties.PROFILING_ENABLE) n = get_suitable_size(ctx) @@ -370,7 +368,8 @@ def test_rank_one(ctx_factory): seq_knl = knl - for variant in [variant_1, variant_2, variant_4]: + #for variant in [variant_1, variant_2, variant_4]: + for variant in [variant_4]: kernel_gen = lp.generate_loop_schedules(variant(knl)) kernel_gen = lp.check_kernels(kernel_gen, dict(n=n)) diff --git a/test/test_loopy.py b/test/test_loopy.py index f74848a63fe3cb14e8b422f781af91e8453addf8..31bf59d4bd0ed70f84b98efd729e54ab9d3c78d1 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -101,15 +101,12 @@ def test_bad_stencil(ctx_factory): lp.ArrayArg("a", np.float32, shape=(32,32,)) ]) - def variant_1(knl): - return knl - def variant_2(knl): - knl = lp.split_dimension(knl, "i", 16, outer_tag="g.1", inner_tag="l.1") + knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0", inner_tag="l.0") knl = lp.realize_cse(knl, None, np.float32, ["i_inner", "j"]) return knl - for variant in [variant_1, variant_2]: + for variant in [variant_2]: kernel_gen = lp.generate_loop_schedules(variant(knl), loop_priority=["i_outer", "i_inner_0", "j_0"]) kernel_gen = lp.check_kernels(kernel_gen) diff --git a/test/test_sem.py b/test/test_sem.py index 3e593cb0ad8f3155dcb90319ff27642df05c131e..19f854ce9b25f6fb7a2df0b3a2c849ddc1c225c8 100644 --- a/test/test_sem.py +++ b/test/test_sem.py @@ -54,29 +54,31 @@ def test_sem_3d(ctx_factory): name="semlap", assumptions="K>=1") - def add_pf(knl): - knl = lp.add_prefetch(knl, "G", ["gi", "m", "j", "k"], "G[gi,e,m,j,k]") - knl = lp.add_prefetch(knl, "D", ["m", "j"]) - knl = lp.add_prefetch(knl, "u", ["i", "j", "k"], "u[*,i,j,k]") - knl = lp.realize_cse(knl, "ur", np.float32, ["k", "j", "m"]) - knl = lp.realize_cse(knl, "us", np.float32, ["i", "m", "k"]) - knl = lp.realize_cse(knl, "ut", np.float32, ["i", "j", "m"]) - - seq_knl = add_pf(knl) + knl = lp.realize_cse(knl, "ur", np.float32, ["k", "j", "m"]) + knl = lp.realize_cse(knl, "us", np.float32, ["i", "m", "k"]) + knl = lp.realize_cse(knl, "ut", np.float32, ["i", "j", "m"]) + + if 0: + seq_knl = lp.add_prefetch(knl, "G", ["gi", "m", "j", "k"], "G[gi,e,m,j,k]") + seq_knl = lp.add_prefetch(seq_knl, "D", ["m", "j"]) + seq_knl = lp.add_prefetch(seq_knl, "u", ["i", "j", "k"], "u[*,i,j,k]") + else: + seq_knl = knl knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1)) + + knl = lp.add_prefetch(knl, "G", ["gi", "m", "j", "k"], "G[gi,e,m,j,k]") + knl = lp.add_prefetch(knl, "D", ["m", "j"]) + knl = lp.add_prefetch(knl, "u", ["i", "j", "k"], "u[*,i,j,k]") #knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp") - knl = add_pf(knl) #print seq_knl - #print lp.preprocess_kernel(seq_knl) + #print lp.preprocess_kernel(knl) #1/0 - knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1")) - kernel_gen = lp.generate_loop_schedules(knl, - loop_priority=["j_dr", "j_ds", "i_dt"]) + kernel_gen = lp.generate_loop_schedules(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000)) K = 1000