diff --git a/examples/sem_reagan.py b/examples/sem_reagan.py index 42c3dcfec32907dd66cb125a0a51776636fcc720..a420d70fb6d43edc5260944f959c7f9e1b1ba741 100644 --- a/examples/sem_reagan.py +++ b/examples/sem_reagan.py @@ -35,7 +35,7 @@ from pyopencl.tools import pytest_generate_tests_for_pyopencl \ -def test_tim3d_slab(ctx_factory): +def cannot_schedule_test_tim3d_slab(ctx_factory): dtype = np.float32 ctx = ctx_factory() order = "C" @@ -70,25 +70,133 @@ def test_tim3d_slab(ctx_factory): name="semdiff3D", assumptions="E>=1", defines={"Nq": Nq}) - for derivative in "rst": - knl = lp.duplicate_inames(knl, "o", within="... < lap"+derivative, suffix="_"+derivative) - seq_knl = knl + + def duplicate_os(knl): + for derivative in "rst": + knl = lp.duplicate_inames( + knl, "o", + within="... < lap"+derivative, suffix="_"+derivative) + return knl def variant_orig(knl): + # NOTE: Removing this makes the thing unschedulable + #knl = lp.tag_inames(knl, dict(e="g.0", i="l.0", j="l.1"), ) + + knl = lp.precompute(knl, "ur", ["i", "j"], within="... < lapr") + knl = lp.precompute(knl, "us", ["i", "j"], within="... < lapr") + knl = lp.precompute(knl, "ut", ["i", "j"], within="... < lapr") + + # prefetch the derivative matrix + knl = lp.add_prefetch(knl, "D[:,:]") + + knl = duplicate_os(knl) + + print knl + return knl + + seq_knl = duplicate_os(knl) + + #print lp.preprocess_kernel(knl) + #1/0 + + for variant in [variant_orig]: + kernel_gen = lp.generate_loop_schedules(variant(knl), loop_priority=["e", "i", "j"]) + kernel_gen = lp.check_kernels(kernel_gen, dict(E=1000)) + + E = 1000 + lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen, + op_count=[-666], + op_label=["GFlops"], + parameters={"E": E}) + + + + +def test_tim3d_slab(ctx_factory): + dtype = np.float32 + ctx = ctx_factory() + order = "C" + + Nq = 8 + + knl = lp.make_kernel(ctx.devices[0], + "[E] -> {[i,j, k, o,m, e]: 0<=i,j,k,o,m < Nq and 0<=e