diff --git a/kernel_fixtures.py b/kernel_fixtures.py index c2b9f30b09246d719ac46d6c91edd1412d19ce6a..1ff71ac5ab413959f8f27e4a1509e1638a2a66e6 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -1,11 +1,5 @@ -import numpy as np -import pyopencl as cl -import pyopencl.array # noqa import loopy as lp # noqa -import device_fixtures as device -import program_fixtures as program -import transform_fixtures as transform import setup_fixtures as setup diff --git a/setup_fixtures.py b/setup_fixtures.py index 255adfcac99e21cd5e89037a020009f9c6d278bf..bcecb9eb354d4abaa81c7e5a929b6048cd0b3756 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -1,5 +1,6 @@ import numpy as np import pyopencl as cl +import pyopencl.array # noqa import device_fixtures as device diff --git a/test.py b/test.py index 8e883b84b2671f75d1dcd2a4988cd3f1abe36196..146e052c994dd5b9f24a078562debb3ee2198bce 100644 --- a/test.py +++ b/test.py @@ -39,7 +39,6 @@ def test_compute_flux_derivatives(ctx_factory): def test_compute_flux_derivatives_gpu(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() - prg = transform.weno_for_gpu(prg) prg = transform.compute_flux_derivative_gpu(queue, prg) params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) diff --git a/transform_fixtures.py b/transform_fixtures.py index a985e97f60e58fdcb48bd2f9e2b08dba35b6be2b..f69581a045eae6d0289910fc635b2b289c0d5178 100644 --- a/transform_fixtures.py +++ b/transform_fixtures.py @@ -16,28 +16,10 @@ def compute_flux_derivative_basic(prg): return prg.with_kernel(cfd) -def compute_flux_derivative_gpu(queue, prg): - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - - prg = lp.set_options(prg, no_numpy=True) - return prg - - def weno_for_gpu(prg): - cfd = prg["compute_flux_derivatives"] + prg = compute_flux_derivative_basic(prg) - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") - - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) + cfd = prg["compute_flux_derivatives"] for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: cfd = lp.split_iname(cfd, "i"+suffix, 16, @@ -70,7 +52,14 @@ def weno_for_gpu(prg): return prg -def get_gpu_transformed_weno(): - import program_fixtures as program - prg = program.get_weno() - return weno_for_gpu(prg) +def compute_flux_derivative_gpu(queue, prg): + prg = weno_for_gpu(prg) + + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + + if 1: + with open("gen-code.cl", "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + + prg = lp.set_options(prg, no_numpy=True) + return prg