From 2d875121dad15d5a1f30d1ccacfb522177e7f054 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 21:17:30 -0500 Subject: [PATCH 01/80] get rid of LoopyFixture class --- fixtures.py | 99 ++++++++++++++++++++++++++--------------------------- test.py | 27 +++++++-------- 2 files changed, 60 insertions(+), 66 deletions(-) diff --git a/fixtures.py b/fixtures.py index 257f582..73df2fe 100644 --- a/fixtures.py +++ b/fixtures.py @@ -8,6 +8,16 @@ import loopy as lp from pytest import approx +_WENO_PRG = [] +_QUEUE = [] + +def get_queue(ctx_factory): + if not _QUEUE: + ctx = ctx_factory() + _QUEUE.append(cl.CommandQueue(ctx)) + return _QUEUE[0] + + def with_root_kernel(prg, root_name): # FIXME This is a little less beautiful than it could be new_prg = prg.copy(name=root_name) @@ -20,70 +30,57 @@ def with_root_kernel(prg, root_name): return new_prg -class LoopyFixture: - _WENO_PRG = [] - _QUEUE = [] - - def __init__(self): - self.prg = self.get_weno_program() - - def get_weno_program(self): - if self._WENO_PRG: - return self._WENO_PRG[0] - - fn = "WENO.F90" +def get_weno_program(): + if _WENO_PRG: + return _WENO_PRG[0] - with open(fn, "r") as infile: - infile_content = infile.read() + fn = "WENO.F90" - prg = lp.parse_transformed_fortran(infile_content, filename=fn) - self._WENO_PRG.append(prg) - return prg + with open(fn, "r") as infile: + infile_content = infile.read() - def get_queue(self, ctx_factory): - if not self._QUEUE: - ctx = ctx_factory() - self._QUEUE.append(cl.CommandQueue(ctx)) - return self._QUEUE[0] + prg = lp.parse_transformed_fortran(infile_content, filename=fn) + _WENO_PRG.append(prg) + return prg - def random_array(self, *args): - return np.random.random_sample(args).astype(np.float32).copy(order="F") +def random_array(*args): + return np.random.random_sample(args).astype(np.float32).copy(order="F") - def mult_mat_vec(self, ctx_factory, alpha, a, b): - queue = self.get_queue(ctx_factory) +def mult_mat_vec(ctx_factory, alpha, a, b): + queue = get_queue(ctx_factory) - c_dev = cl.array.empty(queue, 10, dtype=np.float32) + c_dev = cl.array.empty(queue, 10, dtype=np.float32) - prg = with_root_kernel(self.prg, "mult_mat_vec") - prg(queue, a=a, b=b, c=c_dev, alpha=alpha) + prg = with_root_kernel(get_weno_program(), "mult_mat_vec") + prg(queue, a=a, b=b, c=c_dev, alpha=alpha) - return c_dev.get() + return c_dev.get() - def compute_flux_derivatives(self, ctx_factory, - nvars, ndim, nx, ny, nz, - states, fluxes, metrics, metric_jacobians): +def compute_flux_derivatives(ctx_factory, + nvars, ndim, nx, ny, nz, + states, fluxes, metrics, metric_jacobians): - queue = self.get_queue(ctx_factory) + queue = get_queue(ctx_factory) - prg = self.prg - cfd = prg["compute_flux_derivatives"] + prg = get_weno_program() + cfd = prg["compute_flux_derivatives"] - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") + cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", + lp.AddressSpace.GLOBAL) - prg = prg.with_kernel(cfd) + prg = prg.with_kernel(cfd) - flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, - nz+6), dtype=np.float32, order="F") + flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, + nz+6), dtype=np.float32, order="F") - prg(queue, nvars=nvars, ndim=ndim, - states=states, fluxes=fluxes, metrics=metrics, - metric_jacobians=metric_jacobians, - flux_derivatives=flux_derivatives_dev) - return flux_derivatives_dev.get() + prg(queue, nvars=nvars, ndim=ndim, + states=states, fluxes=fluxes, metrics=metrics, + metric_jacobians=metric_jacobians, + flux_derivatives=flux_derivatives_dev) + return flux_derivatives_dev.get() diff --git a/test.py b/test.py index 93890bc..1f4afa3 100644 --- a/test.py +++ b/test.py @@ -14,17 +14,14 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -from fixtures import LoopyFixture - - -f = LoopyFixture() +import fixtures def test_matvec(ctx_factory): - a = f.random_array(10, 10) - b = f.random_array(10) + a = fixtures.random_array(10, 10) + b = fixtures.random_array(10) - c = f.mult_mat_vec(ctx_factory, a=a, b=b, alpha=1.0) + c = fixtures.mult_mat_vec(ctx_factory, a=a, b=b, alpha=1.0) assert la.norm(a@b - c, 2)/la.norm(c) < 1e-5 @@ -38,12 +35,12 @@ def test_compute_flux_derivatives(ctx_factory): ny = 10 nz = 10 - states = f.random_array(nvars, nx+6, ny+6, nz+6) - fluxes = f.random_array(nvars, ndim, nx+6, ny+6, nz+6) - metrics = f.random_array(ndim, ndim, nx+6, ny+6, nz+6) - metric_jacobians = f.random_array(nx+6, ny+6, nz+6) + states = fixtures.random_array(nvars, nx+6, ny+6, nz+6) + fluxes = fixtures.random_array(nvars, ndim, nx+6, ny+6, nz+6) + metrics = fixtures.random_array(ndim, ndim, nx+6, ny+6, nz+6) + metric_jacobians = fixtures.random_array(nx+6, ny+6, nz+6) - f.compute_flux_derivatives(ctx_factory, + fixtures.compute_flux_derivatives(ctx_factory, nvars=nvars, ndim=ndim, nx=nx, ny=ny, nz=nz, states=states, fluxes=fluxes, metrics=metrics, metric_jacobians=metric_jacobians) @@ -55,7 +52,7 @@ def f_array(queue, *shape): def get_gpu_transformed_weno(): - prg = f.prg + prg = fixtures.get_weno_program() cfd = prg["compute_flux_derivatives"] @@ -104,7 +101,7 @@ def test_compute_flux_derivatives_gpu(ctx_factory): prg = get_gpu_transformed_weno() - queue = f.get_queue(ctx_factory) + queue = fixtures.get_queue(ctx_factory) ndim = 3 nvars = 5 @@ -144,7 +141,7 @@ def benchmark_compute_flux_derivatives_gpu(ctx_factory): prg = get_gpu_transformed_weno() - queue = f.get_queue(ctx_factory) + queue = fixtures.get_queue(ctx_factory) ndim = 3 nvars = 5 -- GitLab From 6b72ba01d0ecccb75d0d499bacb47261b3c42e69 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 21:44:37 -0500 Subject: [PATCH 02/80] move everything except test functions to fixtures.py. move benchmark code to benchmark.py --- benchmark.py | 106 ++++++++++++++++++++++++++++++++++++++ fixtures.py | 49 ++++++++++++++++++ test.py | 141 ++------------------------------------------------- 3 files changed, 160 insertions(+), 136 deletions(-) create mode 100644 benchmark.py diff --git a/benchmark.py b/benchmark.py new file mode 100644 index 0000000..f87a219 --- /dev/null +++ b/benchmark.py @@ -0,0 +1,106 @@ +import numpy as np +import numpy.linalg as la +import pyopencl as cl +import pyopencl.array # noqa +import pyopencl.tools # noqa +import pyopencl.clrandom # noqa +import loopy as lp # noqa +import sys + +import logging + +import pytest +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl + as pytest_generate_tests) + +import fixtures + +def benchmark_compute_flux_derivatives_gpu(ctx_factory): + logging.basicConfig(level="INFO") + + prg = fixtures.get_gpu_transformed_weno() + + queue = fixtures.get_queue(ctx_factory) + + ndim = 3 + nvars = 5 + n = 16*16 + nx = n + ny = n + nz = n + + print("ARRAY GEN") + states = fixtures.f_array(queue, nvars, nx+6, ny+6, nz+6) + fluxes = fixtures.f_array(queue, nvars, ndim, nx+6, ny+6, nz+6) + metrics = fixtures.f_array(queue, ndim, ndim, nx+6, ny+6, nz+6) + metric_jacobians = fixtures.f_array(queue, nx+6, ny+6, nz+6) + print("END ARRAY GEN") + + flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, + nz+6), dtype=np.float32, order="F") + + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + + if 0: + with open("gen-code.cl", "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + prg = lp.set_options(prg, ignore_boostable_into=True) + prg = lp.set_options(prg, no_numpy=True) + #prg = lp.set_options(prg, write_wrapper=True) + #op_map = lp.get_op_map(prg, count_redundant_work=False) + #print(op_map) + + allocator = pyopencl.tools.MemoryPool(pyopencl.tools.ImmediateAllocator(queue)) + + from functools import partial + run = partial(prg, queue, nvars=nvars, ndim=ndim, + states=states, fluxes=fluxes, metrics=metrics, + metric_jacobians=metric_jacobians, + flux_derivatives=flux_derivatives_dev, + allocator=allocator) + + # {{{ monkeypatch enqueue_nd_range_kernel to trace + + if 0: + old_enqueue_nd_range_kernel = cl.enqueue_nd_range_kernel + + def enqueue_nd_range_kernel_wrapper(queue, ker, *args, **kwargs): + print(f"Enqueueing {ker.function_name}") + return old_enqueue_nd_range_kernel(queue, ker, *args, **kwargs) + + cl.enqueue_nd_range_kernel = enqueue_nd_range_kernel_wrapper + + # }}} + + print("warmup") + for iwarmup_round in range(2): + run() + + nrounds = 10 + + queue.finish() + print("timing") + from time import time + start = time() + + for iround in range(nrounds): + run() + + queue.finish() + one_round = (time() - start)/nrounds + + print(f"M RHSs/s: {ndim*nvars*n**3/one_round/1e6}") + print(f"elapsed per round: {one_round} s") + print(f"Output size: {flux_derivatives_dev.nbytes/1e6} MB") + + +if __name__ == "__main__": + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + benchmark_compute_flux_derivatives_gpu(cl._csc): + #from pytest import main + #main([__file__]) diff --git a/fixtures.py b/fixtures.py index 73df2fe..3ec8efc 100644 --- a/fixtures.py +++ b/fixtures.py @@ -11,6 +11,51 @@ from pytest import approx _WENO_PRG = [] _QUEUE = [] +def get_gpu_transformed_weno(): + prg = fixtures.get_weno_program() + + cfd = prg["compute_flux_derivatives"] + + cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") + + cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", + lp.AddressSpace.GLOBAL) + + for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: + cfd = lp.split_iname(cfd, "i"+suffix, 16, + outer_tag="g.0", inner_tag="l.0") + cfd = lp.split_iname(cfd, "j"+suffix, 16, + outer_tag="g.1", inner_tag="l.1") + + for var_name in ["delta_xi", "delta_eta", "delta_zeta"]: + cfd = lp.assignment_to_subst(cfd, var_name) + + cfd = lp.add_barrier(cfd, "tag:to_generalized", "tag:flux_x_compute") + cfd = lp.add_barrier(cfd, "tag:flux_x_compute", "tag:flux_x_diff") + cfd = lp.add_barrier(cfd, "tag:flux_x_diff", "tag:flux_y_compute") + cfd = lp.add_barrier(cfd, "tag:flux_y_compute", "tag:flux_y_diff") + cfd = lp.add_barrier(cfd, "tag:flux_y_diff", "tag:flux_z_compute") + cfd = lp.add_barrier(cfd, "tag:flux_z_compute", "tag:flux_z_diff") + cfd = lp.add_barrier(cfd, "tag:flux_z_diff", "tag:from_generalized") + + prg = prg.with_kernel(cfd) + + # FIXME: These should work, but don't + # FIXME: Undo the hand-inlining in WENO.F90 + #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") + #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") + + if 0: + print(prg["convert_to_generalized_frozen"]) + 1/0 + + return prg + + def get_queue(ctx_factory): if not _QUEUE: ctx = ctx_factory() @@ -43,6 +88,10 @@ def get_weno_program(): _WENO_PRG.append(prg) return prg +def f_array(queue, *shape): + ary = np.random.random_sample(shape).astype(np.float32).copy(order="F") + return cl.array.to_device(queue, ary) + def random_array(*args): return np.random.random_sample(args).astype(np.float32).copy(order="F") diff --git a/test.py b/test.py index 1f4afa3..5db0b73 100644 --- a/test.py +++ b/test.py @@ -46,60 +46,10 @@ def test_compute_flux_derivatives(ctx_factory): metric_jacobians=metric_jacobians) -def f_array(queue, *shape): - ary = np.random.random_sample(shape).astype(np.float32).copy(order="F") - return cl.array.to_device(queue, ary) - - -def get_gpu_transformed_weno(): - prg = fixtures.get_weno_program() - - cfd = prg["compute_flux_derivatives"] - - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") - - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) - - for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: - cfd = lp.split_iname(cfd, "i"+suffix, 16, - outer_tag="g.0", inner_tag="l.0") - cfd = lp.split_iname(cfd, "j"+suffix, 16, - outer_tag="g.1", inner_tag="l.1") - - for var_name in ["delta_xi", "delta_eta", "delta_zeta"]: - cfd = lp.assignment_to_subst(cfd, var_name) - - cfd = lp.add_barrier(cfd, "tag:to_generalized", "tag:flux_x_compute") - cfd = lp.add_barrier(cfd, "tag:flux_x_compute", "tag:flux_x_diff") - cfd = lp.add_barrier(cfd, "tag:flux_x_diff", "tag:flux_y_compute") - cfd = lp.add_barrier(cfd, "tag:flux_y_compute", "tag:flux_y_diff") - cfd = lp.add_barrier(cfd, "tag:flux_y_diff", "tag:flux_z_compute") - cfd = lp.add_barrier(cfd, "tag:flux_z_compute", "tag:flux_z_diff") - cfd = lp.add_barrier(cfd, "tag:flux_z_diff", "tag:from_generalized") - - prg = prg.with_kernel(cfd) - - # FIXME: These should work, but don't - # FIXME: Undo the hand-inlining in WENO.F90 - #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") - #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") - - if 0: - print(prg["convert_to_generalized_frozen"]) - 1/0 - - return prg - - def test_compute_flux_derivatives_gpu(ctx_factory): logging.basicConfig(level="INFO") - prg = get_gpu_transformed_weno() + prg = fixtures.get_gpu_transformed_weno() queue = fixtures.get_queue(ctx_factory) @@ -109,10 +59,10 @@ def test_compute_flux_derivatives_gpu(ctx_factory): ny = 10 nz = 10 - states = f_array(queue, nvars, nx+6, ny+6, nz+6) - fluxes = f_array(queue, nvars, ndim, nx+6, ny+6, nz+6) - metrics = f_array(queue, ndim, ndim, nx+6, ny+6, nz+6) - metric_jacobians = f_array(queue, nx+6, ny+6, nz+6) + states = fixtures.f_array(queue, nvars, nx+6, ny+6, nz+6) + fluxes = fixtures.f_array(queue, nvars, ndim, nx+6, ny+6, nz+6) + metrics = fixtures.f_array(queue, ndim, ndim, nx+6, ny+6, nz+6) + metric_jacobians = fixtures.f_array(queue, nx+6, ny+6, nz+6) flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, nz+6), dtype=np.float32, order="F") @@ -136,87 +86,6 @@ def test_compute_flux_derivatives_gpu(ctx_factory): flux_derivatives=flux_derivatives_dev) -def benchmark_compute_flux_derivatives_gpu(ctx_factory): - logging.basicConfig(level="INFO") - - prg = get_gpu_transformed_weno() - - queue = fixtures.get_queue(ctx_factory) - - ndim = 3 - nvars = 5 - n = 16*16 - nx = n - ny = n - nz = n - - print("ARRAY GEN") - states = f_array(queue, nvars, nx+6, ny+6, nz+6) - fluxes = f_array(queue, nvars, ndim, nx+6, ny+6, nz+6) - metrics = f_array(queue, ndim, ndim, nx+6, ny+6, nz+6) - metric_jacobians = f_array(queue, nx+6, ny+6, nz+6) - print("END ARRAY GEN") - - flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, - nz+6), dtype=np.float32, order="F") - - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 0: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - prg = lp.set_options(prg, ignore_boostable_into=True) - prg = lp.set_options(prg, no_numpy=True) - #prg = lp.set_options(prg, write_wrapper=True) - #op_map = lp.get_op_map(prg, count_redundant_work=False) - #print(op_map) - - allocator = pyopencl.tools.MemoryPool(pyopencl.tools.ImmediateAllocator(queue)) - - from functools import partial - run = partial(prg, queue, nvars=nvars, ndim=ndim, - states=states, fluxes=fluxes, metrics=metrics, - metric_jacobians=metric_jacobians, - flux_derivatives=flux_derivatives_dev, - allocator=allocator) - - # {{{ monkeypatch enqueue_nd_range_kernel to trace - - if 0: - old_enqueue_nd_range_kernel = cl.enqueue_nd_range_kernel - - def enqueue_nd_range_kernel_wrapper(queue, ker, *args, **kwargs): - print(f"Enqueueing {ker.function_name}") - return old_enqueue_nd_range_kernel(queue, ker, *args, **kwargs) - - cl.enqueue_nd_range_kernel = enqueue_nd_range_kernel_wrapper - - # }}} - - print("warmup") - for iwarmup_round in range(2): - run() - - nrounds = 10 - - queue.finish() - print("timing") - from time import time - start = time() - - for iround in range(nrounds): - run() - - queue.finish() - one_round = (time() - start)/nrounds - - print(f"M RHSs/s: {ndim*nvars*n**3/one_round/1e6}") - print(f"elapsed per round: {one_round} s") - print(f"Output size: {flux_derivatives_dev.nbytes/1e6} MB") - - # This lets you run 'python test.py test_case(cl._csc)' without pytest. if __name__ == "__main__": if len(sys.argv) > 1: -- GitLab From ad46040fd11177512fc8febf7f1e35848f24a3ba Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 21:53:54 -0500 Subject: [PATCH 03/80] cleanup and bugfixes from moving things around --- benchmark.py | 4 +--- fixtures.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/benchmark.py b/benchmark.py index f87a219..266d998 100644 --- a/benchmark.py +++ b/benchmark.py @@ -101,6 +101,4 @@ if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) else: - benchmark_compute_flux_derivatives_gpu(cl._csc): - #from pytest import main - #main([__file__]) + benchmark_compute_flux_derivatives_gpu(cl._csc) diff --git a/fixtures.py b/fixtures.py index 3ec8efc..e28fa7d 100644 --- a/fixtures.py +++ b/fixtures.py @@ -12,7 +12,7 @@ _WENO_PRG = [] _QUEUE = [] def get_gpu_transformed_weno(): - prg = fixtures.get_weno_program() + prg = get_weno_program() cfd = prg["compute_flux_derivatives"] -- GitLab From bbceb1a11ad1f0a21bdf5b390c9bb1fefa4a48da Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 22:03:24 -0500 Subject: [PATCH 04/80] added file for comparison fixtures, move array comparison there --- comparison_fixtures.py | 4 ++++ test.py | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 comparison_fixtures.py diff --git a/comparison_fixtures.py b/comparison_fixtures.py new file mode 100644 index 0000000..4d2be8b --- /dev/null +++ b/comparison_fixtures.py @@ -0,0 +1,4 @@ +import numpy.linalg as la + +def arrays(a, b): + assert la.norm(a - b, 2)/la.norm(b) < 1e-5 diff --git a/test.py b/test.py index 5db0b73..198cf24 100644 --- a/test.py +++ b/test.py @@ -15,7 +15,7 @@ from pyopencl.tools import ( # noqa as pytest_generate_tests) import fixtures - +import comparison_fixtures as compare def test_matvec(ctx_factory): a = fixtures.random_array(10, 10) @@ -23,9 +23,10 @@ def test_matvec(ctx_factory): c = fixtures.mult_mat_vec(ctx_factory, a=a, b=b, alpha=1.0) - assert la.norm(a@b - c, 2)/la.norm(c) < 1e-5 + compare.arrays(a@b, c) +@pytest.mark.skip("slow") def test_compute_flux_derivatives(ctx_factory): logging.basicConfig(level="INFO") @@ -46,6 +47,7 @@ def test_compute_flux_derivatives(ctx_factory): metric_jacobians=metric_jacobians) +@pytest.mark.skip("slow") def test_compute_flux_derivatives_gpu(ctx_factory): logging.basicConfig(level="INFO") -- GitLab From 5ebbef78733e9897d9583707fe9fb3776996786b Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 22:08:02 -0500 Subject: [PATCH 05/80] change array comparison to use pytest approx --- comparison_fixtures.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/comparison_fixtures.py b/comparison_fixtures.py index 4d2be8b..fa0cde6 100644 --- a/comparison_fixtures.py +++ b/comparison_fixtures.py @@ -1,4 +1,4 @@ -import numpy.linalg as la +from pytest import approx def arrays(a, b): - assert la.norm(a - b, 2)/la.norm(b) < 1e-5 + assert a == approx(b) -- GitLab From 1559021f910e046be921a0d4a1c8e0b1104b041d Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 22:11:09 -0500 Subject: [PATCH 06/80] formatting in comparison_fixtures --- comparison_fixtures.py | 1 + 1 file changed, 1 insertion(+) diff --git a/comparison_fixtures.py b/comparison_fixtures.py index fa0cde6..e21ec0c 100644 --- a/comparison_fixtures.py +++ b/comparison_fixtures.py @@ -1,4 +1,5 @@ from pytest import approx + def arrays(a, b): assert a == approx(b) -- GitLab From 578dc7c4456c8ce79ce9a88eaf5c0e1683bd1929 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 22:12:36 -0500 Subject: [PATCH 07/80] move random_array to new file setup_fixtures.py --- fixtures.py | 3 --- setup_fixtures.py | 5 +++++ test.py | 15 ++++++++------- 3 files changed, 13 insertions(+), 10 deletions(-) create mode 100644 setup_fixtures.py diff --git a/fixtures.py b/fixtures.py index e28fa7d..d11810c 100644 --- a/fixtures.py +++ b/fixtures.py @@ -92,9 +92,6 @@ def f_array(queue, *shape): ary = np.random.random_sample(shape).astype(np.float32).copy(order="F") return cl.array.to_device(queue, ary) -def random_array(*args): - return np.random.random_sample(args).astype(np.float32).copy(order="F") - def mult_mat_vec(ctx_factory, alpha, a, b): queue = get_queue(ctx_factory) diff --git a/setup_fixtures.py b/setup_fixtures.py new file mode 100644 index 0000000..5a36bd8 --- /dev/null +++ b/setup_fixtures.py @@ -0,0 +1,5 @@ +import numpy as np + + +def random_array(*shape): + return np.random.random_sample(shape).astype(np.float32).copy(order="F") diff --git a/test.py b/test.py index 198cf24..7a8d406 100644 --- a/test.py +++ b/test.py @@ -16,17 +16,18 @@ from pyopencl.tools import ( # noqa import fixtures import comparison_fixtures as compare +import setup_fixtures as setup def test_matvec(ctx_factory): - a = fixtures.random_array(10, 10) - b = fixtures.random_array(10) + a = setup.random_array(10, 10) + b = setup.random_array(10) c = fixtures.mult_mat_vec(ctx_factory, a=a, b=b, alpha=1.0) compare.arrays(a@b, c) -@pytest.mark.skip("slow") +#@pytest.mark.skip("slow") def test_compute_flux_derivatives(ctx_factory): logging.basicConfig(level="INFO") @@ -36,10 +37,10 @@ def test_compute_flux_derivatives(ctx_factory): ny = 10 nz = 10 - states = fixtures.random_array(nvars, nx+6, ny+6, nz+6) - fluxes = fixtures.random_array(nvars, ndim, nx+6, ny+6, nz+6) - metrics = fixtures.random_array(ndim, ndim, nx+6, ny+6, nz+6) - metric_jacobians = fixtures.random_array(nx+6, ny+6, nz+6) + states = setup.random_array(nvars, nx+6, ny+6, nz+6) + fluxes = setup.random_array(nvars, ndim, nx+6, ny+6, nz+6) + metrics = setup.random_array(ndim, ndim, nx+6, ny+6, nz+6) + metric_jacobians = setup.random_array(nx+6, ny+6, nz+6) fixtures.compute_flux_derivatives(ctx_factory, nvars=nvars, ndim=ndim, nx=nx, ny=ny, nz=nz, -- GitLab From 340b78d89b363f7013db3c27edffc2bdc125af9a Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 22:36:52 -0500 Subject: [PATCH 08/80] create new kernel_fixtures.py for direct interface with Loopy kernels --- fixtures.py | 10 ---------- kernel_fixtures.py | 15 +++++++++++++++ test.py | 6 ++++-- 3 files changed, 19 insertions(+), 12 deletions(-) create mode 100644 kernel_fixtures.py diff --git a/fixtures.py b/fixtures.py index d11810c..10d0f45 100644 --- a/fixtures.py +++ b/fixtures.py @@ -92,16 +92,6 @@ def f_array(queue, *shape): ary = np.random.random_sample(shape).astype(np.float32).copy(order="F") return cl.array.to_device(queue, ary) -def mult_mat_vec(ctx_factory, alpha, a, b): - queue = get_queue(ctx_factory) - - c_dev = cl.array.empty(queue, 10, dtype=np.float32) - - prg = with_root_kernel(get_weno_program(), "mult_mat_vec") - prg(queue, a=a, b=b, c=c_dev, alpha=alpha) - - return c_dev.get() - def compute_flux_derivatives(ctx_factory, nvars, ndim, nx, ny, nz, states, fluxes, metrics, metric_jacobians): diff --git a/kernel_fixtures.py b/kernel_fixtures.py new file mode 100644 index 0000000..d480a5d --- /dev/null +++ b/kernel_fixtures.py @@ -0,0 +1,15 @@ +import numpy as np +import pyopencl as cl + +import fixtures + +def mult_mat_vec(ctx_factory, alpha, a, b): + queue = fixtures.get_queue(ctx_factory) + + c_dev = cl.array.empty(queue, 10, dtype=np.float32) + + prg = fixtures.with_root_kernel(fixtures.get_weno_program(), "mult_mat_vec") + prg(queue, a=a, b=b, c=c_dev, alpha=alpha) + + return c_dev.get() + diff --git a/test.py b/test.py index 7a8d406..555e953 100644 --- a/test.py +++ b/test.py @@ -17,17 +17,19 @@ from pyopencl.tools import ( # noqa import fixtures import comparison_fixtures as compare import setup_fixtures as setup +import kernel_fixtures as kernel + def test_matvec(ctx_factory): a = setup.random_array(10, 10) b = setup.random_array(10) - c = fixtures.mult_mat_vec(ctx_factory, a=a, b=b, alpha=1.0) + c = kernel.mult_mat_vec(ctx_factory, a=a, b=b, alpha=1.0) compare.arrays(a@b, c) -#@pytest.mark.skip("slow") +@pytest.mark.skip("slow") def test_compute_flux_derivatives(ctx_factory): logging.basicConfig(level="INFO") -- GitLab From 0994a5f0c3d32987fff45605e64c2d6b69f4e4bf Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 22:46:51 -0500 Subject: [PATCH 09/80] move logging.basicConfig to bottom of file since we only see logging output if pytest doesn't run --- test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test.py b/test.py index 555e953..3836923 100644 --- a/test.py +++ b/test.py @@ -31,8 +31,6 @@ def test_matvec(ctx_factory): @pytest.mark.skip("slow") def test_compute_flux_derivatives(ctx_factory): - logging.basicConfig(level="INFO") - ndim = 3 nvars = 5 nx = 10 @@ -52,8 +50,6 @@ def test_compute_flux_derivatives(ctx_factory): @pytest.mark.skip("slow") def test_compute_flux_derivatives_gpu(ctx_factory): - logging.basicConfig(level="INFO") - prg = fixtures.get_gpu_transformed_weno() queue = fixtures.get_queue(ctx_factory) @@ -94,6 +90,7 @@ def test_compute_flux_derivatives_gpu(ctx_factory): # This lets you run 'python test.py test_case(cl._csc)' without pytest. if __name__ == "__main__": if len(sys.argv) > 1: + logging.basicConfig(level="INFO") exec(sys.argv[1]) else: from pytest import main -- GitLab From df8afa7724e7e169e0ff11079c4c6141d28e4aaa Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 22:52:29 -0500 Subject: [PATCH 10/80] added objects to hold params and arrays for flux derivative computation --- setup_fixtures.py | 17 +++++++++++++++++ test.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/setup_fixtures.py b/setup_fixtures.py index 5a36bd8..3b51fce 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -1,5 +1,22 @@ import numpy as np +class FluxDerivativeParams: + def __init__(self, nvars, ndim, nx, ny, nz): + self.nvars = nvars + self.ndim = ndim + self.nx = nx + self.ny = ny + self.nz = nz + + +class FluxDerivativeArrays: + def __init__(self, states, fluxes, metrics, metric_jacobians): + self.states = states + self.fluxes = fluxes + self.metrics = metrics + self.metric_jacobians = metric_jacobians + + def random_array(*shape): return np.random.random_sample(shape).astype(np.float32).copy(order="F") diff --git a/test.py b/test.py index 3836923..999959b 100644 --- a/test.py +++ b/test.py @@ -29,8 +29,8 @@ def test_matvec(ctx_factory): compare.arrays(a@b, c) -@pytest.mark.skip("slow") def test_compute_flux_derivatives(ctx_factory): + params = setup.FluxDerivativeParams(ndim=3, nvars=5, nx=10, ny=10, nz=10) ndim = 3 nvars = 5 nx = 10 -- GitLab From 81a6c748e62387606b9ebb80aca112530bfcdf0e Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 22:58:13 -0500 Subject: [PATCH 11/80] add FluxDerivativeArrays object and setup fixture for multiple arrays --- setup_fixtures.py | 16 ++++++++++++++++ test.py | 2 ++ 2 files changed, 18 insertions(+) diff --git a/setup_fixtures.py b/setup_fixtures.py index 3b51fce..9e24722 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -8,6 +8,7 @@ class FluxDerivativeParams: self.nx = nx self.ny = ny self.nz = nz + self.nhalo = 6 class FluxDerivativeArrays: @@ -20,3 +21,18 @@ class FluxDerivativeArrays: def random_array(*shape): return np.random.random_sample(shape).astype(np.float32).copy(order="F") + + +def random_flux_derivative_arrays(params): + nvars = params.nvars + ndim = params.ndim + nx_halo = params.nx + params.nhalo + ny_halo = params.ny + params.nhalo + nz_halo = params.nz + params.nhalo + + states = random_array(nvars, nx_halo, ny_halo, nz_halo) + fluxes = random_array(nvars, ndim, nx_halo, ny_halo, nz_halo) + metrics = random_array(ndim, ndim, nx_halo, ny_halo, nz_halo) + metric_jacobians = random_array(nx_halo, ny_halo, nz_halo) + + return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) diff --git a/test.py b/test.py index 999959b..d99f84f 100644 --- a/test.py +++ b/test.py @@ -31,6 +31,8 @@ def test_matvec(ctx_factory): def test_compute_flux_derivatives(ctx_factory): params = setup.FluxDerivativeParams(ndim=3, nvars=5, nx=10, ny=10, nz=10) + arrays = setup.random_flux_derivative_arrays(params) + ndim = 3 nvars = 5 nx = 10 -- GitLab From 08f9f2f31308a2a0d62387c4f977088dcd5e9646 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 23:06:25 -0500 Subject: [PATCH 12/80] simplify interface to kernel.compute_flux_derivative --- fixtures.py | 29 ----------------------------- kernel_fixtures.py | 32 ++++++++++++++++++++++++++++++++ test.py | 17 +---------------- 3 files changed, 33 insertions(+), 45 deletions(-) diff --git a/fixtures.py b/fixtures.py index 10d0f45..c13c2e2 100644 --- a/fixtures.py +++ b/fixtures.py @@ -91,32 +91,3 @@ def get_weno_program(): def f_array(queue, *shape): ary = np.random.random_sample(shape).astype(np.float32).copy(order="F") return cl.array.to_device(queue, ary) - -def compute_flux_derivatives(ctx_factory, - nvars, ndim, nx, ny, nz, - states, fluxes, metrics, metric_jacobians): - - queue = get_queue(ctx_factory) - - prg = get_weno_program() - cfd = prg["compute_flux_derivatives"] - - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") - - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) - - prg = prg.with_kernel(cfd) - - flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, - nz+6), dtype=np.float32, order="F") - - prg(queue, nvars=nvars, ndim=ndim, - states=states, fluxes=fluxes, metrics=metrics, - metric_jacobians=metric_jacobians, - flux_derivatives=flux_derivatives_dev) - return flux_derivatives_dev.get() diff --git a/kernel_fixtures.py b/kernel_fixtures.py index d480a5d..e905363 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -1,8 +1,10 @@ import numpy as np import pyopencl as cl +import loopy as lp # noqa import fixtures + def mult_mat_vec(ctx_factory, alpha, a, b): queue = fixtures.get_queue(ctx_factory) @@ -13,3 +15,33 @@ def mult_mat_vec(ctx_factory, alpha, a, b): return c_dev.get() + +def compute_flux_derivatives(ctx_factory, params, arrays): + queue = fixtures.get_queue(ctx_factory) + + prg = fixtures.get_weno_program() + cfd = prg["compute_flux_derivatives"] + + cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") + + cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", + lp.AddressSpace.GLOBAL) + + prg = prg.with_kernel(cfd) + + nx_halo = params.nx + params.nhalo + ny_halo = params.ny + params.nhalo + nz_halo = params.nz + params.nhalo + + flux_derivatives_dev = cl.array.empty(queue, (params.nvars, params.ndim, + nx_halo, ny_halo, nz_halo), dtype=np.float32, order="F") + + prg(queue, nvars=params.nvars, ndim=params.ndim, + states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, + metric_jacobians=arrays.metric_jacobians, + flux_derivatives=flux_derivatives_dev) + return flux_derivatives_dev.get() diff --git a/test.py b/test.py index d99f84f..157c3bd 100644 --- a/test.py +++ b/test.py @@ -32,22 +32,7 @@ def test_matvec(ctx_factory): def test_compute_flux_derivatives(ctx_factory): params = setup.FluxDerivativeParams(ndim=3, nvars=5, nx=10, ny=10, nz=10) arrays = setup.random_flux_derivative_arrays(params) - - ndim = 3 - nvars = 5 - nx = 10 - ny = 10 - nz = 10 - - states = setup.random_array(nvars, nx+6, ny+6, nz+6) - fluxes = setup.random_array(nvars, ndim, nx+6, ny+6, nz+6) - metrics = setup.random_array(ndim, ndim, nx+6, ny+6, nz+6) - metric_jacobians = setup.random_array(nx+6, ny+6, nz+6) - - fixtures.compute_flux_derivatives(ctx_factory, - nvars=nvars, ndim=ndim, nx=nx, ny=ny, nz=nz, - states=states, fluxes=fluxes, metrics=metrics, - metric_jacobians=metric_jacobians) + kernel.compute_flux_derivatives(ctx_factory, params, arrays) @pytest.mark.skip("slow") -- GitLab From 01f7e71e331de0c37f6245b4728819db50bfa4ce Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 23:16:31 -0500 Subject: [PATCH 13/80] remove a kernel call that seems like a duplicate --- test.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/test.py b/test.py index 157c3bd..00b21ef 100644 --- a/test.py +++ b/test.py @@ -68,11 +68,6 @@ def test_compute_flux_derivatives_gpu(ctx_factory): metric_jacobians=metric_jacobians, flux_derivatives=flux_derivatives_dev) - prg(queue, nvars=nvars, ndim=ndim, - states=states, fluxes=fluxes, metrics=metrics, - metric_jacobians=metric_jacobians, - flux_derivatives=flux_derivatives_dev) - # This lets you run 'python test.py test_case(cl._csc)' without pytest. if __name__ == "__main__": -- GitLab From fc1b17b00c80f445e97dcb4e393ed73ad47b8e82 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 23:36:28 -0500 Subject: [PATCH 14/80] move bulk of compute_flux_derivative_gpu code into test fixture --- kernel_fixtures.py | 25 +++++++++++++++++++++++++ setup_fixtures.py | 19 +++++++++++++++++++ test.py | 34 ++++------------------------------ 3 files changed, 48 insertions(+), 30 deletions(-) diff --git a/kernel_fixtures.py b/kernel_fixtures.py index e905363..0640e8c 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -45,3 +45,28 @@ def compute_flux_derivatives(ctx_factory, params, arrays): metric_jacobians=arrays.metric_jacobians, flux_derivatives=flux_derivatives_dev) return flux_derivatives_dev.get() + +def compute_flux_derivatives_gpu(ctx_factory, params, arrays): + prg = fixtures.get_gpu_transformed_weno() + + queue = fixtures.get_queue(ctx_factory) + + nx_halo = params.nx + params.nhalo + ny_halo = params.ny + params.nhalo + nz_halo = params.nz + params.nhalo + + flux_derivatives_dev = cl.array.empty(queue, (params.nvars, params.ndim, + nx_halo, ny_halo, nz_halo), dtype=np.float32, order="F") + + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + + if 1: + with open("gen-code.cl", "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + + prg = lp.set_options(prg, no_numpy=True) + + prg(queue, nvars=params.nvars, ndim=params.ndim, + states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, + metric_jacobians=arrays.metric_jacobians, + flux_derivatives=flux_derivatives_dev) diff --git a/setup_fixtures.py b/setup_fixtures.py index 9e24722..b576866 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -1,5 +1,7 @@ import numpy as np +import fixtures + class FluxDerivativeParams: def __init__(self, nvars, ndim, nx, ny, nz): @@ -36,3 +38,20 @@ def random_flux_derivative_arrays(params): metric_jacobians = random_array(nx_halo, ny_halo, nz_halo) return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) + + +def random_flux_derivative_arrays_on_device(ctx_factory, params): + queue = fixtures.get_queue(ctx_factory) + + nvars = params.nvars + ndim = params.ndim + nx_halo = params.nx + params.nhalo + ny_halo = params.ny + params.nhalo + nz_halo = params.nz + params.nhalo + + states = fixtures.f_array(queue, nvars, nx_halo, ny_halo, nz_halo) + fluxes = fixtures.f_array(queue, nvars, ndim, nx_halo, ny_halo, nz_halo) + metrics = fixtures.f_array(queue, ndim, ndim, nx_halo, ny_halo, nz_halo) + metric_jacobians = fixtures.f_array(queue, nx_halo, ny_halo, nz_halo) + + return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) diff --git a/test.py b/test.py index 00b21ef..914e029 100644 --- a/test.py +++ b/test.py @@ -32,41 +32,15 @@ def test_matvec(ctx_factory): def test_compute_flux_derivatives(ctx_factory): params = setup.FluxDerivativeParams(ndim=3, nvars=5, nx=10, ny=10, nz=10) arrays = setup.random_flux_derivative_arrays(params) + kernel.compute_flux_derivatives(ctx_factory, params, arrays) -@pytest.mark.skip("slow") def test_compute_flux_derivatives_gpu(ctx_factory): - prg = fixtures.get_gpu_transformed_weno() - - queue = fixtures.get_queue(ctx_factory) - - ndim = 3 - nvars = 5 - nx = 10 - ny = 10 - nz = 10 - - states = fixtures.f_array(queue, nvars, nx+6, ny+6, nz+6) - fluxes = fixtures.f_array(queue, nvars, ndim, nx+6, ny+6, nz+6) - metrics = fixtures.f_array(queue, ndim, ndim, nx+6, ny+6, nz+6) - metric_jacobians = fixtures.f_array(queue, nx+6, ny+6, nz+6) - - flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, - nz+6), dtype=np.float32, order="F") - - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - - prg = lp.set_options(prg, no_numpy=True) + params = setup.FluxDerivativeParams(ndim=3, nvars=5, nx=10, ny=10, nz=10) + arrays = setup.random_flux_derivative_arrays_on_device(ctx_factory, params) - prg(queue, nvars=nvars, ndim=ndim, - states=states, fluxes=fluxes, metrics=metrics, - metric_jacobians=metric_jacobians, - flux_derivatives=flux_derivatives_dev) + kernel.compute_flux_derivatives_gpu(ctx_factory, params, arrays) # This lets you run 'python test.py test_case(cl._csc)' without pytest. -- GitLab From 8c911b6aa391c992a5ceb6d696a5dc60042eb267 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 23:40:36 -0500 Subject: [PATCH 15/80] cleanup imports in test.py --- test.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/test.py b/test.py index 914e029..eaac9d7 100644 --- a/test.py +++ b/test.py @@ -1,12 +1,4 @@ -import numpy as np -import numpy.linalg as la -import pyopencl as cl -import pyopencl.array # noqa -import pyopencl.tools # noqa -import pyopencl.clrandom # noqa -import loopy as lp # noqa import sys - import logging import pytest @@ -14,7 +6,6 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -import fixtures import comparison_fixtures as compare import setup_fixtures as setup import kernel_fixtures as kernel @@ -49,5 +40,4 @@ if __name__ == "__main__": logging.basicConfig(level="INFO") exec(sys.argv[1]) else: - from pytest import main - main([__file__]) + pytest.main([__file__]) -- GitLab From 4b7ac4da133e47b2232e58eea52a7bef7802bfa6 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 23:46:45 -0500 Subject: [PATCH 16/80] refactor halo computations into params class --- kernel_fixtures.py | 12 ++---------- setup_fixtures.py | 39 ++++++++++++++++----------------------- 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/kernel_fixtures.py b/kernel_fixtures.py index 0640e8c..e1ff329 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -33,12 +33,8 @@ def compute_flux_derivatives(ctx_factory, params, arrays): prg = prg.with_kernel(cfd) - nx_halo = params.nx + params.nhalo - ny_halo = params.ny + params.nhalo - nz_halo = params.nz + params.nhalo - flux_derivatives_dev = cl.array.empty(queue, (params.nvars, params.ndim, - nx_halo, ny_halo, nz_halo), dtype=np.float32, order="F") + params.nx_halo, params.ny_halo, params.nz_halo), dtype=np.float32, order="F") prg(queue, nvars=params.nvars, ndim=params.ndim, states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, @@ -51,12 +47,8 @@ def compute_flux_derivatives_gpu(ctx_factory, params, arrays): queue = fixtures.get_queue(ctx_factory) - nx_halo = params.nx + params.nhalo - ny_halo = params.ny + params.nhalo - nz_halo = params.nz + params.nhalo - flux_derivatives_dev = cl.array.empty(queue, (params.nvars, params.ndim, - nx_halo, ny_halo, nz_halo), dtype=np.float32, order="F") + params.nx_halo, params.ny_halo, params.nz_halo), dtype=np.float32, order="F") prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) diff --git a/setup_fixtures.py b/setup_fixtures.py index b576866..d3a236c 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -7,10 +7,15 @@ class FluxDerivativeParams: def __init__(self, nvars, ndim, nx, ny, nz): self.nvars = nvars self.ndim = ndim + self.nx = nx self.ny = ny self.nz = nz - self.nhalo = 6 + + self.nhalo = 3 + self.nx_halo = self.nx + 2*self.nhalo + self.ny_halo = self.ny + 2*self.nhalo + self.nz_halo = self.nz + 2*self.nhalo class FluxDerivativeArrays: @@ -25,33 +30,21 @@ def random_array(*shape): return np.random.random_sample(shape).astype(np.float32).copy(order="F") -def random_flux_derivative_arrays(params): - nvars = params.nvars - ndim = params.ndim - nx_halo = params.nx + params.nhalo - ny_halo = params.ny + params.nhalo - nz_halo = params.nz + params.nhalo - - states = random_array(nvars, nx_halo, ny_halo, nz_halo) - fluxes = random_array(nvars, ndim, nx_halo, ny_halo, nz_halo) - metrics = random_array(ndim, ndim, nx_halo, ny_halo, nz_halo) - metric_jacobians = random_array(nx_halo, ny_halo, nz_halo) +def random_flux_derivative_arrays(p): + states = random_array(p.nvars, p.nx_halo, p.ny_halo, p.nz_halo) + fluxes = random_array(p.nvars, p.ndim, p.nx_halo, p.ny_halo, p.nz_halo) + metrics = random_array(p.ndim, p.ndim, p.nx_halo, p.ny_halo, p.nz_halo) + metric_jacobians = random_array(p.nx_halo, p.ny_halo, p.nz_halo) return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) -def random_flux_derivative_arrays_on_device(ctx_factory, params): +def random_flux_derivative_arrays_on_device(ctx_factory, p): queue = fixtures.get_queue(ctx_factory) - nvars = params.nvars - ndim = params.ndim - nx_halo = params.nx + params.nhalo - ny_halo = params.ny + params.nhalo - nz_halo = params.nz + params.nhalo - - states = fixtures.f_array(queue, nvars, nx_halo, ny_halo, nz_halo) - fluxes = fixtures.f_array(queue, nvars, ndim, nx_halo, ny_halo, nz_halo) - metrics = fixtures.f_array(queue, ndim, ndim, nx_halo, ny_halo, nz_halo) - metric_jacobians = fixtures.f_array(queue, nx_halo, ny_halo, nz_halo) + states = fixtures.f_array(queue, p.nvars, p.nx_halo, p.ny_halo, p.nz_halo) + fluxes = fixtures.f_array(queue, p.nvars, p.ndim, p.nx_halo, p.ny_halo, p.nz_halo) + metrics = fixtures.f_array(queue, p.ndim, p.ndim, p.nx_halo, p.ny_halo, p.nz_halo) + metric_jacobians = fixtures.f_array(queue, p.nx_halo, p.ny_halo, p.nz_halo) return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) -- GitLab From abf7ac90fc4a1c6594829b5d405ca8f4779dfd69 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 27 May 2019 23:58:55 -0500 Subject: [PATCH 17/80] move lists of bounds to FluxDerivativeParams --- setup_fixtures.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/setup_fixtures.py b/setup_fixtures.py index d3a236c..b5599da 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -17,6 +17,18 @@ class FluxDerivativeParams: self.ny_halo = self.ny + 2*self.nhalo self.nz_halo = self.nz + 2*self.nhalo + def state_bounds(self): + return self.nvars, self.nx_halo, self.ny_halo, self.nz_halo + + def flux_bounds(self): + return self.nvars, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo + + def metric_bounds(self): + return self.ndim, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo + + def jacobian_bounds(self): + return self.nx_halo, self.ny_halo, self.nz_halo + class FluxDerivativeArrays: def __init__(self, states, fluxes, metrics, metric_jacobians): @@ -30,21 +42,21 @@ def random_array(*shape): return np.random.random_sample(shape).astype(np.float32).copy(order="F") -def random_flux_derivative_arrays(p): - states = random_array(p.nvars, p.nx_halo, p.ny_halo, p.nz_halo) - fluxes = random_array(p.nvars, p.ndim, p.nx_halo, p.ny_halo, p.nz_halo) - metrics = random_array(p.ndim, p.ndim, p.nx_halo, p.ny_halo, p.nz_halo) - metric_jacobians = random_array(p.nx_halo, p.ny_halo, p.nz_halo) +def random_flux_derivative_arrays(params): + states = random_array(*params.state_bounds()) + fluxes = random_array(*params.flux_bounds()) + metrics = random_array(*params.metric_bounds()) + metric_jacobians = random_array(*params.jacobian_bounds()) return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) -def random_flux_derivative_arrays_on_device(ctx_factory, p): +def random_flux_derivative_arrays_on_device(ctx_factory, params): queue = fixtures.get_queue(ctx_factory) - states = fixtures.f_array(queue, p.nvars, p.nx_halo, p.ny_halo, p.nz_halo) - fluxes = fixtures.f_array(queue, p.nvars, p.ndim, p.nx_halo, p.ny_halo, p.nz_halo) - metrics = fixtures.f_array(queue, p.ndim, p.ndim, p.nx_halo, p.ny_halo, p.nz_halo) - metric_jacobians = fixtures.f_array(queue, p.nx_halo, p.ny_halo, p.nz_halo) + states = fixtures.f_array(queue, *params.state_bounds()) + fluxes = fixtures.f_array(queue, *params.flux_bounds()) + metrics = fixtures.f_array(queue, *params.metric_bounds()) + metric_jacobians = fixtures.f_array(queue, *params.jacobian_bounds()) return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) -- GitLab From 753bacff7d74f002955c851e5e7b667c2e3558b3 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 00:02:03 -0500 Subject: [PATCH 18/80] move f_array to setup and rename --- fixtures.py | 4 ---- setup_fixtures.py | 14 ++++++++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fixtures.py b/fixtures.py index c13c2e2..febd232 100644 --- a/fixtures.py +++ b/fixtures.py @@ -87,7 +87,3 @@ def get_weno_program(): prg = lp.parse_transformed_fortran(infile_content, filename=fn) _WENO_PRG.append(prg) return prg - -def f_array(queue, *shape): - ary = np.random.random_sample(shape).astype(np.float32).copy(order="F") - return cl.array.to_device(queue, ary) diff --git a/setup_fixtures.py b/setup_fixtures.py index b5599da..042cbfa 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -1,4 +1,5 @@ import numpy as np +import pyopencl as cl import fixtures @@ -42,6 +43,11 @@ def random_array(*shape): return np.random.random_sample(shape).astype(np.float32).copy(order="F") +def random_array_on_device(queue, *shape): + ary = np.random.random_sample(shape).astype(np.float32).copy(order="F") + return cl.array.to_device(queue, ary) + + def random_flux_derivative_arrays(params): states = random_array(*params.state_bounds()) fluxes = random_array(*params.flux_bounds()) @@ -54,9 +60,9 @@ def random_flux_derivative_arrays(params): def random_flux_derivative_arrays_on_device(ctx_factory, params): queue = fixtures.get_queue(ctx_factory) - states = fixtures.f_array(queue, *params.state_bounds()) - fluxes = fixtures.f_array(queue, *params.flux_bounds()) - metrics = fixtures.f_array(queue, *params.metric_bounds()) - metric_jacobians = fixtures.f_array(queue, *params.jacobian_bounds()) + states = random_array_on_device(queue, *params.state_bounds()) + fluxes = random_array_on_device(queue, *params.flux_bounds()) + metrics = random_array_on_device(queue, *params.metric_bounds()) + metric_jacobians = random_array_on_device(queue, *params.jacobian_bounds()) return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) -- GitLab From 1b5c490d7e35c7ab85aa7570315c192fcb4a264e Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 00:05:15 -0500 Subject: [PATCH 19/80] remove some duplicate code in random_array generation --- setup_fixtures.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup_fixtures.py b/setup_fixtures.py index 042cbfa..b3800c5 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -44,8 +44,7 @@ def random_array(*shape): def random_array_on_device(queue, *shape): - ary = np.random.random_sample(shape).astype(np.float32).copy(order="F") - return cl.array.to_device(queue, ary) + return cl.array.to_device(queue, random_array(*shape)) def random_flux_derivative_arrays(params): -- GitLab From 1d7969a937d51ea63cba06eb47e38c6a7247a9e3 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 00:10:10 -0500 Subject: [PATCH 20/80] add new fixtures file for device-related things and put get_queue there --- device_fixtures.py | 15 +++++++++++++++ fixtures.py | 8 +------- setup_fixtures.py | 4 ++-- 3 files changed, 18 insertions(+), 9 deletions(-) create mode 100644 device_fixtures.py diff --git a/device_fixtures.py b/device_fixtures.py new file mode 100644 index 0000000..d0dbc59 --- /dev/null +++ b/device_fixtures.py @@ -0,0 +1,15 @@ +import pyopencl as cl + + +_QUEUE = [] + + +def get_queue(ctx_factory): + if not _QUEUE: + setup_queue(ctx_factory) + return _QUEUE[0] + + +def setup_queue(ctx_factory): + ctx = ctx_factory() + _QUEUE.append(cl.CommandQueue(ctx)) diff --git a/fixtures.py b/fixtures.py index febd232..6bb5c43 100644 --- a/fixtures.py +++ b/fixtures.py @@ -7,9 +7,9 @@ import loopy as lp from pytest import approx +from device_fixtures import get_queue _WENO_PRG = [] -_QUEUE = [] def get_gpu_transformed_weno(): prg = get_weno_program() @@ -56,12 +56,6 @@ def get_gpu_transformed_weno(): return prg -def get_queue(ctx_factory): - if not _QUEUE: - ctx = ctx_factory() - _QUEUE.append(cl.CommandQueue(ctx)) - return _QUEUE[0] - def with_root_kernel(prg, root_name): # FIXME This is a little less beautiful than it could be diff --git a/setup_fixtures.py b/setup_fixtures.py index b3800c5..eaaa63e 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -1,7 +1,7 @@ import numpy as np import pyopencl as cl -import fixtures +import device_fixtures as device class FluxDerivativeParams: @@ -57,7 +57,7 @@ def random_flux_derivative_arrays(params): def random_flux_derivative_arrays_on_device(ctx_factory, params): - queue = fixtures.get_queue(ctx_factory) + queue = device.get_queue(ctx_factory) states = random_array_on_device(queue, *params.state_bounds()) fluxes = random_array_on_device(queue, *params.flux_bounds()) -- GitLab From b0b032c772236ad029ed0facd555cb834d296d0a Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 00:23:05 -0500 Subject: [PATCH 21/80] fix broken reference in benchmark due to fixture refactorings --- fixtures.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fixtures.py b/fixtures.py index 6bb5c43..c5737db 100644 --- a/fixtures.py +++ b/fixtures.py @@ -8,6 +8,7 @@ import loopy as lp from pytest import approx from device_fixtures import get_queue +from setup_fixtures import random_array_on_device as f_array _WENO_PRG = [] -- GitLab From 413ec5e46bbf8d8a9d15f8cacc6a6a360685c4a7 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 16:24:47 -0500 Subject: [PATCH 22/80] unpin pytest version in CI script --- build-and-test-py-project.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-and-test-py-project.sh b/build-and-test-py-project.sh index 0810a38..c74b4ec 100644 --- a/build-and-test-py-project.sh +++ b/build-and-test-py-project.sh @@ -57,7 +57,7 @@ $PIP install setuptools # Pinned to 3.0.4 because of https://github.com/pytest-dev/pytest/issues/2434 # Install before a newer version gets pulled in as a dependency -$PIP install pytest==3.0.4 pytest-warnings==0.2.0 +#$PIP install pytest==3.0.4 pytest-warnings==0.2.0 if test "$EXTRA_INSTALL" != ""; then for i in $EXTRA_INSTALL ; do -- GitLab From a5c7aa5c93b2cd92b85dce656dd69b79d9c3f898 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Tue, 28 May 2019 23:45:20 +0200 Subject: [PATCH 23/80] Fix pytest installation in CI script --- build-and-test-py-project.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/build-and-test-py-project.sh b/build-and-test-py-project.sh index c74b4ec..08035f5 100644 --- a/build-and-test-py-project.sh +++ b/build-and-test-py-project.sh @@ -55,9 +55,7 @@ export XDG_CACHE_HOME=$HOME/.cache/$CI_RUNNER_ID $PIP install --upgrade pip $PIP install setuptools -# Pinned to 3.0.4 because of https://github.com/pytest-dev/pytest/issues/2434 -# Install before a newer version gets pulled in as a dependency -#$PIP install pytest==3.0.4 pytest-warnings==0.2.0 +$PIP install pytest if test "$EXTRA_INSTALL" != ""; then for i in $EXTRA_INSTALL ; do -- GitLab From 8aa2cf58e781fcb3bde37b3397e37bc0799477ea Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 22:34:38 -0500 Subject: [PATCH 24/80] pass queue, prg directly to kernel fixture for mult_mat_vec --- kernel_fixtures.py | 6 ++---- test.py | 9 ++++++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel_fixtures.py b/kernel_fixtures.py index e1ff329..610e469 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -5,12 +5,10 @@ import loopy as lp # noqa import fixtures -def mult_mat_vec(ctx_factory, alpha, a, b): - queue = fixtures.get_queue(ctx_factory) - +def mult_mat_vec(queue, prg, alpha, a, b): c_dev = cl.array.empty(queue, 10, dtype=np.float32) - prg = fixtures.with_root_kernel(fixtures.get_weno_program(), "mult_mat_vec") + prg = fixtures.with_root_kernel(prg, "mult_mat_vec") prg(queue, a=a, b=b, c=c_dev, alpha=alpha) return c_dev.get() diff --git a/test.py b/test.py index eaac9d7..f1dd8de 100644 --- a/test.py +++ b/test.py @@ -10,16 +10,22 @@ import comparison_fixtures as compare import setup_fixtures as setup import kernel_fixtures as kernel +import fixtures +import device_fixtures as device def test_matvec(ctx_factory): + queue = device.get_queue(ctx_factory) + prg = fixtures.get_weno_program() + a = setup.random_array(10, 10) b = setup.random_array(10) - c = kernel.mult_mat_vec(ctx_factory, a=a, b=b, alpha=1.0) + c = kernel.mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) compare.arrays(a@b, c) +@pytest.mark.skip("slow") def test_compute_flux_derivatives(ctx_factory): params = setup.FluxDerivativeParams(ndim=3, nvars=5, nx=10, ny=10, nz=10) arrays = setup.random_flux_derivative_arrays(params) @@ -27,6 +33,7 @@ def test_compute_flux_derivatives(ctx_factory): kernel.compute_flux_derivatives(ctx_factory, params, arrays) +@pytest.mark.skip("slow") def test_compute_flux_derivatives_gpu(ctx_factory): params = setup.FluxDerivativeParams(ndim=3, nvars=5, nx=10, ny=10, nz=10) arrays = setup.random_flux_derivative_arrays_on_device(ctx_factory, params) -- GitLab From ce7087199e25299d6e158e9bdaf6e764076ce395 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 22:36:03 -0500 Subject: [PATCH 25/80] move with_root_kernel to kernel_fixtures, possibly breaks skipped tests --- fixtures.py | 12 ------------ kernel_fixtures.py | 14 +++++++++++++- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fixtures.py b/fixtures.py index c5737db..6a1e791 100644 --- a/fixtures.py +++ b/fixtures.py @@ -58,18 +58,6 @@ def get_gpu_transformed_weno(): -def with_root_kernel(prg, root_name): - # FIXME This is a little less beautiful than it could be - new_prg = prg.copy(name=root_name) - for name in prg: - clbl = new_prg[name] - if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: - new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) - - new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) - return new_prg - - def get_weno_program(): if _WENO_PRG: return _WENO_PRG[0] diff --git a/kernel_fixtures.py b/kernel_fixtures.py index 610e469..cbef8a3 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -5,10 +5,22 @@ import loopy as lp # noqa import fixtures +def with_root_kernel(prg, root_name): + # FIXME This is a little less beautiful than it could be + new_prg = prg.copy(name=root_name) + for name in prg: + clbl = new_prg[name] + if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: + new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) + + new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) + return new_prg + + def mult_mat_vec(queue, prg, alpha, a, b): c_dev = cl.array.empty(queue, 10, dtype=np.float32) - prg = fixtures.with_root_kernel(prg, "mult_mat_vec") + prg = with_root_kernel(prg, "mult_mat_vec") prg(queue, a=a, b=b, c=c_dev, alpha=alpha) return c_dev.get() -- GitLab From 6777ee223a9800dd4347bf88fe989a733671d6b7 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 22:36:46 -0500 Subject: [PATCH 26/80] size of output should not be hardcoded in kernel_fixture --- kernel_fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel_fixtures.py b/kernel_fixtures.py index cbef8a3..3e15104 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -18,7 +18,7 @@ def with_root_kernel(prg, root_name): def mult_mat_vec(queue, prg, alpha, a, b): - c_dev = cl.array.empty(queue, 10, dtype=np.float32) + c_dev = cl.array.empty(queue, *b.shape, dtype=np.float32) prg = with_root_kernel(prg, "mult_mat_vec") prg(queue, a=a, b=b, c=c_dev, alpha=alpha) -- GitLab From 9f527cf65ecaf4237842f2d41b72eba8d3cc2f51 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 22:45:31 -0500 Subject: [PATCH 27/80] added program_fixtures to handle program parsing and memoization --- fixtures.py | 15 +-------------- program_fixtures.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 14 deletions(-) create mode 100644 program_fixtures.py diff --git a/fixtures.py b/fixtures.py index 6a1e791..e4d5810 100644 --- a/fixtures.py +++ b/fixtures.py @@ -9,8 +9,7 @@ from pytest import approx from device_fixtures import get_queue from setup_fixtures import random_array_on_device as f_array - -_WENO_PRG = [] +from program_fixtures import get_weno as get_weno_program def get_gpu_transformed_weno(): prg = get_weno_program() @@ -58,15 +57,3 @@ def get_gpu_transformed_weno(): -def get_weno_program(): - if _WENO_PRG: - return _WENO_PRG[0] - - fn = "WENO.F90" - - with open(fn, "r") as infile: - infile_content = infile.read() - - prg = lp.parse_transformed_fortran(infile_content, filename=fn) - _WENO_PRG.append(prg) - return prg diff --git a/program_fixtures.py b/program_fixtures.py new file mode 100644 index 0000000..03ba9e2 --- /dev/null +++ b/program_fixtures.py @@ -0,0 +1,18 @@ +import loopy as lp + + +_WENO_PRG = [] + + +def get_weno(): + if _WENO_PRG: + return _WENO_PRG[0] + + fn = "WENO.F90" + + with open(fn, "r") as infile: + infile_content = infile.read() + + prg = lp.parse_transformed_fortran(infile_content, filename=fn) + _WENO_PRG.append(prg) + return prg -- GitLab From 0fe6a0650f8040e22ebd030d95c833cf2e2040cf Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 22:49:19 -0500 Subject: [PATCH 28/80] using new program_fixtures from test.py --- test.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test.py b/test.py index f1dd8de..067365f 100644 --- a/test.py +++ b/test.py @@ -6,16 +6,15 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -import comparison_fixtures as compare +import device_fixtures as device +import program_fixtures as program import setup_fixtures as setup import kernel_fixtures as kernel - -import fixtures -import device_fixtures as device +import comparison_fixtures as compare def test_matvec(ctx_factory): queue = device.get_queue(ctx_factory) - prg = fixtures.get_weno_program() + prg = program.get_weno() a = setup.random_array(10, 10) b = setup.random_array(10) -- GitLab From 7c032810e47ab17cdd03e1ad805d7172ac6b010b Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 22:51:18 -0500 Subject: [PATCH 29/80] remove skips --- test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test.py b/test.py index 067365f..821db6c 100644 --- a/test.py +++ b/test.py @@ -24,7 +24,6 @@ def test_matvec(ctx_factory): compare.arrays(a@b, c) -@pytest.mark.skip("slow") def test_compute_flux_derivatives(ctx_factory): params = setup.FluxDerivativeParams(ndim=3, nvars=5, nx=10, ny=10, nz=10) arrays = setup.random_flux_derivative_arrays(params) @@ -32,7 +31,6 @@ def test_compute_flux_derivatives(ctx_factory): kernel.compute_flux_derivatives(ctx_factory, params, arrays) -@pytest.mark.skip("slow") def test_compute_flux_derivatives_gpu(ctx_factory): params = setup.FluxDerivativeParams(ndim=3, nvars=5, nx=10, ny=10, nz=10) arrays = setup.random_flux_derivative_arrays_on_device(ctx_factory, params) -- GitLab From 12702db94934688450e7996c91a77d978f014ff9 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 23:03:40 -0500 Subject: [PATCH 30/80] move get_gpu_transformed_weno to new fixture module for transforms --- fixtures.py | 48 +---------------------------------------- transform_fixtures.py | 50 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 47 deletions(-) create mode 100644 transform_fixtures.py diff --git a/fixtures.py b/fixtures.py index e4d5810..4e570f3 100644 --- a/fixtures.py +++ b/fixtures.py @@ -10,50 +10,4 @@ from pytest import approx from device_fixtures import get_queue from setup_fixtures import random_array_on_device as f_array from program_fixtures import get_weno as get_weno_program - -def get_gpu_transformed_weno(): - prg = get_weno_program() - - cfd = prg["compute_flux_derivatives"] - - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") - - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) - - for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: - cfd = lp.split_iname(cfd, "i"+suffix, 16, - outer_tag="g.0", inner_tag="l.0") - cfd = lp.split_iname(cfd, "j"+suffix, 16, - outer_tag="g.1", inner_tag="l.1") - - for var_name in ["delta_xi", "delta_eta", "delta_zeta"]: - cfd = lp.assignment_to_subst(cfd, var_name) - - cfd = lp.add_barrier(cfd, "tag:to_generalized", "tag:flux_x_compute") - cfd = lp.add_barrier(cfd, "tag:flux_x_compute", "tag:flux_x_diff") - cfd = lp.add_barrier(cfd, "tag:flux_x_diff", "tag:flux_y_compute") - cfd = lp.add_barrier(cfd, "tag:flux_y_compute", "tag:flux_y_diff") - cfd = lp.add_barrier(cfd, "tag:flux_y_diff", "tag:flux_z_compute") - cfd = lp.add_barrier(cfd, "tag:flux_z_compute", "tag:flux_z_diff") - cfd = lp.add_barrier(cfd, "tag:flux_z_diff", "tag:from_generalized") - - prg = prg.with_kernel(cfd) - - # FIXME: These should work, but don't - # FIXME: Undo the hand-inlining in WENO.F90 - #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") - #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") - - if 0: - print(prg["convert_to_generalized_frozen"]) - 1/0 - - return prg - - - +from transform_fixtures import get_gpu_transformed_weno diff --git a/transform_fixtures.py b/transform_fixtures.py new file mode 100644 index 0000000..33849bd --- /dev/null +++ b/transform_fixtures.py @@ -0,0 +1,50 @@ +import loopy as lp + + +def weno_for_gpu(prg): + cfd = prg["compute_flux_derivatives"] + + cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") + + cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", + lp.AddressSpace.GLOBAL) + + for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: + cfd = lp.split_iname(cfd, "i"+suffix, 16, + outer_tag="g.0", inner_tag="l.0") + cfd = lp.split_iname(cfd, "j"+suffix, 16, + outer_tag="g.1", inner_tag="l.1") + + for var_name in ["delta_xi", "delta_eta", "delta_zeta"]: + cfd = lp.assignment_to_subst(cfd, var_name) + + cfd = lp.add_barrier(cfd, "tag:to_generalized", "tag:flux_x_compute") + cfd = lp.add_barrier(cfd, "tag:flux_x_compute", "tag:flux_x_diff") + cfd = lp.add_barrier(cfd, "tag:flux_x_diff", "tag:flux_y_compute") + cfd = lp.add_barrier(cfd, "tag:flux_y_compute", "tag:flux_y_diff") + cfd = lp.add_barrier(cfd, "tag:flux_y_diff", "tag:flux_z_compute") + cfd = lp.add_barrier(cfd, "tag:flux_z_compute", "tag:flux_z_diff") + cfd = lp.add_barrier(cfd, "tag:flux_z_diff", "tag:from_generalized") + + prg = prg.with_kernel(cfd) + + # FIXME: These should work, but don't + # FIXME: Undo the hand-inlining in WENO.F90 + #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") + #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") + + if 0: + print(prg["convert_to_generalized_frozen"]) + 1/0 + + return prg + + +def get_gpu_transformed_weno(): + import program_fixtures as program + prg = program.get_weno() + return weno_for_gpu(prg) -- GitLab From 2afb7d4005f055f0ea376a457db1558864fa64f9 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 23:04:03 -0500 Subject: [PATCH 31/80] get rid of all references to fixtures in kernel_fixtures --- kernel_fixtures.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel_fixtures.py b/kernel_fixtures.py index 3e15104..36b2e3c 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -1,8 +1,11 @@ import numpy as np import pyopencl as cl +import pyopencl.array # noqa import loopy as lp # noqa -import fixtures +import device_fixtures as device +import program_fixtures as program +import transform_fixtures as transform def with_root_kernel(prg, root_name): @@ -27,9 +30,9 @@ def mult_mat_vec(queue, prg, alpha, a, b): def compute_flux_derivatives(ctx_factory, params, arrays): - queue = fixtures.get_queue(ctx_factory) + queue = device.get_queue(ctx_factory) - prg = fixtures.get_weno_program() + prg = program.get_weno() cfd = prg["compute_flux_derivatives"] cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") @@ -53,9 +56,9 @@ def compute_flux_derivatives(ctx_factory, params, arrays): return flux_derivatives_dev.get() def compute_flux_derivatives_gpu(ctx_factory, params, arrays): - prg = fixtures.get_gpu_transformed_weno() + prg = transform.get_gpu_transformed_weno() - queue = fixtures.get_queue(ctx_factory) + queue = device.get_queue(ctx_factory) flux_derivatives_dev = cl.array.empty(queue, (params.nvars, params.ndim, params.nx_halo, params.ny_halo, params.nz_halo), dtype=np.float32, order="F") -- GitLab From 90e00355b2ea60a011aa5e4e5470b28bba6e07b6 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 23:13:12 -0500 Subject: [PATCH 32/80] update references to fixtures in benchmark.py --- benchmark.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/benchmark.py b/benchmark.py index 266d998..00034a7 100644 --- a/benchmark.py +++ b/benchmark.py @@ -14,14 +14,18 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -import fixtures +import device_fixtures as device +import program_fixtures as program +import transform_fixtures as transform +import setup_fixtures as setup def benchmark_compute_flux_derivatives_gpu(ctx_factory): logging.basicConfig(level="INFO") - prg = fixtures.get_gpu_transformed_weno() + prg = program.get_weno() + prg = transform.weno_for_gpu(prg) - queue = fixtures.get_queue(ctx_factory) + queue = device.get_queue(ctx_factory) ndim = 3 nvars = 5 @@ -31,10 +35,10 @@ def benchmark_compute_flux_derivatives_gpu(ctx_factory): nz = n print("ARRAY GEN") - states = fixtures.f_array(queue, nvars, nx+6, ny+6, nz+6) - fluxes = fixtures.f_array(queue, nvars, ndim, nx+6, ny+6, nz+6) - metrics = fixtures.f_array(queue, ndim, ndim, nx+6, ny+6, nz+6) - metric_jacobians = fixtures.f_array(queue, nx+6, ny+6, nz+6) + states = setup.random_array_on_device(queue, nvars, nx+6, ny+6, nz+6) + fluxes = setup.random_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) + metrics = setup.random_array_on_device(queue, ndim, ndim, nx+6, ny+6, nz+6) + metric_jacobians = setup.random_array_on_device(queue, nx+6, ny+6, nz+6) print("END ARRAY GEN") flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, -- GitLab From 0a508efe885e47b29f9c36c586554357175d77b5 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 23:15:50 -0500 Subject: [PATCH 33/80] remove empty fixtures module --- fixtures.py | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 fixtures.py diff --git a/fixtures.py b/fixtures.py deleted file mode 100644 index 4e570f3..0000000 --- a/fixtures.py +++ /dev/null @@ -1,13 +0,0 @@ -import numpy as np -import numpy.linalg as la # noqa -import pyopencl as cl -import pyopencl.array # noqa -import pyopencl.clrandom # noqa -import loopy as lp - -from pytest import approx - -from device_fixtures import get_queue -from setup_fixtures import random_array_on_device as f_array -from program_fixtures import get_weno as get_weno_program -from transform_fixtures import get_gpu_transformed_weno -- GitLab From 6122d5fd34e89d8fb7ebdac64c5c76d52e6011cb Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 23:21:32 -0500 Subject: [PATCH 34/80] refactor out WENO parsing into separate function --- program_fixtures.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/program_fixtures.py b/program_fixtures.py index 03ba9e2..0f50ff1 100644 --- a/program_fixtures.py +++ b/program_fixtures.py @@ -4,10 +4,7 @@ import loopy as lp _WENO_PRG = [] -def get_weno(): - if _WENO_PRG: - return _WENO_PRG[0] - +def parse_weno(): fn = "WENO.F90" with open(fn, "r") as infile: @@ -15,4 +12,9 @@ def get_weno(): prg = lp.parse_transformed_fortran(infile_content, filename=fn) _WENO_PRG.append(prg) - return prg + + +def get_weno(): + if not _WENO_PRG: + parse_weno() + return _WENO_PRG[0] -- GitLab From 319b4d98cfcab6a92789197198347dad3acc54a9 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 23:33:07 -0500 Subject: [PATCH 35/80] take transforms out of compute_flux_derivatives --- kernel_fixtures.py | 19 ++----------------- setup_fixtures.py | 4 ++++ test.py | 12 +++++++++--- transform_fixtures.py | 15 +++++++++++++++ 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/kernel_fixtures.py b/kernel_fixtures.py index 36b2e3c..58d19bc 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -29,23 +29,7 @@ def mult_mat_vec(queue, prg, alpha, a, b): return c_dev.get() -def compute_flux_derivatives(ctx_factory, params, arrays): - queue = device.get_queue(ctx_factory) - - prg = program.get_weno() - cfd = prg["compute_flux_derivatives"] - - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") - - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) - - prg = prg.with_kernel(cfd) - +def compute_flux_derivatives(queue, prg, params, arrays): flux_derivatives_dev = cl.array.empty(queue, (params.nvars, params.ndim, params.nx_halo, params.ny_halo, params.nz_halo), dtype=np.float32, order="F") @@ -53,6 +37,7 @@ def compute_flux_derivatives(ctx_factory, params, arrays): states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, metric_jacobians=arrays.metric_jacobians, flux_derivatives=flux_derivatives_dev) + return flux_derivatives_dev.get() def compute_flux_derivatives_gpu(ctx_factory, params, arrays): diff --git a/setup_fixtures.py b/setup_fixtures.py index eaaa63e..add7fea 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -39,6 +39,10 @@ class FluxDerivativeArrays: self.metric_jacobians = metric_jacobians +def flux_derivative_params(nvars, ndim, n): + return FluxDerivativeParams(nvars, ndim, n, n, n) + + def random_array(*shape): return np.random.random_sample(shape).astype(np.float32).copy(order="F") diff --git a/test.py b/test.py index 821db6c..9e1ff7f 100644 --- a/test.py +++ b/test.py @@ -8,6 +8,7 @@ from pyopencl.tools import ( # noqa import device_fixtures as device import program_fixtures as program +import transform_fixtures as transform import setup_fixtures as setup import kernel_fixtures as kernel import comparison_fixtures as compare @@ -25,14 +26,19 @@ def test_matvec(ctx_factory): def test_compute_flux_derivatives(ctx_factory): - params = setup.FluxDerivativeParams(ndim=3, nvars=5, nx=10, ny=10, nz=10) + queue = device.get_queue(ctx_factory) + prg = program.get_weno() + prg = transform.compute_flux_derivative_basic(prg) + + params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup.random_flux_derivative_arrays(params) - kernel.compute_flux_derivatives(ctx_factory, params, arrays) + kernel.compute_flux_derivatives(queue, prg, params, arrays) +@pytest.mark.skip("slow") def test_compute_flux_derivatives_gpu(ctx_factory): - params = setup.FluxDerivativeParams(ndim=3, nvars=5, nx=10, ny=10, nz=10) + params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup.random_flux_derivative_arrays_on_device(ctx_factory, params) kernel.compute_flux_derivatives_gpu(ctx_factory, params, arrays) diff --git a/transform_fixtures.py b/transform_fixtures.py index 33849bd..7b29fbc 100644 --- a/transform_fixtures.py +++ b/transform_fixtures.py @@ -1,6 +1,21 @@ import loopy as lp +def compute_flux_derivative_basic(prg): + cfd = prg["compute_flux_derivatives"] + + cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") + + cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", + lp.AddressSpace.GLOBAL) + + return prg.with_kernel(cfd) + + def weno_for_gpu(prg): cfd = prg["compute_flux_derivatives"] -- GitLab From a6f068a663ec45e52813aa8e478026e83fd543f3 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 23:38:46 -0500 Subject: [PATCH 36/80] add new fixture for setting up an empty array on the device --- kernel_fixtures.py | 7 ++++--- setup_fixtures.py | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel_fixtures.py b/kernel_fixtures.py index 58d19bc..278d5f7 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -6,6 +6,7 @@ import loopy as lp # noqa import device_fixtures as device import program_fixtures as program import transform_fixtures as transform +import setup_fixtures as setup def with_root_kernel(prg, root_name): @@ -21,7 +22,7 @@ def with_root_kernel(prg, root_name): def mult_mat_vec(queue, prg, alpha, a, b): - c_dev = cl.array.empty(queue, *b.shape, dtype=np.float32) + c_dev = setup.empty_array_on_device(queue, b.shape) prg = with_root_kernel(prg, "mult_mat_vec") prg(queue, a=a, b=b, c=c_dev, alpha=alpha) @@ -30,8 +31,8 @@ def mult_mat_vec(queue, prg, alpha, a, b): def compute_flux_derivatives(queue, prg, params, arrays): - flux_derivatives_dev = cl.array.empty(queue, (params.nvars, params.ndim, - params.nx_halo, params.ny_halo, params.nz_halo), dtype=np.float32, order="F") + flux_derivatives_dev = setup.empty_array_on_device(queue, (params.nvars, params.ndim, + params.nx_halo, params.ny_halo, params.nz_halo)) prg(queue, nvars=params.nvars, ndim=params.ndim, states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, diff --git a/setup_fixtures.py b/setup_fixtures.py index add7fea..255adfc 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -43,6 +43,10 @@ def flux_derivative_params(nvars, ndim, n): return FluxDerivativeParams(nvars, ndim, n, n, n) +def empty_array_on_device(queue, shape): + return cl.array.empty(queue, shape, dtype=np.float32, order="F") + + def random_array(*shape): return np.random.random_sample(shape).astype(np.float32).copy(order="F") -- GitLab From 2268c61284eea12e9f1afa7771f849d32bd084a7 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 23:45:23 -0500 Subject: [PATCH 37/80] replace kernel.compute_flux_derivatives_gpu with appropriate transform fixtures --- kernel_fixtures.py | 21 --------------------- test.py | 8 ++++++-- transform_fixtures.py | 11 +++++++++++ 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/kernel_fixtures.py b/kernel_fixtures.py index 278d5f7..c2b9f30 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -40,24 +40,3 @@ def compute_flux_derivatives(queue, prg, params, arrays): flux_derivatives=flux_derivatives_dev) return flux_derivatives_dev.get() - -def compute_flux_derivatives_gpu(ctx_factory, params, arrays): - prg = transform.get_gpu_transformed_weno() - - queue = device.get_queue(ctx_factory) - - flux_derivatives_dev = cl.array.empty(queue, (params.nvars, params.ndim, - params.nx_halo, params.ny_halo, params.nz_halo), dtype=np.float32, order="F") - - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - - prg = lp.set_options(prg, no_numpy=True) - - prg(queue, nvars=params.nvars, ndim=params.ndim, - states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, - metric_jacobians=arrays.metric_jacobians, - flux_derivatives=flux_derivatives_dev) diff --git a/test.py b/test.py index 9e1ff7f..8e883b8 100644 --- a/test.py +++ b/test.py @@ -36,12 +36,16 @@ def test_compute_flux_derivatives(ctx_factory): kernel.compute_flux_derivatives(queue, prg, params, arrays) -@pytest.mark.skip("slow") def test_compute_flux_derivatives_gpu(ctx_factory): + queue = device.get_queue(ctx_factory) + prg = program.get_weno() + prg = transform.weno_for_gpu(prg) + prg = transform.compute_flux_derivative_gpu(queue, prg) + params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup.random_flux_derivative_arrays_on_device(ctx_factory, params) - kernel.compute_flux_derivatives_gpu(ctx_factory, params, arrays) + kernel.compute_flux_derivatives(queue, prg, params, arrays) # This lets you run 'python test.py test_case(cl._csc)' without pytest. diff --git a/transform_fixtures.py b/transform_fixtures.py index 7b29fbc..a985e97 100644 --- a/transform_fixtures.py +++ b/transform_fixtures.py @@ -16,6 +16,17 @@ def compute_flux_derivative_basic(prg): return prg.with_kernel(cfd) +def compute_flux_derivative_gpu(queue, prg): + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + + if 1: + with open("gen-code.cl", "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + + prg = lp.set_options(prg, no_numpy=True) + return prg + + def weno_for_gpu(prg): cfd = prg["compute_flux_derivatives"] -- GitLab From 112064d1d737924db4b275dbec98085cb42a70b5 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Tue, 28 May 2019 23:54:02 -0500 Subject: [PATCH 38/80] cleanup and minor rearrangement --- kernel_fixtures.py | 6 ------ setup_fixtures.py | 1 + test.py | 1 - transform_fixtures.py | 37 +++++++++++++------------------------ 4 files changed, 14 insertions(+), 31 deletions(-) diff --git a/kernel_fixtures.py b/kernel_fixtures.py index c2b9f30..1ff71ac 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -1,11 +1,5 @@ -import numpy as np -import pyopencl as cl -import pyopencl.array # noqa import loopy as lp # noqa -import device_fixtures as device -import program_fixtures as program -import transform_fixtures as transform import setup_fixtures as setup diff --git a/setup_fixtures.py b/setup_fixtures.py index 255adfc..bcecb9e 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -1,5 +1,6 @@ import numpy as np import pyopencl as cl +import pyopencl.array # noqa import device_fixtures as device diff --git a/test.py b/test.py index 8e883b8..146e052 100644 --- a/test.py +++ b/test.py @@ -39,7 +39,6 @@ def test_compute_flux_derivatives(ctx_factory): def test_compute_flux_derivatives_gpu(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() - prg = transform.weno_for_gpu(prg) prg = transform.compute_flux_derivative_gpu(queue, prg) params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) diff --git a/transform_fixtures.py b/transform_fixtures.py index a985e97..f69581a 100644 --- a/transform_fixtures.py +++ b/transform_fixtures.py @@ -16,28 +16,10 @@ def compute_flux_derivative_basic(prg): return prg.with_kernel(cfd) -def compute_flux_derivative_gpu(queue, prg): - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - - prg = lp.set_options(prg, no_numpy=True) - return prg - - def weno_for_gpu(prg): - cfd = prg["compute_flux_derivatives"] + prg = compute_flux_derivative_basic(prg) - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") - - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) + cfd = prg["compute_flux_derivatives"] for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: cfd = lp.split_iname(cfd, "i"+suffix, 16, @@ -70,7 +52,14 @@ def weno_for_gpu(prg): return prg -def get_gpu_transformed_weno(): - import program_fixtures as program - prg = program.get_weno() - return weno_for_gpu(prg) +def compute_flux_derivative_gpu(queue, prg): + prg = weno_for_gpu(prg) + + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + + if 1: + with open("gen-code.cl", "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + + prg = lp.set_options(prg, no_numpy=True) + return prg -- GitLab From f77caa259db8949bbca6035f06a25ed49088a14b Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Wed, 29 May 2019 10:21:40 -0500 Subject: [PATCH 39/80] tests marked with @pytest.mark.slow will be skipped by default, but not by the CI script --- build-and-test-py-project.sh | 2 +- conftest.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 conftest.py diff --git a/build-and-test-py-project.sh b/build-and-test-py-project.sh index 08035f5..8bfbc06 100644 --- a/build-and-test-py-project.sh +++ b/build-and-test-py-project.sh @@ -85,6 +85,6 @@ if test -f $REQUIREMENTS_TXT; then $PIP install -r $REQUIREMENTS_TXT fi -${PY_EXE} -m pytest -rw --durations=10 --tb=native --junitxml=pytest.xml -rxsw test.py +${PY_EXE} -m pytest -rw --durations=10 --tb=native --junitxml=pytest.xml -rxsw --runslow test.py # vim: foldmethod=marker diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..29be9d5 --- /dev/null +++ b/conftest.py @@ -0,0 +1,21 @@ +# setup to mark slow tests with @pytest.mark.slow, so that they don't run by +# default, but can be forced to run with the command-line option --runslow +# taken from +# https://docs.pytest.org/en/latest/example/simple.html#control-skipping-of-tests-according-to-command-line-option + +import pytest + + +def pytest_addoption(parser): + parser.addoption("--runslow", action="store_true", default=False, + help="run slow tests") + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--runslow"): + # --runslow given in cli: do not skip slow tests + return + skip_slow = pytest.mark.skip(reason="need --runslow option to run") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) -- GitLab From 08b0844e52f38d4ce1c7a52c108063033c5182cd Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Wed, 29 May 2019 10:44:35 -0500 Subject: [PATCH 40/80] skipping slow tests always for now since we don't have the new marker yet --- test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test.py b/test.py index 146e052..2748ded 100644 --- a/test.py +++ b/test.py @@ -13,6 +13,7 @@ import setup_fixtures as setup import kernel_fixtures as kernel import comparison_fixtures as compare + def test_matvec(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() @@ -25,6 +26,7 @@ def test_matvec(ctx_factory): compare.arrays(a@b, c) +@pytest.mark.skip("slow") def test_compute_flux_derivatives(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() @@ -36,6 +38,7 @@ def test_compute_flux_derivatives(ctx_factory): kernel.compute_flux_derivatives(queue, prg, params, arrays) +@pytest.mark.skip("slow") def test_compute_flux_derivatives_gpu(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() -- GitLab From 5ae0def2bcabbe6a4505045c0901e3c37a2660a7 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Wed, 29 May 2019 10:53:54 -0500 Subject: [PATCH 41/80] bring in array parsing fixtures from another project --- setup_fixtures.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/setup_fixtures.py b/setup_fixtures.py index bcecb9e..d1705f5 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -74,3 +74,42 @@ def random_flux_derivative_arrays_on_device(ctx_factory, params): metric_jacobians = random_array_on_device(queue, *params.jacobian_bounds()) return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) + + +def arrays_from_string(string_arrays): + return split_map_to_list(string_arrays, array_from_string, ":") + + +def array_from_string(string_array): + if ";" not in string_array: + if "," not in string_array: + return array_from_string_1d(string_array) + else: + return array_from_string_2d(string_array) + else: + return array_from_string_3d(string_array) + + +def array_from_string_3d(string_array): + if string_array[0] == ";": + return array_from_string_1d(string_array[1:]).reshape((-1, 1, 1)) + else: + return np.array(split_map_to_list(string_array, array_from_string_2d, ";")) + + +def array_from_string_2d(string_array): + if string_array[0] == ",": + return array_from_string_1d(string_array[1:]).reshape((-1, 1)) + else: + return np.array(split_map_to_list(string_array, array_from_string_1d, ",")) + + +def array_from_string_1d(string_array): + if string_array[0] == "i": + return np.array(split_map_to_list(string_array[1:], int, " ")) + else: + return np.array(split_map_to_list(string_array, float, " ")) + + +def split_map_to_list(string, map_func, splitter): + return list(map(map_func, string.split(splitter))) -- GitLab From 80d89b0ea04da54cbb8855d29f63bb460f58f522 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Wed, 29 May 2019 11:31:33 -0500 Subject: [PATCH 42/80] added Roe identity check -- exposes broken implementation --- comparison_fixtures.py | 5 +++++ kernel_fixtures.py | 13 +++++++++++++ setup_fixtures.py | 31 +++++++++++++++++++++++++++---- test.py | 16 ++++++++++++++++ 4 files changed, 61 insertions(+), 4 deletions(-) diff --git a/comparison_fixtures.py b/comparison_fixtures.py index e21ec0c..402f9be 100644 --- a/comparison_fixtures.py +++ b/comparison_fixtures.py @@ -3,3 +3,8 @@ from pytest import approx def arrays(a, b): assert a == approx(b) + + +def roe_identity(states, R, Rinv): + dState = states[:,1] - states[:,0] + arrays(R@(Rinv@dState), dState) diff --git a/kernel_fixtures.py b/kernel_fixtures.py index 1ff71ac..7f3dff4 100644 --- a/kernel_fixtures.py +++ b/kernel_fixtures.py @@ -15,6 +15,19 @@ def with_root_kernel(prg, root_name): return new_prg +def roe_eigensystem(queue, prg, params, states, metrics_frozen): + R_dev = setup.empty_array_on_device(queue, params.mat_bounds()) + Rinv_dev = setup.empty_array_on_device(queue, params.mat_bounds()) + lam_dev = setup.empty_array_on_device(queue, params.vec_bounds()) + + prg = with_root_kernel(prg, "roe_eigensystem") + prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, + states=states, metrics_frozen=metrics_frozen, + R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) + + return R_dev.get(), Rinv_dev.get(), lam_dev.get() + + def mult_mat_vec(queue, prg, alpha, a, b): c_dev = setup.empty_array_on_device(queue, b.shape) diff --git a/setup_fixtures.py b/setup_fixtures.py index d1705f5..6f1debc 100644 --- a/setup_fixtures.py +++ b/setup_fixtures.py @@ -5,6 +5,19 @@ import pyopencl.array # noqa import device_fixtures as device +class RoeParams: + def __init__(self, nvars, ndim, d): + self.nvars = nvars + self.ndim = ndim + self.d = d + + def mat_bounds(self): + return self.nvars, self.nvars + + def vec_bounds(self): + return self.nvars + + class FluxDerivativeParams: def __init__(self, nvars, ndim, nx, ny, nz): self.nvars = nvars @@ -40,6 +53,11 @@ class FluxDerivativeArrays: self.metric_jacobians = metric_jacobians +def roe_params(nvars, ndim, direction): + dirs = {"x" : 1, "y" : 2, "z" : 3} + return RoeParams(nvars, ndim, dirs[direction]) + + def flux_derivative_params(nvars, ndim, n): return FluxDerivativeParams(nvars, ndim, n, n, n) @@ -48,6 +66,10 @@ def empty_array_on_device(queue, shape): return cl.array.empty(queue, shape, dtype=np.float32, order="F") +def identity(n): + return np.identity(n).astype(np.float32).copy(order="F") + + def random_array(*shape): return np.random.random_sample(shape).astype(np.float32).copy(order="F") @@ -83,11 +105,12 @@ def arrays_from_string(string_arrays): def array_from_string(string_array): if ";" not in string_array: if "," not in string_array: - return array_from_string_1d(string_array) + array = array_from_string_1d(string_array) else: - return array_from_string_2d(string_array) + array = array_from_string_2d(string_array) else: - return array_from_string_3d(string_array) + array = array_from_string_3d(string_array) + return array.copy(order="F") def array_from_string_3d(string_array): @@ -108,7 +131,7 @@ def array_from_string_1d(string_array): if string_array[0] == "i": return np.array(split_map_to_list(string_array[1:], int, " ")) else: - return np.array(split_map_to_list(string_array, float, " ")) + return np.array(split_map_to_list(string_array, float, " "), dtype=np.float32) def split_map_to_list(string, map_func, splitter): diff --git a/test.py b/test.py index 2748ded..ab3af8c 100644 --- a/test.py +++ b/test.py @@ -2,6 +2,7 @@ import sys import logging import pytest +import pyopencl as cl from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) @@ -14,6 +15,21 @@ import kernel_fixtures as kernel import comparison_fixtures as compare +def test_roe(ctx_factory): + queue = device.get_queue(ctx_factory) + prg = program.get_weno() + + params = setup.roe_params(nvars=5, ndim=3, direction="x") + states = setup.array_from_string("2 1,4 1,4 1,4 1,20 5.5") + metrics_frozen = setup.identity(params.ndim) + R, Rinv, lam = kernel.roe_eigensystem(queue, prg, params, states, metrics_frozen) + + compare.roe_identity(states, R, Rinv) + + #fluxes = setup.array_from_string("4 11.2 8 8 46.4,1 2.6 1 1 7.1") + #compare.roe_property(states, fluxes, R, Rinv, lam) + + def test_matvec(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() -- GitLab From eb96634bc3a2da98ecbdfd7d9dab4e7deefe73e1 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Wed, 29 May 2019 17:52:40 -0500 Subject: [PATCH 43/80] Fix bug in Roe eigensystem computation --- WENO.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/WENO.F90 b/WENO.F90 index e8618ef..0299251 100644 --- a/WENO.F90 +++ b/WENO.F90 @@ -350,7 +350,7 @@ subroutine roe_eigensystem(nvars, ndim, d, states, metrics_frozen, R, R_inv, lam c = sqrt((1.4 - 1.0)*(H - 0.5*q)) b1 = (1.4 - 1.0)/(c**2) - b2 = 1.0 + b1*q**2 - b1*H + b2 = 1.0 + b1*q - b1*H u_tilde(1) = 0.0 do i=1,ndim -- GitLab From 2f24f12522165c465d758dd99e8086ddf7e5a1a3 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Wed, 29 May 2019 18:19:36 -0500 Subject: [PATCH 44/80] add Roe property check --- comparison_fixtures.py | 10 ++++++++++ test.py | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/comparison_fixtures.py b/comparison_fixtures.py index 402f9be..04c7432 100644 --- a/comparison_fixtures.py +++ b/comparison_fixtures.py @@ -1,3 +1,4 @@ +import numpy as np from pytest import approx @@ -8,3 +9,12 @@ def arrays(a, b): def roe_identity(states, R, Rinv): dState = states[:,1] - states[:,0] arrays(R@(Rinv@dState), dState) + + +def roe_property(states, fluxes, R, Rinv, lam): + dState = states[:,1] - states[:,0] + dFlux = fluxes[:,1] - fluxes[:,0] + + temp = Rinv@dState + temp = np.multiply(lam, temp) + arrays(R@temp, dFlux) diff --git a/test.py b/test.py index ab3af8c..d71231e 100644 --- a/test.py +++ b/test.py @@ -26,8 +26,8 @@ def test_roe(ctx_factory): compare.roe_identity(states, R, Rinv) - #fluxes = setup.array_from_string("4 11.2 8 8 46.4,1 2.6 1 1 7.1") - #compare.roe_property(states, fluxes, R, Rinv, lam) + fluxes = setup.array_from_string("4 1,11.2 2.6,8 1,8 1,46.4 7.1") + compare.roe_property(states, fluxes, R, Rinv, lam) def test_matvec(ctx_factory): -- GitLab From 49ca1d1476848698f274f2b57865afefdcdc6007 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Wed, 29 May 2019 18:20:20 -0500 Subject: [PATCH 45/80] rename Roe test --- test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test.py b/test.py index d71231e..257046f 100644 --- a/test.py +++ b/test.py @@ -15,7 +15,7 @@ import kernel_fixtures as kernel import comparison_fixtures as compare -def test_roe(ctx_factory): +def test_roe_uniform_grid(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() -- GitLab From bbc1da42d47a23b55e5b601a564091a9acbc67d3 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Wed, 29 May 2019 21:54:47 -0500 Subject: [PATCH 46/80] add more values for Roe test -- exposed another bug --- test.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/test.py b/test.py index 257046f..e72d34e 100644 --- a/test.py +++ b/test.py @@ -14,19 +14,29 @@ import setup_fixtures as setup import kernel_fixtures as kernel import comparison_fixtures as compare - -def test_roe_uniform_grid(ctx_factory): +@pytest.mark.parametrize("states_str,fluxes_str,direction", [ + ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"), + ("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,11.2 2.6,8 1,46.4 7.1", "y"), + ("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,8 1,11.2 2.6,46.4 7.1", "z"), + ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,2.6 11.2,1 8,1 8,-7.1 -46.4", "x"), + ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,1 8,2.6 11.2,1 8,-7.1 -46.4", "y"), + ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,1 8,1 8,2.6 11.2,-7.1 -46.4", "z"), + ("2 1,4 1,8 2,12 3,64 11", "4 1,11.2 2.6,16 2,24 3,134.4 12.6", "x"), + ("2 1,4 1,8 2,12 3,64 11", "8 2,16 2,35.2 5.6,48 6,268.8 25.2", "y"), + ("2 1,4 1,8 2,12 3,64 11", "12 3,24 3,48 6,75.2 10.6,403.2 37.8", "z") + ]) +def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): queue = device.get_queue(ctx_factory) prg = program.get_weno() - params = setup.roe_params(nvars=5, ndim=3, direction="x") - states = setup.array_from_string("2 1,4 1,4 1,4 1,20 5.5") + params = setup.roe_params(nvars=5, ndim=3, direction=direction) + states = setup.array_from_string(states_str) metrics_frozen = setup.identity(params.ndim) R, Rinv, lam = kernel.roe_eigensystem(queue, prg, params, states, metrics_frozen) compare.roe_identity(states, R, Rinv) - fluxes = setup.array_from_string("4 1,11.2 2.6,8 1,8 1,46.4 7.1") + fluxes = setup.array_from_string(fluxes_str) compare.roe_property(states, fluxes, R, Rinv, lam) -- GitLab From 094b2b214f7a97ff816e53061c9a4187521a3ce0 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 30 May 2019 12:13:34 -0500 Subject: [PATCH 47/80] using new strategy for marking slow tests --- test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test.py b/test.py index e72d34e..f213723 100644 --- a/test.py +++ b/test.py @@ -52,7 +52,7 @@ def test_matvec(ctx_factory): compare.arrays(a@b, c) -@pytest.mark.skip("slow") +@pytest.mark.slow def test_compute_flux_derivatives(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() @@ -64,7 +64,7 @@ def test_compute_flux_derivatives(ctx_factory): kernel.compute_flux_derivatives(queue, prg, params, arrays) -@pytest.mark.skip("slow") +@pytest.mark.slow def test_compute_flux_derivatives_gpu(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() -- GitLab From 49a446518be0e8f643a3571c03fb15bb70d74bde Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 30 May 2019 13:18:53 -0500 Subject: [PATCH 48/80] mark roe tests as expected to fail --- test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test.py b/test.py index f213723..924171f 100644 --- a/test.py +++ b/test.py @@ -14,6 +14,7 @@ import setup_fixtures as setup import kernel_fixtures as kernel import comparison_fixtures as compare +@pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"), ("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,11.2 2.6,8 1,46.4 7.1", "y"), -- GitLab From 6e213784fb0cf5e6e24776d34f27f902e7eeaf72 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Fri, 31 May 2019 10:40:11 -0500 Subject: [PATCH 49/80] fix an apparent bug in Roe eigenvalues, tests still not passing --- WENO.F90 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/WENO.F90 b/WENO.F90 index 0299251..995d3b7 100644 --- a/WENO.F90 +++ b/WENO.F90 @@ -370,11 +370,11 @@ subroutine roe_eigensystem(nvars, ndim, d, states, metrics_frozen, R, R_inv, lam alpha = rho/(2.0*c) beta = 1.0/(2.0*alpha) - lambda_roe(1) = u(1) - lambda_roe(2) = u(1) - lambda_roe(3) = u(1) - lambda_roe(4) = u(1) + c - lambda_roe(5) = u(1) - c + lambda_roe(1) = u_tilde(1)*metric_norm(ik) + lambda_roe(2) = u_tilde(1)*metric_norm(ik) + lambda_roe(3) = u_tilde(1)*metric_norm(ik) + lambda_roe(4) = u_tilde(1)*metric_norm(ik) + c*metric_norm(ik) + lambda_roe(5) = u_tilde(1)*metric_norm(ik) - c*metric_norm(ik) R(1,1) = 1.0 R(2,1) = u(1) -- GitLab From f3a2d6b2a0787465f56ac97cdccc802edfd4905f Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 17 Jun 2019 10:59:11 -0500 Subject: [PATCH 50/80] run all tests, even slow ones --- test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test.py b/test.py index 924171f..e97a345 100644 --- a/test.py +++ b/test.py @@ -53,7 +53,7 @@ def test_matvec(ctx_factory): compare.arrays(a@b, c) -@pytest.mark.slow +#@pytest.mark.slow def test_compute_flux_derivatives(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() @@ -65,7 +65,7 @@ def test_compute_flux_derivatives(ctx_factory): kernel.compute_flux_derivatives(queue, prg, params, arrays) -@pytest.mark.slow +#@pytest.mark.slow def test_compute_flux_derivatives_gpu(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() -- GitLab From 0b99456599c7673f20182efe728de57d40afc05e Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 17 Jun 2019 11:03:12 -0500 Subject: [PATCH 51/80] move comparison fixtures to main test file --- comparison_fixtures.py | 20 -------------------- test.py | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 24 deletions(-) delete mode 100644 comparison_fixtures.py diff --git a/comparison_fixtures.py b/comparison_fixtures.py deleted file mode 100644 index 04c7432..0000000 --- a/comparison_fixtures.py +++ /dev/null @@ -1,20 +0,0 @@ -import numpy as np -from pytest import approx - - -def arrays(a, b): - assert a == approx(b) - - -def roe_identity(states, R, Rinv): - dState = states[:,1] - states[:,0] - arrays(R@(Rinv@dState), dState) - - -def roe_property(states, fluxes, R, Rinv, lam): - dState = states[:,1] - states[:,0] - dFlux = fluxes[:,1] - fluxes[:,0] - - temp = Rinv@dState - temp = np.multiply(lam, temp) - arrays(R@temp, dFlux) diff --git a/test.py b/test.py index e97a345..d48f18a 100644 --- a/test.py +++ b/test.py @@ -1,7 +1,16 @@ +import numpy as np +import numpy.linalg as la +import pyopencl as cl +import pyopencl.array # noqa +import pyopencl.tools # noqa +import pyopencl.clrandom # noqa +import loopy as lp # noqa + import sys import logging import pytest +from pytest import approx import pyopencl as cl from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl @@ -12,7 +21,25 @@ import program_fixtures as program import transform_fixtures as transform import setup_fixtures as setup import kernel_fixtures as kernel -import comparison_fixtures as compare + + +def compare_arrays(a, b): + assert a == approx(b) + + +def compare_roe_identity(states, R, Rinv): + dState = states[:,1] - states[:,0] + compare_arrays(R@(Rinv@dState), dState) + + +def compare_roe_property(states, fluxes, R, Rinv, lam): + dState = states[:,1] - states[:,0] + dFlux = fluxes[:,1] - fluxes[:,0] + + temp = Rinv@dState + temp = np.multiply(lam, temp) + compare_arrays(R@temp, dFlux) + @pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ @@ -35,10 +62,10 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): metrics_frozen = setup.identity(params.ndim) R, Rinv, lam = kernel.roe_eigensystem(queue, prg, params, states, metrics_frozen) - compare.roe_identity(states, R, Rinv) + compare_roe_identity(states, R, Rinv) fluxes = setup.array_from_string(fluxes_str) - compare.roe_property(states, fluxes, R, Rinv, lam) + compare_roe_property(states, fluxes, R, Rinv, lam) def test_matvec(ctx_factory): @@ -50,7 +77,7 @@ def test_matvec(ctx_factory): c = kernel.mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) - compare.arrays(a@b, c) + compare_arrays(a@b, c) #@pytest.mark.slow -- GitLab From 440ecc0f045eb2fa94091e0ff805000d858386f4 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 17 Jun 2019 12:05:08 -0500 Subject: [PATCH 52/80] put transformation fixtures in test.py --- test.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/test.py b/test.py index d48f18a..6b42fdd 100644 --- a/test.py +++ b/test.py @@ -18,7 +18,6 @@ from pyopencl.tools import ( # noqa import device_fixtures as device import program_fixtures as program -import transform_fixtures as transform import setup_fixtures as setup import kernel_fixtures as kernel @@ -41,6 +40,70 @@ def compare_roe_property(states, fluxes, R, Rinv, lam): compare_arrays(R@temp, dFlux) +def transform_compute_flux_derivative_basic(prg): + cfd = prg["compute_flux_derivatives"] + + cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") + + cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", + lp.AddressSpace.GLOBAL) + + return prg.with_kernel(cfd) + + +def transform_weno_for_gpu(prg): + prg = transform_compute_flux_derivative_basic(prg) + + cfd = prg["compute_flux_derivatives"] + + for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: + cfd = lp.split_iname(cfd, "i"+suffix, 16, + outer_tag="g.0", inner_tag="l.0") + cfd = lp.split_iname(cfd, "j"+suffix, 16, + outer_tag="g.1", inner_tag="l.1") + + for var_name in ["delta_xi", "delta_eta", "delta_zeta"]: + cfd = lp.assignment_to_subst(cfd, var_name) + + cfd = lp.add_barrier(cfd, "tag:to_generalized", "tag:flux_x_compute") + cfd = lp.add_barrier(cfd, "tag:flux_x_compute", "tag:flux_x_diff") + cfd = lp.add_barrier(cfd, "tag:flux_x_diff", "tag:flux_y_compute") + cfd = lp.add_barrier(cfd, "tag:flux_y_compute", "tag:flux_y_diff") + cfd = lp.add_barrier(cfd, "tag:flux_y_diff", "tag:flux_z_compute") + cfd = lp.add_barrier(cfd, "tag:flux_z_compute", "tag:flux_z_diff") + cfd = lp.add_barrier(cfd, "tag:flux_z_diff", "tag:from_generalized") + + prg = prg.with_kernel(cfd) + + # FIXME: These should work, but don't + # FIXME: Undo the hand-inlining in WENO.F90 + #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") + #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") + + if 0: + print(prg["convert_to_generalized_frozen"]) + 1/0 + + return prg + + +def transform_compute_flux_derivative_gpu(queue, prg): + prg = transform_weno_for_gpu(prg) + + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + + if 1: + with open("gen-code.cl", "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + + prg = lp.set_options(prg, no_numpy=True) + return prg + + @pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"), @@ -84,7 +147,7 @@ def test_matvec(ctx_factory): def test_compute_flux_derivatives(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() - prg = transform.compute_flux_derivative_basic(prg) + prg = transform_compute_flux_derivative_basic(prg) params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup.random_flux_derivative_arrays(params) @@ -96,7 +159,7 @@ def test_compute_flux_derivatives(ctx_factory): def test_compute_flux_derivatives_gpu(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() - prg = transform.compute_flux_derivative_gpu(queue, prg) + prg = transform_compute_flux_derivative_gpu(queue, prg) params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup.random_flux_derivative_arrays_on_device(ctx_factory, params) -- GitLab From a4df56a052e0f5d01cf29844d4d9f8538096363f Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 17 Jun 2019 12:11:21 -0500 Subject: [PATCH 53/80] move program/device fixtures inside test.py --- test.py | 51 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/test.py b/test.py index 6b42fdd..3db55b6 100644 --- a/test.py +++ b/test.py @@ -16,12 +16,43 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -import device_fixtures as device -import program_fixtures as program import setup_fixtures as setup import kernel_fixtures as kernel +_QUEUE = [] + + +def get_queue(ctx_factory): + if not _QUEUE: + setup_queue(ctx_factory) + return _QUEUE[0] + + +def setup_queue(ctx_factory): + ctx = ctx_factory() + _QUEUE.append(cl.CommandQueue(ctx)) + + +_WENO_PRG = [] + + +def parse_weno(): + fn = "WENO.F90" + + with open(fn, "r") as infile: + infile_content = infile.read() + + prg = lp.parse_transformed_fortran(infile_content, filename=fn) + _WENO_PRG.append(prg) + + +def get_weno_program(): + if not _WENO_PRG: + parse_weno() + return _WENO_PRG[0] + + def compare_arrays(a, b): assert a == approx(b) @@ -117,8 +148,8 @@ def transform_compute_flux_derivative_gpu(queue, prg): ("2 1,4 1,8 2,12 3,64 11", "12 3,24 3,48 6,75.2 10.6,403.2 37.8", "z") ]) def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): - queue = device.get_queue(ctx_factory) - prg = program.get_weno() + queue = get_queue(ctx_factory) + prg = get_weno_program() params = setup.roe_params(nvars=5, ndim=3, direction=direction) states = setup.array_from_string(states_str) @@ -132,8 +163,8 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def test_matvec(ctx_factory): - queue = device.get_queue(ctx_factory) - prg = program.get_weno() + queue = get_queue(ctx_factory) + prg = get_weno_program() a = setup.random_array(10, 10) b = setup.random_array(10) @@ -145,8 +176,8 @@ def test_matvec(ctx_factory): #@pytest.mark.slow def test_compute_flux_derivatives(ctx_factory): - queue = device.get_queue(ctx_factory) - prg = program.get_weno() + queue = get_queue(ctx_factory) + prg = get_weno_program() prg = transform_compute_flux_derivative_basic(prg) params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) @@ -157,8 +188,8 @@ def test_compute_flux_derivatives(ctx_factory): #@pytest.mark.slow def test_compute_flux_derivatives_gpu(ctx_factory): - queue = device.get_queue(ctx_factory) - prg = program.get_weno() + queue = get_queue(ctx_factory) + prg = get_weno_program() prg = transform_compute_flux_derivative_gpu(queue, prg) params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) -- GitLab From d5afdfbb66d8ceb87b8917d316715d61dc6db5a8 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 17 Jun 2019 12:26:51 -0500 Subject: [PATCH 54/80] put setup fixtures in test.py --- test.py | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 143 insertions(+), 11 deletions(-) diff --git a/test.py b/test.py index 3db55b6..dbc6261 100644 --- a/test.py +++ b/test.py @@ -16,7 +16,6 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -import setup_fixtures as setup import kernel_fixtures as kernel @@ -53,6 +52,139 @@ def get_weno_program(): return _WENO_PRG[0] +class RoeParams: + def __init__(self, nvars, ndim, d): + self.nvars = nvars + self.ndim = ndim + self.d = d + + def mat_bounds(self): + return self.nvars, self.nvars + + def vec_bounds(self): + return self.nvars + + +class FluxDerivativeParams: + def __init__(self, nvars, ndim, nx, ny, nz): + self.nvars = nvars + self.ndim = ndim + + self.nx = nx + self.ny = ny + self.nz = nz + + self.nhalo = 3 + self.nx_halo = self.nx + 2*self.nhalo + self.ny_halo = self.ny + 2*self.nhalo + self.nz_halo = self.nz + 2*self.nhalo + + def state_bounds(self): + return self.nvars, self.nx_halo, self.ny_halo, self.nz_halo + + def flux_bounds(self): + return self.nvars, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo + + def metric_bounds(self): + return self.ndim, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo + + def jacobian_bounds(self): + return self.nx_halo, self.ny_halo, self.nz_halo + + +class FluxDerivativeArrays: + def __init__(self, states, fluxes, metrics, metric_jacobians): + self.states = states + self.fluxes = fluxes + self.metrics = metrics + self.metric_jacobians = metric_jacobians + + +def setup_roe_params(nvars, ndim, direction): + dirs = {"x" : 1, "y" : 2, "z" : 3} + return RoeParams(nvars, ndim, dirs[direction]) + + +def setup_flux_derivative_params(nvars, ndim, n): + return FluxDerivativeParams(nvars, ndim, n, n, n) + + +def setup_empty_array_on_device(queue, shape): + return cl.array.empty(queue, shape, dtype=np.float32, order="F") + + +def setup_identity(n): + return np.identity(n).astype(np.float32).copy(order="F") + + +def setup_random_array(*shape): + return np.random.random_sample(shape).astype(np.float32).copy(order="F") + + +def setup_random_array_on_device(queue, *shape): + return cl.array.to_device(queue, setup_random_array(*shape)) + + +def setup_random_flux_derivative_arrays(params): + states = setup_random_array(*params.state_bounds()) + fluxes = setup_random_array(*params.flux_bounds()) + metrics = setup_random_array(*params.metric_bounds()) + metric_jacobians = setup_random_array(*params.jacobian_bounds()) + + return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) + + +def setup_random_flux_derivative_arrays_on_device(ctx_factory, params): + queue = get_queue(ctx_factory) + + states = setup_random_array_on_device(queue, *params.state_bounds()) + fluxes = setup_random_array_on_device(queue, *params.flux_bounds()) + metrics = setup_random_array_on_device(queue, *params.metric_bounds()) + metric_jacobians = setup_random_array_on_device(queue, *params.jacobian_bounds()) + + return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) + + +def arrays_from_string(string_arrays): + return split_map_to_list(string_arrays, array_from_string, ":") + + +def array_from_string(string_array): + if ";" not in string_array: + if "," not in string_array: + array = array_from_string_1d(string_array) + else: + array = array_from_string_2d(string_array) + else: + array = array_from_string_3d(string_array) + return array.copy(order="F") + + +def array_from_string_3d(string_array): + if string_array[0] == ";": + return array_from_string_1d(string_array[1:]).reshape((-1, 1, 1)) + else: + return np.array(split_map_to_list(string_array, array_from_string_2d, ";")) + + +def array_from_string_2d(string_array): + if string_array[0] == ",": + return array_from_string_1d(string_array[1:]).reshape((-1, 1)) + else: + return np.array(split_map_to_list(string_array, array_from_string_1d, ",")) + + +def array_from_string_1d(string_array): + if string_array[0] == "i": + return np.array(split_map_to_list(string_array[1:], int, " ")) + else: + return np.array(split_map_to_list(string_array, float, " "), dtype=np.float32) + + +def split_map_to_list(string, map_func, splitter): + return list(map(map_func, string.split(splitter))) + + def compare_arrays(a, b): assert a == approx(b) @@ -151,14 +283,14 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): queue = get_queue(ctx_factory) prg = get_weno_program() - params = setup.roe_params(nvars=5, ndim=3, direction=direction) - states = setup.array_from_string(states_str) - metrics_frozen = setup.identity(params.ndim) + params = setup_roe_params(nvars=5, ndim=3, direction=direction) + states = array_from_string(states_str) + metrics_frozen = setup_identity(params.ndim) R, Rinv, lam = kernel.roe_eigensystem(queue, prg, params, states, metrics_frozen) compare_roe_identity(states, R, Rinv) - fluxes = setup.array_from_string(fluxes_str) + fluxes = array_from_string(fluxes_str) compare_roe_property(states, fluxes, R, Rinv, lam) @@ -166,8 +298,8 @@ def test_matvec(ctx_factory): queue = get_queue(ctx_factory) prg = get_weno_program() - a = setup.random_array(10, 10) - b = setup.random_array(10) + a = setup_random_array(10, 10) + b = setup_random_array(10) c = kernel.mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) @@ -180,8 +312,8 @@ def test_compute_flux_derivatives(ctx_factory): prg = get_weno_program() prg = transform_compute_flux_derivative_basic(prg) - params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) - arrays = setup.random_flux_derivative_arrays(params) + params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) + arrays = setup_random_flux_derivative_arrays(params) kernel.compute_flux_derivatives(queue, prg, params, arrays) @@ -192,8 +324,8 @@ def test_compute_flux_derivatives_gpu(ctx_factory): prg = get_weno_program() prg = transform_compute_flux_derivative_gpu(queue, prg) - params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) - arrays = setup.random_flux_derivative_arrays_on_device(ctx_factory, params) + params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) + arrays = setup_random_flux_derivative_arrays_on_device(ctx_factory, params) kernel.compute_flux_derivatives(queue, prg, params, arrays) -- GitLab From 2797f50236c512939a604175a6e7572ec1303f72 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 17 Jun 2019 12:33:37 -0500 Subject: [PATCH 55/80] move kernel fixtures into test.py --- test.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/test.py b/test.py index dbc6261..17e7f8f 100644 --- a/test.py +++ b/test.py @@ -16,8 +16,6 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -import kernel_fixtures as kernel - _QUEUE = [] @@ -185,6 +183,52 @@ def split_map_to_list(string, map_func, splitter): return list(map(map_func, string.split(splitter))) +def with_root_kernel(prg, root_name): + # FIXME This is a little less beautiful than it could be + new_prg = prg.copy(name=root_name) + for name in prg: + clbl = new_prg[name] + if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: + new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) + + new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) + return new_prg + + +def kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen): + R_dev = setup_empty_array_on_device(queue, params.mat_bounds()) + Rinv_dev = setup_empty_array_on_device(queue, params.mat_bounds()) + lam_dev = setup_empty_array_on_device(queue, params.vec_bounds()) + + prg = with_root_kernel(prg, "roe_eigensystem") + prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, + states=states, metrics_frozen=metrics_frozen, + R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) + + return R_dev.get(), Rinv_dev.get(), lam_dev.get() + + +def kernel_mult_mat_vec(queue, prg, alpha, a, b): + c_dev = setup_empty_array_on_device(queue, b.shape) + + prg = with_root_kernel(prg, "mult_mat_vec") + prg(queue, a=a, b=b, c=c_dev, alpha=alpha) + + return c_dev.get() + + +def kernel_compute_flux_derivatives(queue, prg, params, arrays): + flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim, + params.nx_halo, params.ny_halo, params.nz_halo)) + + prg(queue, nvars=params.nvars, ndim=params.ndim, + states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, + metric_jacobians=arrays.metric_jacobians, + flux_derivatives=flux_derivatives_dev) + + return flux_derivatives_dev.get() + + def compare_arrays(a, b): assert a == approx(b) @@ -286,7 +330,7 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): params = setup_roe_params(nvars=5, ndim=3, direction=direction) states = array_from_string(states_str) metrics_frozen = setup_identity(params.ndim) - R, Rinv, lam = kernel.roe_eigensystem(queue, prg, params, states, metrics_frozen) + R, Rinv, lam = kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen) compare_roe_identity(states, R, Rinv) @@ -301,7 +345,7 @@ def test_matvec(ctx_factory): a = setup_random_array(10, 10) b = setup_random_array(10) - c = kernel.mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) + c = kernel_mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) compare_arrays(a@b, c) @@ -315,7 +359,7 @@ def test_compute_flux_derivatives(ctx_factory): params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup_random_flux_derivative_arrays(params) - kernel.compute_flux_derivatives(queue, prg, params, arrays) + kernel_compute_flux_derivatives(queue, prg, params, arrays) #@pytest.mark.slow @@ -327,7 +371,7 @@ def test_compute_flux_derivatives_gpu(ctx_factory): params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup_random_flux_derivative_arrays_on_device(ctx_factory, params) - kernel.compute_flux_derivatives(queue, prg, params, arrays) + kernel_compute_flux_derivatives(queue, prg, params, arrays) # This lets you run 'python test.py test_case(cl._csc)' without pytest. -- GitLab From 59267775b2d303af5f6882704a511d7cec770e64 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Wed, 19 Jun 2019 21:34:20 -0500 Subject: [PATCH 56/80] create utilities.py for all utilities, remove fixture files, update benchmark script to use utilities --- benchmark.py | 28 ++++-- device_fixtures.py | 15 --- kernel_fixtures.py | 49 --------- program_fixtures.py | 20 ---- setup_fixtures.py | 138 -------------------------- test.py | 98 +----------------- transform_fixtures.py => utilities.py | 51 ++++++++-- 7 files changed, 63 insertions(+), 336 deletions(-) delete mode 100644 device_fixtures.py delete mode 100644 kernel_fixtures.py delete mode 100644 program_fixtures.py delete mode 100644 setup_fixtures.py rename transform_fixtures.py => utilities.py (67%) diff --git a/benchmark.py b/benchmark.py index 00034a7..444b689 100644 --- a/benchmark.py +++ b/benchmark.py @@ -14,18 +14,24 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -import device_fixtures as device -import program_fixtures as program -import transform_fixtures as transform -import setup_fixtures as setup +from utilities import * + + +def setup_random_array(*shape): + return np.random.random_sample(shape).astype(np.float32).copy(order="F") + + +def setup_random_array_on_device(queue, *shape): + return cl.array.to_device(queue, setup_random_array(*shape)) + def benchmark_compute_flux_derivatives_gpu(ctx_factory): logging.basicConfig(level="INFO") - prg = program.get_weno() - prg = transform.weno_for_gpu(prg) + prg = get_weno_program() + prg = transform_weno_for_gpu(prg) - queue = device.get_queue(ctx_factory) + queue = get_queue(ctx_factory) ndim = 3 nvars = 5 @@ -35,10 +41,10 @@ def benchmark_compute_flux_derivatives_gpu(ctx_factory): nz = n print("ARRAY GEN") - states = setup.random_array_on_device(queue, nvars, nx+6, ny+6, nz+6) - fluxes = setup.random_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) - metrics = setup.random_array_on_device(queue, ndim, ndim, nx+6, ny+6, nz+6) - metric_jacobians = setup.random_array_on_device(queue, nx+6, ny+6, nz+6) + states = setup_random_array_on_device(queue, nvars, nx+6, ny+6, nz+6) + fluxes = setup_random_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) + metrics = setup_random_array_on_device(queue, ndim, ndim, nx+6, ny+6, nz+6) + metric_jacobians = setup_random_array_on_device(queue, nx+6, ny+6, nz+6) print("END ARRAY GEN") flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, diff --git a/device_fixtures.py b/device_fixtures.py deleted file mode 100644 index d0dbc59..0000000 --- a/device_fixtures.py +++ /dev/null @@ -1,15 +0,0 @@ -import pyopencl as cl - - -_QUEUE = [] - - -def get_queue(ctx_factory): - if not _QUEUE: - setup_queue(ctx_factory) - return _QUEUE[0] - - -def setup_queue(ctx_factory): - ctx = ctx_factory() - _QUEUE.append(cl.CommandQueue(ctx)) diff --git a/kernel_fixtures.py b/kernel_fixtures.py deleted file mode 100644 index 7f3dff4..0000000 --- a/kernel_fixtures.py +++ /dev/null @@ -1,49 +0,0 @@ -import loopy as lp # noqa - -import setup_fixtures as setup - - -def with_root_kernel(prg, root_name): - # FIXME This is a little less beautiful than it could be - new_prg = prg.copy(name=root_name) - for name in prg: - clbl = new_prg[name] - if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: - new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) - - new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) - return new_prg - - -def roe_eigensystem(queue, prg, params, states, metrics_frozen): - R_dev = setup.empty_array_on_device(queue, params.mat_bounds()) - Rinv_dev = setup.empty_array_on_device(queue, params.mat_bounds()) - lam_dev = setup.empty_array_on_device(queue, params.vec_bounds()) - - prg = with_root_kernel(prg, "roe_eigensystem") - prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, - states=states, metrics_frozen=metrics_frozen, - R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) - - return R_dev.get(), Rinv_dev.get(), lam_dev.get() - - -def mult_mat_vec(queue, prg, alpha, a, b): - c_dev = setup.empty_array_on_device(queue, b.shape) - - prg = with_root_kernel(prg, "mult_mat_vec") - prg(queue, a=a, b=b, c=c_dev, alpha=alpha) - - return c_dev.get() - - -def compute_flux_derivatives(queue, prg, params, arrays): - flux_derivatives_dev = setup.empty_array_on_device(queue, (params.nvars, params.ndim, - params.nx_halo, params.ny_halo, params.nz_halo)) - - prg(queue, nvars=params.nvars, ndim=params.ndim, - states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, - metric_jacobians=arrays.metric_jacobians, - flux_derivatives=flux_derivatives_dev) - - return flux_derivatives_dev.get() diff --git a/program_fixtures.py b/program_fixtures.py deleted file mode 100644 index 0f50ff1..0000000 --- a/program_fixtures.py +++ /dev/null @@ -1,20 +0,0 @@ -import loopy as lp - - -_WENO_PRG = [] - - -def parse_weno(): - fn = "WENO.F90" - - with open(fn, "r") as infile: - infile_content = infile.read() - - prg = lp.parse_transformed_fortran(infile_content, filename=fn) - _WENO_PRG.append(prg) - - -def get_weno(): - if not _WENO_PRG: - parse_weno() - return _WENO_PRG[0] diff --git a/setup_fixtures.py b/setup_fixtures.py deleted file mode 100644 index 6f1debc..0000000 --- a/setup_fixtures.py +++ /dev/null @@ -1,138 +0,0 @@ -import numpy as np -import pyopencl as cl -import pyopencl.array # noqa - -import device_fixtures as device - - -class RoeParams: - def __init__(self, nvars, ndim, d): - self.nvars = nvars - self.ndim = ndim - self.d = d - - def mat_bounds(self): - return self.nvars, self.nvars - - def vec_bounds(self): - return self.nvars - - -class FluxDerivativeParams: - def __init__(self, nvars, ndim, nx, ny, nz): - self.nvars = nvars - self.ndim = ndim - - self.nx = nx - self.ny = ny - self.nz = nz - - self.nhalo = 3 - self.nx_halo = self.nx + 2*self.nhalo - self.ny_halo = self.ny + 2*self.nhalo - self.nz_halo = self.nz + 2*self.nhalo - - def state_bounds(self): - return self.nvars, self.nx_halo, self.ny_halo, self.nz_halo - - def flux_bounds(self): - return self.nvars, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo - - def metric_bounds(self): - return self.ndim, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo - - def jacobian_bounds(self): - return self.nx_halo, self.ny_halo, self.nz_halo - - -class FluxDerivativeArrays: - def __init__(self, states, fluxes, metrics, metric_jacobians): - self.states = states - self.fluxes = fluxes - self.metrics = metrics - self.metric_jacobians = metric_jacobians - - -def roe_params(nvars, ndim, direction): - dirs = {"x" : 1, "y" : 2, "z" : 3} - return RoeParams(nvars, ndim, dirs[direction]) - - -def flux_derivative_params(nvars, ndim, n): - return FluxDerivativeParams(nvars, ndim, n, n, n) - - -def empty_array_on_device(queue, shape): - return cl.array.empty(queue, shape, dtype=np.float32, order="F") - - -def identity(n): - return np.identity(n).astype(np.float32).copy(order="F") - - -def random_array(*shape): - return np.random.random_sample(shape).astype(np.float32).copy(order="F") - - -def random_array_on_device(queue, *shape): - return cl.array.to_device(queue, random_array(*shape)) - - -def random_flux_derivative_arrays(params): - states = random_array(*params.state_bounds()) - fluxes = random_array(*params.flux_bounds()) - metrics = random_array(*params.metric_bounds()) - metric_jacobians = random_array(*params.jacobian_bounds()) - - return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) - - -def random_flux_derivative_arrays_on_device(ctx_factory, params): - queue = device.get_queue(ctx_factory) - - states = random_array_on_device(queue, *params.state_bounds()) - fluxes = random_array_on_device(queue, *params.flux_bounds()) - metrics = random_array_on_device(queue, *params.metric_bounds()) - metric_jacobians = random_array_on_device(queue, *params.jacobian_bounds()) - - return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) - - -def arrays_from_string(string_arrays): - return split_map_to_list(string_arrays, array_from_string, ":") - - -def array_from_string(string_array): - if ";" not in string_array: - if "," not in string_array: - array = array_from_string_1d(string_array) - else: - array = array_from_string_2d(string_array) - else: - array = array_from_string_3d(string_array) - return array.copy(order="F") - - -def array_from_string_3d(string_array): - if string_array[0] == ";": - return array_from_string_1d(string_array[1:]).reshape((-1, 1, 1)) - else: - return np.array(split_map_to_list(string_array, array_from_string_2d, ";")) - - -def array_from_string_2d(string_array): - if string_array[0] == ",": - return array_from_string_1d(string_array[1:]).reshape((-1, 1)) - else: - return np.array(split_map_to_list(string_array, array_from_string_1d, ",")) - - -def array_from_string_1d(string_array): - if string_array[0] == "i": - return np.array(split_map_to_list(string_array[1:], int, " ")) - else: - return np.array(split_map_to_list(string_array, float, " "), dtype=np.float32) - - -def split_map_to_list(string, map_func, splitter): - return list(map(map_func, string.split(splitter))) diff --git a/test.py b/test.py index 17e7f8f..514ba6f 100644 --- a/test.py +++ b/test.py @@ -11,43 +11,11 @@ import logging import pytest from pytest import approx -import pyopencl as cl from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) - -_QUEUE = [] - - -def get_queue(ctx_factory): - if not _QUEUE: - setup_queue(ctx_factory) - return _QUEUE[0] - - -def setup_queue(ctx_factory): - ctx = ctx_factory() - _QUEUE.append(cl.CommandQueue(ctx)) - - -_WENO_PRG = [] - - -def parse_weno(): - fn = "WENO.F90" - - with open(fn, "r") as infile: - infile_content = infile.read() - - prg = lp.parse_transformed_fortran(infile_content, filename=fn) - _WENO_PRG.append(prg) - - -def get_weno_program(): - if not _WENO_PRG: - parse_weno() - return _WENO_PRG[0] +from utilities import * class RoeParams: @@ -247,70 +215,6 @@ def compare_roe_property(states, fluxes, R, Rinv, lam): compare_arrays(R@temp, dFlux) -def transform_compute_flux_derivative_basic(prg): - cfd = prg["compute_flux_derivatives"] - - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") - - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) - - return prg.with_kernel(cfd) - - -def transform_weno_for_gpu(prg): - prg = transform_compute_flux_derivative_basic(prg) - - cfd = prg["compute_flux_derivatives"] - - for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: - cfd = lp.split_iname(cfd, "i"+suffix, 16, - outer_tag="g.0", inner_tag="l.0") - cfd = lp.split_iname(cfd, "j"+suffix, 16, - outer_tag="g.1", inner_tag="l.1") - - for var_name in ["delta_xi", "delta_eta", "delta_zeta"]: - cfd = lp.assignment_to_subst(cfd, var_name) - - cfd = lp.add_barrier(cfd, "tag:to_generalized", "tag:flux_x_compute") - cfd = lp.add_barrier(cfd, "tag:flux_x_compute", "tag:flux_x_diff") - cfd = lp.add_barrier(cfd, "tag:flux_x_diff", "tag:flux_y_compute") - cfd = lp.add_barrier(cfd, "tag:flux_y_compute", "tag:flux_y_diff") - cfd = lp.add_barrier(cfd, "tag:flux_y_diff", "tag:flux_z_compute") - cfd = lp.add_barrier(cfd, "tag:flux_z_compute", "tag:flux_z_diff") - cfd = lp.add_barrier(cfd, "tag:flux_z_diff", "tag:from_generalized") - - prg = prg.with_kernel(cfd) - - # FIXME: These should work, but don't - # FIXME: Undo the hand-inlining in WENO.F90 - #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") - #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") - - if 0: - print(prg["convert_to_generalized_frozen"]) - 1/0 - - return prg - - -def transform_compute_flux_derivative_gpu(queue, prg): - prg = transform_weno_for_gpu(prg) - - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - - prg = lp.set_options(prg, no_numpy=True) - return prg - - @pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"), diff --git a/transform_fixtures.py b/utilities.py similarity index 67% rename from transform_fixtures.py rename to utilities.py index f69581a..a188dce 100644 --- a/transform_fixtures.py +++ b/utilities.py @@ -1,7 +1,46 @@ -import loopy as lp +import numpy as np +import numpy.linalg as la +import pyopencl as cl +import pyopencl.array # noqa +import pyopencl.tools # noqa +import pyopencl.clrandom # noqa +import loopy as lp # noqa -def compute_flux_derivative_basic(prg): +_QUEUE = [] + + +def get_queue(ctx_factory): + if not _QUEUE: + setup_queue(ctx_factory) + return _QUEUE[0] + + +def setup_queue(ctx_factory): + ctx = ctx_factory() + _QUEUE.append(cl.CommandQueue(ctx)) + + +_WENO_PRG = [] + + +def parse_weno(): + fn = "WENO.F90" + + with open(fn, "r") as infile: + infile_content = infile.read() + + prg = lp.parse_transformed_fortran(infile_content, filename=fn) + _WENO_PRG.append(prg) + + +def get_weno_program(): + if not _WENO_PRG: + parse_weno() + return _WENO_PRG[0] + + +def transform_compute_flux_derivative_basic(prg): cfd = prg["compute_flux_derivatives"] cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") @@ -16,8 +55,8 @@ def compute_flux_derivative_basic(prg): return prg.with_kernel(cfd) -def weno_for_gpu(prg): - prg = compute_flux_derivative_basic(prg) +def transform_weno_for_gpu(prg): + prg = transform_compute_flux_derivative_basic(prg) cfd = prg["compute_flux_derivatives"] @@ -52,8 +91,8 @@ def weno_for_gpu(prg): return prg -def compute_flux_derivative_gpu(queue, prg): - prg = weno_for_gpu(prg) +def transform_compute_flux_derivative_gpu(queue, prg): + prg = transform_weno_for_gpu(prg) prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) -- GitLab From 68f74b816c6f6e489a3d348cd34153c2cf240942 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Wed, 19 Jun 2019 22:08:12 -0500 Subject: [PATCH 57/80] move benchmark array generation to test-local functions --- benchmark.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/benchmark.py b/benchmark.py index 444b689..b48c19c 100644 --- a/benchmark.py +++ b/benchmark.py @@ -17,15 +17,13 @@ from pyopencl.tools import ( # noqa from utilities import * -def setup_random_array(*shape): - return np.random.random_sample(shape).astype(np.float32).copy(order="F") - - -def setup_random_array_on_device(queue, *shape): - return cl.array.to_device(queue, setup_random_array(*shape)) - - def benchmark_compute_flux_derivatives_gpu(ctx_factory): + def random_array_on_device(queue, *shape): + return cl.array.to_device(queue, random_array(*shape)) + + def random_array(*shape): + return np.random.random_sample(shape).astype(np.float32).copy(order="F") + logging.basicConfig(level="INFO") prg = get_weno_program() @@ -41,10 +39,10 @@ def benchmark_compute_flux_derivatives_gpu(ctx_factory): nz = n print("ARRAY GEN") - states = setup_random_array_on_device(queue, nvars, nx+6, ny+6, nz+6) - fluxes = setup_random_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) - metrics = setup_random_array_on_device(queue, ndim, ndim, nx+6, ny+6, nz+6) - metric_jacobians = setup_random_array_on_device(queue, nx+6, ny+6, nz+6) + states = random_array_on_device(queue, nvars, nx+6, ny+6, nz+6) + fluxes = random_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) + metrics = random_array_on_device(queue, ndim, ndim, nx+6, ny+6, nz+6) + metric_jacobians = random_array_on_device(queue, nx+6, ny+6, nz+6) print("END ARRAY GEN") flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, -- GitLab From 13e2a49d25d7c6d892934e77e374b0003395b672 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 09:17:57 -0500 Subject: [PATCH 58/80] refactor out kernel_compute_flux_derivatives interface function --- test.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/test.py b/test.py index 514ba6f..75b6e16 100644 --- a/test.py +++ b/test.py @@ -185,18 +185,6 @@ def kernel_mult_mat_vec(queue, prg, alpha, a, b): return c_dev.get() -def kernel_compute_flux_derivatives(queue, prg, params, arrays): - flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim, - params.nx_halo, params.ny_halo, params.nz_halo)) - - prg(queue, nvars=params.nvars, ndim=params.ndim, - states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, - metric_jacobians=arrays.metric_jacobians, - flux_derivatives=flux_derivatives_dev) - - return flux_derivatives_dev.get() - - def compare_arrays(a, b): assert a == approx(b) @@ -263,7 +251,13 @@ def test_compute_flux_derivatives(ctx_factory): params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup_random_flux_derivative_arrays(params) - kernel_compute_flux_derivatives(queue, prg, params, arrays) + flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim, + params.nx_halo, params.ny_halo, params.nz_halo)) + + prg(queue, nvars=params.nvars, ndim=params.ndim, + states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, + metric_jacobians=arrays.metric_jacobians, + flux_derivatives=flux_derivatives_dev) #@pytest.mark.slow @@ -275,7 +269,13 @@ def test_compute_flux_derivatives_gpu(ctx_factory): params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup_random_flux_derivative_arrays_on_device(ctx_factory, params) - kernel_compute_flux_derivatives(queue, prg, params, arrays) + flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim, + params.nx_halo, params.ny_halo, params.nz_halo)) + + prg(queue, nvars=params.nvars, ndim=params.ndim, + states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, + metric_jacobians=arrays.metric_jacobians, + flux_derivatives=flux_derivatives_dev) # This lets you run 'python test.py test_case(cl._csc)' without pytest. -- GitLab From b8979ede9f64ef8d23d39453292f9157c225a3e3 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 10:19:57 -0500 Subject: [PATCH 59/80] refactor test_compute_flux_derivatives to use lp.auto_test_vs_ref --- test.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/test.py b/test.py index 75b6e16..2e810e9 100644 --- a/test.py +++ b/test.py @@ -244,20 +244,11 @@ def test_matvec(ctx_factory): #@pytest.mark.slow def test_compute_flux_derivatives(ctx_factory): - queue = get_queue(ctx_factory) prg = get_weno_program() prg = transform_compute_flux_derivative_basic(prg) - params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) - arrays = setup_random_flux_derivative_arrays(params) - - flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim, - params.nx_halo, params.ny_halo, params.nz_halo)) - - prg(queue, nvars=params.nvars, ndim=params.ndim, - states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, - metric_jacobians=arrays.metric_jacobians, - flux_derivatives=flux_derivatives_dev) + lp.auto_test_vs_ref(prg, ctx_factory(), + parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) #@pytest.mark.slow -- GitLab From 2ac94b6fddb6726b63130127f25c623df4144f5e Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 10:31:40 -0500 Subject: [PATCH 60/80] refactor test_compute_flux_derivatives to use lp.auto_test_vs_ref --- test.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/test.py b/test.py index 2e810e9..2b694fb 100644 --- a/test.py +++ b/test.py @@ -253,20 +253,11 @@ def test_compute_flux_derivatives(ctx_factory): #@pytest.mark.slow def test_compute_flux_derivatives_gpu(ctx_factory): - queue = get_queue(ctx_factory) prg = get_weno_program() - prg = transform_compute_flux_derivative_gpu(queue, prg) - - params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) - arrays = setup_random_flux_derivative_arrays_on_device(ctx_factory, params) - - flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim, - params.nx_halo, params.ny_halo, params.nz_halo)) + prg = transform_compute_flux_derivative_gpu(get_queue(ctx_factory), prg) - prg(queue, nvars=params.nvars, ndim=params.ndim, - states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, - metric_jacobians=arrays.metric_jacobians, - flux_derivatives=flux_derivatives_dev) + lp.auto_test_vs_ref(prg, ctx_factory(), + parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) # This lets you run 'python test.py test_case(cl._csc)' without pytest. -- GitLab From c2d76477b408a2cc7f7276fe41e44ca7db541d5b Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 11:00:12 -0500 Subject: [PATCH 61/80] heavy reorganization to put things in utilities.py or as test-local functions --- benchmark.py | 6 -- test.py | 249 ++++++++++++--------------------------------------- utilities.py | 63 +++++++++++++ 3 files changed, 122 insertions(+), 196 deletions(-) diff --git a/benchmark.py b/benchmark.py index b48c19c..f65cd58 100644 --- a/benchmark.py +++ b/benchmark.py @@ -18,12 +18,6 @@ from utilities import * def benchmark_compute_flux_derivatives_gpu(ctx_factory): - def random_array_on_device(queue, *shape): - return cl.array.to_device(queue, random_array(*shape)) - - def random_array(*shape): - return np.random.random_sample(shape).astype(np.float32).copy(order="F") - logging.basicConfig(level="INFO") prg = get_weno_program() diff --git a/test.py b/test.py index 2b694fb..7d84ace 100644 --- a/test.py +++ b/test.py @@ -18,191 +18,6 @@ from pyopencl.tools import ( # noqa from utilities import * -class RoeParams: - def __init__(self, nvars, ndim, d): - self.nvars = nvars - self.ndim = ndim - self.d = d - - def mat_bounds(self): - return self.nvars, self.nvars - - def vec_bounds(self): - return self.nvars - - -class FluxDerivativeParams: - def __init__(self, nvars, ndim, nx, ny, nz): - self.nvars = nvars - self.ndim = ndim - - self.nx = nx - self.ny = ny - self.nz = nz - - self.nhalo = 3 - self.nx_halo = self.nx + 2*self.nhalo - self.ny_halo = self.ny + 2*self.nhalo - self.nz_halo = self.nz + 2*self.nhalo - - def state_bounds(self): - return self.nvars, self.nx_halo, self.ny_halo, self.nz_halo - - def flux_bounds(self): - return self.nvars, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo - - def metric_bounds(self): - return self.ndim, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo - - def jacobian_bounds(self): - return self.nx_halo, self.ny_halo, self.nz_halo - - -class FluxDerivativeArrays: - def __init__(self, states, fluxes, metrics, metric_jacobians): - self.states = states - self.fluxes = fluxes - self.metrics = metrics - self.metric_jacobians = metric_jacobians - - -def setup_roe_params(nvars, ndim, direction): - dirs = {"x" : 1, "y" : 2, "z" : 3} - return RoeParams(nvars, ndim, dirs[direction]) - - -def setup_flux_derivative_params(nvars, ndim, n): - return FluxDerivativeParams(nvars, ndim, n, n, n) - - -def setup_empty_array_on_device(queue, shape): - return cl.array.empty(queue, shape, dtype=np.float32, order="F") - - -def setup_identity(n): - return np.identity(n).astype(np.float32).copy(order="F") - - -def setup_random_array(*shape): - return np.random.random_sample(shape).astype(np.float32).copy(order="F") - - -def setup_random_array_on_device(queue, *shape): - return cl.array.to_device(queue, setup_random_array(*shape)) - - -def setup_random_flux_derivative_arrays(params): - states = setup_random_array(*params.state_bounds()) - fluxes = setup_random_array(*params.flux_bounds()) - metrics = setup_random_array(*params.metric_bounds()) - metric_jacobians = setup_random_array(*params.jacobian_bounds()) - - return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) - - -def setup_random_flux_derivative_arrays_on_device(ctx_factory, params): - queue = get_queue(ctx_factory) - - states = setup_random_array_on_device(queue, *params.state_bounds()) - fluxes = setup_random_array_on_device(queue, *params.flux_bounds()) - metrics = setup_random_array_on_device(queue, *params.metric_bounds()) - metric_jacobians = setup_random_array_on_device(queue, *params.jacobian_bounds()) - - return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) - - -def arrays_from_string(string_arrays): - return split_map_to_list(string_arrays, array_from_string, ":") - - -def array_from_string(string_array): - if ";" not in string_array: - if "," not in string_array: - array = array_from_string_1d(string_array) - else: - array = array_from_string_2d(string_array) - else: - array = array_from_string_3d(string_array) - return array.copy(order="F") - - -def array_from_string_3d(string_array): - if string_array[0] == ";": - return array_from_string_1d(string_array[1:]).reshape((-1, 1, 1)) - else: - return np.array(split_map_to_list(string_array, array_from_string_2d, ";")) - - -def array_from_string_2d(string_array): - if string_array[0] == ",": - return array_from_string_1d(string_array[1:]).reshape((-1, 1)) - else: - return np.array(split_map_to_list(string_array, array_from_string_1d, ",")) - - -def array_from_string_1d(string_array): - if string_array[0] == "i": - return np.array(split_map_to_list(string_array[1:], int, " ")) - else: - return np.array(split_map_to_list(string_array, float, " "), dtype=np.float32) - - -def split_map_to_list(string, map_func, splitter): - return list(map(map_func, string.split(splitter))) - - -def with_root_kernel(prg, root_name): - # FIXME This is a little less beautiful than it could be - new_prg = prg.copy(name=root_name) - for name in prg: - clbl = new_prg[name] - if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: - new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) - - new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) - return new_prg - - -def kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen): - R_dev = setup_empty_array_on_device(queue, params.mat_bounds()) - Rinv_dev = setup_empty_array_on_device(queue, params.mat_bounds()) - lam_dev = setup_empty_array_on_device(queue, params.vec_bounds()) - - prg = with_root_kernel(prg, "roe_eigensystem") - prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, - states=states, metrics_frozen=metrics_frozen, - R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) - - return R_dev.get(), Rinv_dev.get(), lam_dev.get() - - -def kernel_mult_mat_vec(queue, prg, alpha, a, b): - c_dev = setup_empty_array_on_device(queue, b.shape) - - prg = with_root_kernel(prg, "mult_mat_vec") - prg(queue, a=a, b=b, c=c_dev, alpha=alpha) - - return c_dev.get() - - -def compare_arrays(a, b): - assert a == approx(b) - - -def compare_roe_identity(states, R, Rinv): - dState = states[:,1] - states[:,0] - compare_arrays(R@(Rinv@dState), dState) - - -def compare_roe_property(states, fluxes, R, Rinv, lam): - dState = states[:,1] - states[:,0] - dFlux = fluxes[:,1] - fluxes[:,0] - - temp = Rinv@dState - temp = np.multiply(lam, temp) - compare_arrays(R@temp, dFlux) - - @pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"), @@ -216,26 +31,80 @@ def compare_roe_property(states, fluxes, R, Rinv, lam): ("2 1,4 1,8 2,12 3,64 11", "12 3,24 3,48 6,75.2 10.6,403.2 37.8", "z") ]) def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): + class RoeParams: + def __init__(self, nvars, ndim, d): + self.nvars = nvars + self.ndim = ndim + self.d = d + + def mat_bounds(self): + return self.nvars, self.nvars + + def vec_bounds(self): + return self.nvars + + def setup_roe_params(nvars, ndim, direction): + dirs = {"x" : 1, "y" : 2, "z" : 3} + return RoeParams(nvars, ndim, dirs[direction]) + + def identity_matrix(n): + return np.identity(n).astype(np.float32).copy(order="F") + + def kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen): + R_dev = empty_array_on_device(queue, params.mat_bounds()) + Rinv_dev = empty_array_on_device(queue, params.mat_bounds()) + lam_dev = empty_array_on_device(queue, params.vec_bounds()) + + prg = with_root_kernel(prg, "roe_eigensystem") + prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, + states=states, metrics_frozen=metrics_frozen, + R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) + + return R_dev.get(), Rinv_dev.get(), lam_dev.get() + + def check_roe_identity(states, R, Rinv): + dState = states[:,1] - states[:,0] + compare_arrays(R@(Rinv@dState), dState) + + def check_roe_property(states, fluxes, R, Rinv, lam): + dState = states[:,1] - states[:,0] + dFlux = fluxes[:,1] - fluxes[:,0] + + temp = Rinv@dState + temp = np.multiply(lam, temp) + compare_arrays(R@temp, dFlux) + queue = get_queue(ctx_factory) prg = get_weno_program() params = setup_roe_params(nvars=5, ndim=3, direction=direction) states = array_from_string(states_str) - metrics_frozen = setup_identity(params.ndim) + metrics_frozen = identity_matrix(params.ndim) R, Rinv, lam = kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen) - compare_roe_identity(states, R, Rinv) + check_roe_identity(states, R, Rinv) fluxes = array_from_string(fluxes_str) - compare_roe_property(states, fluxes, R, Rinv, lam) + check_roe_property(states, fluxes, R, Rinv, lam) def test_matvec(ctx_factory): + def kernel_mult_mat_vec(queue, prg, alpha, a, b): + c_dev = empty_array_on_device(queue, b.shape) + + prg = with_root_kernel(prg, "mult_mat_vec") + prg(queue, a=a, b=b, c=c_dev, alpha=alpha) + + return c_dev.get() + + def random_array(*shape): + return np.random.random_sample(shape).astype(np.float32).copy(order="F") + queue = get_queue(ctx_factory) prg = get_weno_program() - a = setup_random_array(10, 10) - b = setup_random_array(10) + a = random_array(10, 10) + b = random_array(10) c = kernel_mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) diff --git a/utilities.py b/utilities.py index a188dce..8c942d7 100644 --- a/utilities.py +++ b/utilities.py @@ -5,6 +5,69 @@ import pyopencl.array # noqa import pyopencl.tools # noqa import pyopencl.clrandom # noqa import loopy as lp # noqa +from pytest import approx + + +def split_map_to_list(string, map_func, splitter): + return list(map(map_func, string.split(splitter))) + + +def arrays_from_string(string_arrays): + return split_map_to_list(string_arrays, array_from_string, ":") + + +def array_from_string(string_array): + def array_from_string_1d(string_array): + if string_array[0] == "i": + return np.array(split_map_to_list(string_array[1:], int, " ")) + else: + return np.array(split_map_to_list(string_array, float, " "), dtype=np.float32) + + def array_from_string_2d(string_array): + if string_array[0] == ",": + return array_from_string_1d(string_array[1:]).reshape((-1, 1)) + else: + return np.array(split_map_to_list(string_array, array_from_string_1d, ",")) + + def array_from_string_3d(string_array): + if string_array[0] == ";": + return array_from_string_1d(string_array[1:]).reshape((-1, 1, 1)) + else: + return np.array(split_map_to_list(string_array, array_from_string_2d, ";")) + + if ";" not in string_array: + if "," not in string_array: + array = array_from_string_1d(string_array) + else: + array = array_from_string_2d(string_array) + else: + array = array_from_string_3d(string_array) + return array.copy(order="F") + + +def with_root_kernel(prg, root_name): + # FIXME This is a little less beautiful than it could be + new_prg = prg.copy(name=root_name) + for name in prg: + clbl = new_prg[name] + if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: + new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) + + new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) + return new_prg + + +def compare_arrays(a, b): + assert a == approx(b) + + +def random_array_on_device(queue, *shape): + empty = empty_array_on_device(queue, shape) + return cl.clrandom.fill_rand(empty) + + +def empty_array_on_device(queue, shape): + return cl.array.empty(queue, shape, dtype=np.float32, order="F") _QUEUE = [] -- GitLab From a8f1ddbca390464c3db951c4802168dc9a29e268 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 11:07:21 -0500 Subject: [PATCH 62/80] reorganize order of utility functions --- utilities.py | 100 +++++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 46 deletions(-) diff --git a/utilities.py b/utilities.py index 8c942d7..2ada8c7 100644 --- a/utilities.py +++ b/utilities.py @@ -8,8 +8,19 @@ import loopy as lp # noqa from pytest import approx -def split_map_to_list(string, map_func, splitter): - return list(map(map_func, string.split(splitter))) +### Arrays ### + +def compare_arrays(a, b): + assert a == approx(b) + + +def random_array_on_device(queue, *shape): + empty = empty_array_on_device(queue, shape) + return cl.clrandom.fill_rand(empty) + + +def empty_array_on_device(queue, shape): + return cl.array.empty(queue, shape, dtype=np.float32, order="F") def arrays_from_string(string_arrays): @@ -45,30 +56,11 @@ def array_from_string(string_array): return array.copy(order="F") -def with_root_kernel(prg, root_name): - # FIXME This is a little less beautiful than it could be - new_prg = prg.copy(name=root_name) - for name in prg: - clbl = new_prg[name] - if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: - new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) - - new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) - return new_prg - - -def compare_arrays(a, b): - assert a == approx(b) - - -def random_array_on_device(queue, *shape): - empty = empty_array_on_device(queue, shape) - return cl.clrandom.fill_rand(empty) - +def split_map_to_list(string, map_func, splitter): + return list(map(map_func, string.split(splitter))) -def empty_array_on_device(queue, shape): - return cl.array.empty(queue, shape, dtype=np.float32, order="F") +### Device ### _QUEUE = [] @@ -84,9 +76,17 @@ def setup_queue(ctx_factory): _QUEUE.append(cl.CommandQueue(ctx)) +### Program / Kernel ### + _WENO_PRG = [] +def get_weno_program(): + if not _WENO_PRG: + parse_weno() + return _WENO_PRG[0] + + def parse_weno(): fn = "WENO.F90" @@ -97,25 +97,29 @@ def parse_weno(): _WENO_PRG.append(prg) -def get_weno_program(): - if not _WENO_PRG: - parse_weno() - return _WENO_PRG[0] +def with_root_kernel(prg, root_name): + # FIXME This is a little less beautiful than it could be + new_prg = prg.copy(name=root_name) + for name in prg: + clbl = new_prg[name] + if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: + new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) + new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) + return new_prg -def transform_compute_flux_derivative_basic(prg): - cfd = prg["compute_flux_derivatives"] - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") +def transform_compute_flux_derivative_gpu(queue, prg): + prg = transform_weno_for_gpu(prg) - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - return prg.with_kernel(cfd) + if 1: + with open("gen-code.cl", "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + + prg = lp.set_options(prg, no_numpy=True) + return prg def transform_weno_for_gpu(prg): @@ -154,14 +158,18 @@ def transform_weno_for_gpu(prg): return prg -def transform_compute_flux_derivative_gpu(queue, prg): - prg = transform_weno_for_gpu(prg) +def transform_compute_flux_derivative_basic(prg): + cfd = prg["compute_flux_derivatives"] - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") + + cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", + lp.AddressSpace.GLOBAL) + + return prg.with_kernel(cfd) - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - prg = lp.set_options(prg, no_numpy=True) - return prg -- GitLab From 23da6d6d0ca1ad89d6cc46ddce72fad0546a4bbb Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 13:55:57 -0500 Subject: [PATCH 63/80] use empty_array utility for benchmark code, refactor it to have same interface as random_array utility --- benchmark.py | 3 +-- test.py | 10 +++++----- utilities.py | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/benchmark.py b/benchmark.py index f65cd58..df06f97 100644 --- a/benchmark.py +++ b/benchmark.py @@ -39,8 +39,7 @@ def benchmark_compute_flux_derivatives_gpu(ctx_factory): metric_jacobians = random_array_on_device(queue, nx+6, ny+6, nz+6) print("END ARRAY GEN") - flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, - nz+6), dtype=np.float32, order="F") + flux_derivatives_dev = empty_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) diff --git a/test.py b/test.py index 7d84ace..a79a1f3 100644 --- a/test.py +++ b/test.py @@ -40,7 +40,7 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def mat_bounds(self): return self.nvars, self.nvars - def vec_bounds(self): + def vec_bound(self): return self.nvars def setup_roe_params(nvars, ndim, direction): @@ -51,9 +51,9 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): return np.identity(n).astype(np.float32).copy(order="F") def kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen): - R_dev = empty_array_on_device(queue, params.mat_bounds()) - Rinv_dev = empty_array_on_device(queue, params.mat_bounds()) - lam_dev = empty_array_on_device(queue, params.vec_bounds()) + R_dev = empty_array_on_device(queue, *params.mat_bounds()) + Rinv_dev = empty_array_on_device(queue, *params.mat_bounds()) + lam_dev = empty_array_on_device(queue, params.vec_bound()) prg = with_root_kernel(prg, "roe_eigensystem") prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, @@ -90,7 +90,7 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def test_matvec(ctx_factory): def kernel_mult_mat_vec(queue, prg, alpha, a, b): - c_dev = empty_array_on_device(queue, b.shape) + c_dev = empty_array_on_device(queue, *b.shape) prg = with_root_kernel(prg, "mult_mat_vec") prg(queue, a=a, b=b, c=c_dev, alpha=alpha) diff --git a/utilities.py b/utilities.py index 2ada8c7..45b1012 100644 --- a/utilities.py +++ b/utilities.py @@ -15,11 +15,11 @@ def compare_arrays(a, b): def random_array_on_device(queue, *shape): - empty = empty_array_on_device(queue, shape) + empty = empty_array_on_device(queue, *shape) return cl.clrandom.fill_rand(empty) -def empty_array_on_device(queue, shape): +def empty_array_on_device(queue, *shape): return cl.array.empty(queue, shape, dtype=np.float32, order="F") -- GitLab From f925d9be89ed91a6ceb27adb59f78ecf0f3fc4f5 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 14:08:09 -0500 Subject: [PATCH 64/80] refactor out utility we don't need --- test.py | 12 +++++++++++- utilities.py | 13 ------------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/test.py b/test.py index a79a1f3..7283726 100644 --- a/test.py +++ b/test.py @@ -123,7 +123,17 @@ def test_compute_flux_derivatives(ctx_factory): #@pytest.mark.slow def test_compute_flux_derivatives_gpu(ctx_factory): prg = get_weno_program() - prg = transform_compute_flux_derivative_gpu(get_queue(ctx_factory), prg) + prg = transform_weno_for_gpu(prg) + + queue = get_queue(ctx_factory) + + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + + if 1: + with open("gen-code.cl", "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + + prg = lp.set_options(prg, no_numpy=True) lp.auto_test_vs_ref(prg, ctx_factory(), parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) diff --git a/utilities.py b/utilities.py index 45b1012..333eb4b 100644 --- a/utilities.py +++ b/utilities.py @@ -109,19 +109,6 @@ def with_root_kernel(prg, root_name): return new_prg -def transform_compute_flux_derivative_gpu(queue, prg): - prg = transform_weno_for_gpu(prg) - - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - - prg = lp.set_options(prg, no_numpy=True) - return prg - - def transform_weno_for_gpu(prg): prg = transform_compute_flux_derivative_basic(prg) -- GitLab From f76f38b09496b0f14a3f7b99fcdbc27738fa8a3a Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 14:13:41 -0500 Subject: [PATCH 65/80] add flag for printing compute_flux_derivative kernel --- utilities.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utilities.py b/utilities.py index 333eb4b..f911066 100644 --- a/utilities.py +++ b/utilities.py @@ -109,7 +109,7 @@ def with_root_kernel(prg, root_name): return new_prg -def transform_weno_for_gpu(prg): +def transform_weno_for_gpu(prg, print_kernel=False): prg = transform_compute_flux_derivative_basic(prg) cfd = prg["compute_flux_derivatives"] @@ -138,7 +138,7 @@ def transform_weno_for_gpu(prg): #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") - if 0: + if print_kernel: print(prg["convert_to_generalized_frozen"]) 1/0 -- GitLab From aa931210051ec87ad13cb7d76efa495adf5c6ea5 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 15:29:55 -0500 Subject: [PATCH 66/80] refactor out a utility and move the code to the diff loopy block in WENO.F90 --- WENO.F90 | 14 ++++++++++++++ test.py | 5 +++-- utilities.py | 17 ----------------- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/WENO.F90 b/WENO.F90 index 995d3b7..6bfad5c 100644 --- a/WENO.F90 +++ b/WENO.F90 @@ -951,6 +951,20 @@ end subroutine ! ! prg = lp.parse_fortran(lp.c_preprocess(SOURCE), FILENAME) ! prg = lp.fix_parameters(prg, ndim=3, nvars=5, _remove=False) +! +! cfd = prg["compute_flux_derivatives"] +! +! cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") +! +! cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", +! lp.AddressSpace.GLOBAL) +! cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", +! lp.AddressSpace.GLOBAL) +! cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", +! lp.AddressSpace.GLOBAL) +! +! prg = prg.with_kernel(cfd) +! ! RESULT = prg ! !$loopy end diff --git a/test.py b/test.py index 7283726..b5db8c7 100644 --- a/test.py +++ b/test.py @@ -114,7 +114,9 @@ def test_matvec(ctx_factory): #@pytest.mark.slow def test_compute_flux_derivatives(ctx_factory): prg = get_weno_program() - prg = transform_compute_flux_derivative_basic(prg) + + queue = get_queue(ctx_factory) + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) lp.auto_test_vs_ref(prg, ctx_factory(), parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) @@ -126,7 +128,6 @@ def test_compute_flux_derivatives_gpu(ctx_factory): prg = transform_weno_for_gpu(prg) queue = get_queue(ctx_factory) - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) if 1: diff --git a/utilities.py b/utilities.py index f911066..1f21f09 100644 --- a/utilities.py +++ b/utilities.py @@ -110,8 +110,6 @@ def with_root_kernel(prg, root_name): def transform_weno_for_gpu(prg, print_kernel=False): - prg = transform_compute_flux_derivative_basic(prg) - cfd = prg["compute_flux_derivatives"] for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: @@ -145,18 +143,3 @@ def transform_weno_for_gpu(prg, print_kernel=False): return prg -def transform_compute_flux_derivative_basic(prg): - cfd = prg["compute_flux_derivatives"] - - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") - - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) - - return prg.with_kernel(cfd) - - -- GitLab From 21995d88aae4f3c5877e36bdda2b2392fb99588c Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 15:43:16 -0500 Subject: [PATCH 67/80] refactor out a utility to write CL code --- benchmark.py | 22 +++++++++------------- test.py | 10 ++++------ utilities.py | 4 ++++ 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/benchmark.py b/benchmark.py index df06f97..5f48726 100644 --- a/benchmark.py +++ b/benchmark.py @@ -17,13 +17,19 @@ from pyopencl.tools import ( # noqa from utilities import * -def benchmark_compute_flux_derivatives_gpu(ctx_factory): +def benchmark_compute_flux_derivatives_gpu(ctx_factory, write_code=False): logging.basicConfig(level="INFO") prg = get_weno_program() prg = transform_weno_for_gpu(prg) queue = get_queue(ctx_factory) + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + prg = lp.set_options(prg, no_numpy=True) + prg = lp.set_options(prg, ignore_boostable_into=True) + #prg = lp.set_options(prg, write_wrapper=True) + #op_map = lp.get_op_map(prg, count_redundant_work=False) + #print(op_map) ndim = 3 nvars = 5 @@ -41,18 +47,8 @@ def benchmark_compute_flux_derivatives_gpu(ctx_factory): flux_derivatives_dev = empty_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 0: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - prg = lp.set_options(prg, ignore_boostable_into=True) - prg = lp.set_options(prg, no_numpy=True) - #prg = lp.set_options(prg, write_wrapper=True) - #op_map = lp.get_op_map(prg, count_redundant_work=False) - #print(op_map) + if write_code: + write_to_cl(prg) allocator = pyopencl.tools.MemoryPool(pyopencl.tools.ImmediateAllocator(queue)) diff --git a/test.py b/test.py index b5db8c7..4d2a764 100644 --- a/test.py +++ b/test.py @@ -123,19 +123,17 @@ def test_compute_flux_derivatives(ctx_factory): #@pytest.mark.slow -def test_compute_flux_derivatives_gpu(ctx_factory): +def test_compute_flux_derivatives_gpu(ctx_factory, write_code=False): prg = get_weno_program() prg = transform_weno_for_gpu(prg) queue = get_queue(ctx_factory) prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - prg = lp.set_options(prg, no_numpy=True) + if write_code: + write_to_cl(prg) + lp.auto_test_vs_ref(prg, ctx_factory(), parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) diff --git a/utilities.py b/utilities.py index 1f21f09..d68ab87 100644 --- a/utilities.py +++ b/utilities.py @@ -143,3 +143,7 @@ def transform_weno_for_gpu(prg, print_kernel=False): return prg +def write_to_cl(prg, outfilename="gen-code.cl"): + with open(outfilename, "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + -- GitLab From fdbef6e721fd9c7bc264b1d6f96a9d731aa90fd3 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 16:13:15 -0500 Subject: [PATCH 68/80] using one warmup round only for auto_test_vs_ref --- test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test.py b/test.py index 4d2a764..8a59001 100644 --- a/test.py +++ b/test.py @@ -118,7 +118,7 @@ def test_compute_flux_derivatives(ctx_factory): queue = get_queue(ctx_factory) prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - lp.auto_test_vs_ref(prg, ctx_factory(), + lp.auto_test_vs_ref(prg, ctx_factory(), warmup_rounds=1, parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) @@ -134,7 +134,7 @@ def test_compute_flux_derivatives_gpu(ctx_factory, write_code=False): if write_code: write_to_cl(prg) - lp.auto_test_vs_ref(prg, ctx_factory(), + lp.auto_test_vs_ref(prg, ctx_factory(), warmup_rounds=1, parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) -- GitLab From 28b9a3fe2402426323b732e23cf85d3ab21e19d0 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 17:24:05 -0500 Subject: [PATCH 69/80] refactor out kernel_mult_mat_vec --- test.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/test.py b/test.py index 8a59001..81dff66 100644 --- a/test.py +++ b/test.py @@ -89,29 +89,24 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def test_matvec(ctx_factory): - def kernel_mult_mat_vec(queue, prg, alpha, a, b): - c_dev = empty_array_on_device(queue, *b.shape) - - prg = with_root_kernel(prg, "mult_mat_vec") - prg(queue, a=a, b=b, c=c_dev, alpha=alpha) - - return c_dev.get() - def random_array(*shape): return np.random.random_sample(shape).astype(np.float32).copy(order="F") - queue = get_queue(ctx_factory) prg = get_weno_program() + queue = get_queue(ctx_factory) a = random_array(10, 10) b = random_array(10) - c = kernel_mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) + c_dev = empty_array_on_device(queue, *b.shape) + + prg = with_root_kernel(prg, "mult_mat_vec") + prg(queue, alpha=1.0, a=a, b=b, c=c_dev) - compare_arrays(a@b, c) + compare_arrays(a@b, c_dev.get()) -#@pytest.mark.slow +@pytest.mark.slow def test_compute_flux_derivatives(ctx_factory): prg = get_weno_program() @@ -122,7 +117,7 @@ def test_compute_flux_derivatives(ctx_factory): parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) -#@pytest.mark.slow +@pytest.mark.slow def test_compute_flux_derivatives_gpu(ctx_factory, write_code=False): prg = get_weno_program() prg = transform_weno_for_gpu(prg) -- GitLab From 633a80a41da443b04a98ab913b6ce89b9ca41731 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 17:34:07 -0500 Subject: [PATCH 70/80] bugfix in random_array_on_device --- test.py | 13 +++++-------- utilities.py | 5 +++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/test.py b/test.py index 81dff66..e972734 100644 --- a/test.py +++ b/test.py @@ -89,21 +89,18 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def test_matvec(ctx_factory): - def random_array(*shape): - return np.random.random_sample(shape).astype(np.float32).copy(order="F") - prg = get_weno_program() queue = get_queue(ctx_factory) - a = random_array(10, 10) - b = random_array(10) + a = random_array_on_device(queue, 10, 10) + b = random_array_on_device(queue, 10) - c_dev = empty_array_on_device(queue, *b.shape) + c = empty_array_on_device(queue, 10) prg = with_root_kernel(prg, "mult_mat_vec") - prg(queue, alpha=1.0, a=a, b=b, c=c_dev) + prg(queue, alpha=1.0, a=a, b=b, c=c) - compare_arrays(a@b, c_dev.get()) + compare_arrays(a.get()@b.get(), c.get()) @pytest.mark.slow diff --git a/utilities.py b/utilities.py index d68ab87..306c28e 100644 --- a/utilities.py +++ b/utilities.py @@ -15,8 +15,9 @@ def compare_arrays(a, b): def random_array_on_device(queue, *shape): - empty = empty_array_on_device(queue, *shape) - return cl.clrandom.fill_rand(empty) + ary = empty_array_on_device(queue, *shape) + cl.clrandom.fill_rand(ary) + return ary def empty_array_on_device(queue, *shape): -- GitLab From 567555e034e29470d7133fbed376290eb23c6c5d Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 21:26:57 -0500 Subject: [PATCH 71/80] memoize calls to with_root_kernel --- test.py | 6 ++---- utilities.py | 22 +++++++++++++++++----- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/test.py b/test.py index e972734..bc9cbe6 100644 --- a/test.py +++ b/test.py @@ -55,7 +55,6 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): Rinv_dev = empty_array_on_device(queue, *params.mat_bounds()) lam_dev = empty_array_on_device(queue, params.vec_bound()) - prg = with_root_kernel(prg, "roe_eigensystem") prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, states=states, metrics_frozen=metrics_frozen, R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) @@ -75,7 +74,7 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): compare_arrays(R@temp, dFlux) queue = get_queue(ctx_factory) - prg = get_weno_program() + prg = get_weno_program_with_root_kernel("roe_eigensystem") params = setup_roe_params(nvars=5, ndim=3, direction=direction) states = array_from_string(states_str) @@ -89,7 +88,7 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def test_matvec(ctx_factory): - prg = get_weno_program() + prg = get_weno_program_with_root_kernel("mult_mat_vec") queue = get_queue(ctx_factory) a = random_array_on_device(queue, 10, 10) @@ -97,7 +96,6 @@ def test_matvec(ctx_factory): c = empty_array_on_device(queue, 10) - prg = with_root_kernel(prg, "mult_mat_vec") prg(queue, alpha=1.0, a=a, b=b, c=c) compare_arrays(a.get()@b.get(), c.get()) diff --git a/utilities.py b/utilities.py index 306c28e..05ee24a 100644 --- a/utilities.py +++ b/utilities.py @@ -63,7 +63,7 @@ def split_map_to_list(string, map_func, splitter): ### Device ### -_QUEUE = [] +_QUEUE = {} def get_queue(ctx_factory): @@ -74,18 +74,30 @@ def get_queue(ctx_factory): def setup_queue(ctx_factory): ctx = ctx_factory() - _QUEUE.append(cl.CommandQueue(ctx)) + _QUEUE[0] = cl.CommandQueue(ctx) ### Program / Kernel ### -_WENO_PRG = [] +_WENO_PRG = {} + + +def get_weno_program_with_root_kernel(knl): + if not knl in _WENO_PRG: + setup_weno_program_with_root_kernel(knl) + return _WENO_PRG[knl] + + +def setup_weno_program_with_root_kernel(knl): + prg = get_weno_program() + prg = with_root_kernel(prg, knl) + _WENO_PRG[knl] = prg def get_weno_program(): if not _WENO_PRG: parse_weno() - return _WENO_PRG[0] + return _WENO_PRG["default"] def parse_weno(): @@ -95,7 +107,7 @@ def parse_weno(): infile_content = infile.read() prg = lp.parse_transformed_fortran(infile_content, filename=fn) - _WENO_PRG.append(prg) + _WENO_PRG["default"] = prg def with_root_kernel(prg, root_name): -- GitLab From 202248d790227adc87debd1e554eb15c53c570af Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Fri, 21 Jun 2019 00:47:35 -0500 Subject: [PATCH 72/80] get rid of some too-complicated code in the Roe test --- test.py | 49 ++++++++++++++++--------------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/test.py b/test.py index bc9cbe6..01dec36 100644 --- a/test.py +++ b/test.py @@ -31,36 +31,9 @@ from utilities import * ("2 1,4 1,8 2,12 3,64 11", "12 3,24 3,48 6,75.2 10.6,403.2 37.8", "z") ]) def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): - class RoeParams: - def __init__(self, nvars, ndim, d): - self.nvars = nvars - self.ndim = ndim - self.d = d - - def mat_bounds(self): - return self.nvars, self.nvars - - def vec_bound(self): - return self.nvars - - def setup_roe_params(nvars, ndim, direction): - dirs = {"x" : 1, "y" : 2, "z" : 3} - return RoeParams(nvars, ndim, dirs[direction]) - def identity_matrix(n): return np.identity(n).astype(np.float32).copy(order="F") - def kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen): - R_dev = empty_array_on_device(queue, *params.mat_bounds()) - Rinv_dev = empty_array_on_device(queue, *params.mat_bounds()) - lam_dev = empty_array_on_device(queue, params.vec_bound()) - - prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, - states=states, metrics_frozen=metrics_frozen, - R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) - - return R_dev.get(), Rinv_dev.get(), lam_dev.get() - def check_roe_identity(states, R, Rinv): dState = states[:,1] - states[:,0] compare_arrays(R@(Rinv@dState), dState) @@ -73,17 +46,27 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): temp = np.multiply(lam, temp) compare_arrays(R@temp, dFlux) - queue = get_queue(ctx_factory) prg = get_weno_program_with_root_kernel("roe_eigensystem") + queue = get_queue(ctx_factory) - params = setup_roe_params(nvars=5, ndim=3, direction=direction) + dirs = {"x" : 1, "y" : 2, "z" : 3} states = array_from_string(states_str) - metrics_frozen = identity_matrix(params.ndim) - R, Rinv, lam = kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen) + fluxes = array_from_string(fluxes_str) + metrics_frozen = identity_matrix(3) - check_roe_identity(states, R, Rinv) + R_dev = empty_array_on_device(queue, 5, 5) + Rinv_dev = empty_array_on_device(queue, 5, 5) + lam_dev = empty_array_on_device(queue, 5) - fluxes = array_from_string(fluxes_str) + prg(queue, nvars=5, ndim=3, d=dirs[direction], + states=states, metrics_frozen=metrics_frozen, + R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) + + R = R_dev.get() + Rinv = Rinv_dev.get() + lam = lam_dev.get() + + check_roe_identity(states, R, Rinv) check_roe_property(states, fluxes, R, Rinv, lam) -- GitLab From 1f1fdce2772cd03eb11a605aa67676bf9f1d1a9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Fri, 21 Jun 2019 10:22:59 +0200 Subject: [PATCH 73/80] Introduced vim folds --- utilities.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/utilities.py b/utilities.py index 306c28e..c5d1d4c 100644 --- a/utilities.py +++ b/utilities.py @@ -8,7 +8,7 @@ import loopy as lp # noqa from pytest import approx -### Arrays ### +# {{{ arrays def compare_arrays(a, b): assert a == approx(b) @@ -60,8 +60,9 @@ def array_from_string(string_array): def split_map_to_list(string, map_func, splitter): return list(map(map_func, string.split(splitter))) +# }}} -### Device ### +# {{{ device _QUEUE = [] @@ -76,8 +77,9 @@ def setup_queue(ctx_factory): ctx = ctx_factory() _QUEUE.append(cl.CommandQueue(ctx)) +# }}} -### Program / Kernel ### +# {{{ program / kernel _WENO_PRG = [] @@ -148,3 +150,6 @@ def write_to_cl(prg, outfilename="gen-code.cl"): with open(outfilename, "w") as outf: outf.write(lp.generate_code_v2(prg).device_code()) +# }}} + +# vim: foldmethod=marker \ No newline at end of file -- GitLab From 4c9955448b864c8e06e49d49dac9337cc11b1960 Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Fri, 21 Jun 2019 16:30:33 +0200 Subject: [PATCH 74/80] Remove unneeded pytest imports from benchmark.py --- benchmark.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/benchmark.py b/benchmark.py index 5f48726..5e8345b 100644 --- a/benchmark.py +++ b/benchmark.py @@ -9,11 +9,6 @@ import sys import logging -import pytest -from pyopencl.tools import ( # noqa - pytest_generate_tests_for_pyopencl - as pytest_generate_tests) - from utilities import * -- GitLab From 11241bf325ec9feb5e3d687df70505dce5647450 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Fri, 21 Jun 2019 09:53:08 -0500 Subject: [PATCH 75/80] rename poorly-named function --- benchmark.py | 2 +- test.py | 2 +- utilities.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark.py b/benchmark.py index 5e8345b..71859c7 100644 --- a/benchmark.py +++ b/benchmark.py @@ -43,7 +43,7 @@ def benchmark_compute_flux_derivatives_gpu(ctx_factory, write_code=False): flux_derivatives_dev = empty_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) if write_code: - write_to_cl(prg) + write_target_device_code(prg) allocator = pyopencl.tools.MemoryPool(pyopencl.tools.ImmediateAllocator(queue)) diff --git a/test.py b/test.py index e972734..0a27274 100644 --- a/test.py +++ b/test.py @@ -124,7 +124,7 @@ def test_compute_flux_derivatives_gpu(ctx_factory, write_code=False): prg = lp.set_options(prg, no_numpy=True) if write_code: - write_to_cl(prg) + write_target_device_code(prg) lp.auto_test_vs_ref(prg, ctx_factory(), warmup_rounds=1, parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) diff --git a/utilities.py b/utilities.py index c5d1d4c..f9c66b4 100644 --- a/utilities.py +++ b/utilities.py @@ -146,7 +146,7 @@ def transform_weno_for_gpu(prg, print_kernel=False): return prg -def write_to_cl(prg, outfilename="gen-code.cl"): +def write_target_device_code(prg, outfilename="gen-code.cl"): with open(outfilename, "w") as outf: outf.write(lp.generate_code_v2(prg).device_code()) -- GitLab From ef8380974aaa11cc484ab0b12eb05f5732218364 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Fri, 21 Jun 2019 21:05:36 -0500 Subject: [PATCH 76/80] remove queue memoization --- utilities.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/utilities.py b/utilities.py index f9c66b4..2283432 100644 --- a/utilities.py +++ b/utilities.py @@ -64,18 +64,9 @@ def split_map_to_list(string, map_func, splitter): # {{{ device -_QUEUE = [] - - def get_queue(ctx_factory): - if not _QUEUE: - setup_queue(ctx_factory) - return _QUEUE[0] - - -def setup_queue(ctx_factory): ctx = ctx_factory() - _QUEUE.append(cl.CommandQueue(ctx)) + return cl.CommandQueue(ctx) # }}} @@ -152,4 +143,4 @@ def write_target_device_code(prg, outfilename="gen-code.cl"): # }}} -# vim: foldmethod=marker \ No newline at end of file +# vim: foldmethod=marker -- GitLab From 259d5cf2ffa4ddb638ad4d948e2e72de753110e7 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Fri, 21 Jun 2019 21:46:51 -0500 Subject: [PATCH 77/80] refactor Roe test so input arrays are transposed for easier viewing --- test.py | 35 +++++++++++++++++++---------------- utilities.py | 4 ++++ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/test.py b/test.py index 27b583b..412090d 100644 --- a/test.py +++ b/test.py @@ -20,15 +20,15 @@ from utilities import * @pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ - ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"), - ("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,11.2 2.6,8 1,46.4 7.1", "y"), - ("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,8 1,11.2 2.6,46.4 7.1", "z"), - ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,2.6 11.2,1 8,1 8,-7.1 -46.4", "x"), - ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,1 8,2.6 11.2,1 8,-7.1 -46.4", "y"), - ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,1 8,1 8,2.6 11.2,-7.1 -46.4", "z"), - ("2 1,4 1,8 2,12 3,64 11", "4 1,11.2 2.6,16 2,24 3,134.4 12.6", "x"), - ("2 1,4 1,8 2,12 3,64 11", "8 2,16 2,35.2 5.6,48 6,268.8 25.2", "y"), - ("2 1,4 1,8 2,12 3,64 11", "12 3,24 3,48 6,75.2 10.6,403.2 37.8", "z") + ("2 4 4 4 20,1 1 1 1 5.5", "4 11.2 8 8 46.4,1 2.6 1 1 7.1", "x"), + ("2 4 4 4 20,1 1 1 1 5.5", "4 8 11.2 8 46.4,1 1 2.6 1 7.1", "y"), + ("2 4 4 4 20,1 1 1 1 5.5", "4 8 8 11.2 46.4,1 1 1 2.6 7.1", "z"), + ("1 -1 -1 -1 5.5,2 -4 -4 -4 20", "-1 2.6 1 1 -7.1,-4 11.2 8 8 -46.4", "x"), + ("1 -1 -1 -1 5.5,2 -4 -4 -4 20", "-1 1 2.6 1 -7.1,-4 8 11.2 8 -46.4", "y"), + ("1 -1 -1 -1 5.5,2 -4 -4 -4 20", "-1 1 1 2.6 -7.1,-4 8 8 11.2 -46.4", "z"), + ("2 4 8 12 64,1 1 2 3 11", "4 11.2 16 24 134.4,1 2.6 2 3 12.6", "x"), + ("2 4 8 12 64,1 1 2 3 11", "8 16 35.2 48 268.8,2 2 5.6 6 25.2", "y"), + ("2 4 8 12 64,1 1 2 3 11", "12 24 48 75.2 403.2,3 3 6 10.6 37.8", "z") ]) def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def identity_matrix(n): @@ -49,16 +49,19 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): prg = get_weno_program_with_root_kernel("roe_eigensystem") queue = get_queue(ctx_factory) + nvars = 5 + ndim = 3 dirs = {"x" : 1, "y" : 2, "z" : 3} - states = array_from_string(states_str) - fluxes = array_from_string(fluxes_str) - metrics_frozen = identity_matrix(3) - R_dev = empty_array_on_device(queue, 5, 5) - Rinv_dev = empty_array_on_device(queue, 5, 5) - lam_dev = empty_array_on_device(queue, 5) + states = transposed_array_from_string(states_str) + fluxes = transposed_array_from_string(fluxes_str) + metrics_frozen = identity_matrix(ndim) - prg(queue, nvars=5, ndim=3, d=dirs[direction], + R_dev = empty_array_on_device(queue, nvars, nvars) + Rinv_dev = empty_array_on_device(queue, nvars, nvars) + lam_dev = empty_array_on_device(queue, nvars) + + prg(queue, nvars=nvars, ndim=ndim, d=dirs[direction], states=states, metrics_frozen=metrics_frozen, R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) diff --git a/utilities.py b/utilities.py index 171f78e..e4981a3 100644 --- a/utilities.py +++ b/utilities.py @@ -28,6 +28,10 @@ def arrays_from_string(string_arrays): return split_map_to_list(string_arrays, array_from_string, ":") +def transposed_array_from_string(string_array): + return array_from_string(string_array).transpose().copy(order="F") + + def array_from_string(string_array): def array_from_string_1d(string_array): if string_array[0] == "i": -- GitLab From f511e9207ae196d48d9342f5901462a9f0b48f23 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Fri, 21 Jun 2019 22:31:18 -0500 Subject: [PATCH 78/80] bugfix in Roe eigensystem --- WENO.F90 | 6 +++--- test.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/WENO.F90 b/WENO.F90 index 6bfad5c..829080e 100644 --- a/WENO.F90 +++ b/WENO.F90 @@ -303,9 +303,9 @@ subroutine roe_eigensystem(nvars, ndim, d, states, metrics_frozen, R, R_inv, lam if (im > 3) im = im - 3 do j=1,2 - u_orig(1,j) = states(ik+1,j)/states(1,j) - u_orig(2,j) = states(il+1,j)/states(1,j) - u_orig(3,j) = states(im+1,j)/states(1,j) + do i=1,ndim + u_orig(i,j) = states(i+1,j)/states(1,j) + end do end do do j=1,2 diff --git a/test.py b/test.py index 412090d..741a691 100644 --- a/test.py +++ b/test.py @@ -18,7 +18,6 @@ from pyopencl.tools import ( # noqa from utilities import * -@pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ ("2 4 4 4 20,1 1 1 1 5.5", "4 11.2 8 8 46.4,1 2.6 1 1 7.1", "x"), ("2 4 4 4 20,1 1 1 1 5.5", "4 8 11.2 8 46.4,1 1 2.6 1 7.1", "y"), @@ -69,6 +68,8 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): Rinv = Rinv_dev.get() lam = lam_dev.get() + print(lam) + check_roe_identity(states, R, Rinv) check_roe_property(states, fluxes, R, Rinv, lam) -- GitLab From 97654c94b5b7f7e17a78d8e61ac5a5ba16c80d12 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 24 Jun 2019 09:53:50 -0500 Subject: [PATCH 79/80] added new test cases for Roe test --- test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test.py b/test.py index 741a691..97ccfe7 100644 --- a/test.py +++ b/test.py @@ -27,7 +27,10 @@ from utilities import * ("1 -1 -1 -1 5.5,2 -4 -4 -4 20", "-1 1 1 2.6 -7.1,-4 8 8 11.2 -46.4", "z"), ("2 4 8 12 64,1 1 2 3 11", "4 11.2 16 24 134.4,1 2.6 2 3 12.6", "x"), ("2 4 8 12 64,1 1 2 3 11", "8 16 35.2 48 268.8,2 2 5.6 6 25.2", "y"), - ("2 4 8 12 64,1 1 2 3 11", "12 24 48 75.2 403.2,3 3 6 10.6 37.8", "z") + ("2 4 8 12 64,1 1 2 3 11", "12 24 48 75.2 403.2,3 3 6 10.6 37.8", "z"), + ("1 -1 -2 -3 11,2 -4 -8 -12 64", "-1 2.6 2 3 -12.6,-4 11.2 16 24 -134.4", "x"), + ("1 -1 -2 -3 11,2 -4 -8 -12 64", "-2 2 5.6 6 -25.2,-8 16 35.2 48 -268.8", "y"), + ("1 -1 -2 -3 11,2 -4 -8 -12 64", "-3 3 6 10.6 -37.8,-12 24 48 75.2 -403.2", "z") ]) def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def identity_matrix(n): -- GitLab From 26e26a532d1016bbab56495fcf1a4fb8672a9ffe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Tue, 25 Jun 2019 20:51:10 +0200 Subject: [PATCH 80/80] Reorder so that callers follow callees --- utilities.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/utilities.py b/utilities.py index e4981a3..b825fcb 100644 --- a/utilities.py +++ b/utilities.py @@ -79,24 +79,6 @@ def get_queue(ctx_factory): _WENO_PRG = {} -def get_weno_program_with_root_kernel(knl): - if not knl in _WENO_PRG: - setup_weno_program_with_root_kernel(knl) - return _WENO_PRG[knl] - - -def setup_weno_program_with_root_kernel(knl): - prg = get_weno_program() - prg = with_root_kernel(prg, knl) - _WENO_PRG[knl] = prg - - -def get_weno_program(): - if not _WENO_PRG: - parse_weno() - return _WENO_PRG["default"] - - def parse_weno(): fn = "WENO.F90" @@ -107,6 +89,12 @@ def parse_weno(): _WENO_PRG["default"] = prg +def get_weno_program(): + if not _WENO_PRG: + parse_weno() + return _WENO_PRG["default"] + + def with_root_kernel(prg, root_name): # FIXME This is a little less beautiful than it could be new_prg = prg.copy(name=root_name) @@ -119,6 +107,18 @@ def with_root_kernel(prg, root_name): return new_prg +def setup_weno_program_with_root_kernel(knl): + prg = get_weno_program() + prg = with_root_kernel(prg, knl) + _WENO_PRG[knl] = prg + + +def get_weno_program_with_root_kernel(knl): + if not knl in _WENO_PRG: + setup_weno_program_with_root_kernel(knl) + return _WENO_PRG[knl] + + def transform_weno_for_gpu(prg, print_kernel=False): cfd = prg["compute_flux_derivatives"] -- GitLab