From f3a2d6b2a0787465f56ac97cdccc802edfd4905f Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 17 Jun 2019 10:59:11 -0500 Subject: [PATCH 01/21] run all tests, even slow ones --- test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test.py b/test.py index 924171f..e97a345 100644 --- a/test.py +++ b/test.py @@ -53,7 +53,7 @@ def test_matvec(ctx_factory): compare.arrays(a@b, c) -@pytest.mark.slow +#@pytest.mark.slow def test_compute_flux_derivatives(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() @@ -65,7 +65,7 @@ def test_compute_flux_derivatives(ctx_factory): kernel.compute_flux_derivatives(queue, prg, params, arrays) -@pytest.mark.slow +#@pytest.mark.slow def test_compute_flux_derivatives_gpu(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() -- GitLab From 0b99456599c7673f20182efe728de57d40afc05e Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 17 Jun 2019 11:03:12 -0500 Subject: [PATCH 02/21] move comparison fixtures to main test file --- comparison_fixtures.py | 20 -------------------- test.py | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 24 deletions(-) delete mode 100644 comparison_fixtures.py diff --git a/comparison_fixtures.py b/comparison_fixtures.py deleted file mode 100644 index 04c7432..0000000 --- a/comparison_fixtures.py +++ /dev/null @@ -1,20 +0,0 @@ -import numpy as np -from pytest import approx - - -def arrays(a, b): - assert a == approx(b) - - -def roe_identity(states, R, Rinv): - dState = states[:,1] - states[:,0] - arrays(R@(Rinv@dState), dState) - - -def roe_property(states, fluxes, R, Rinv, lam): - dState = states[:,1] - states[:,0] - dFlux = fluxes[:,1] - fluxes[:,0] - - temp = Rinv@dState - temp = np.multiply(lam, temp) - arrays(R@temp, dFlux) diff --git a/test.py b/test.py index e97a345..d48f18a 100644 --- a/test.py +++ b/test.py @@ -1,7 +1,16 @@ +import numpy as np +import numpy.linalg as la +import pyopencl as cl +import pyopencl.array # noqa +import pyopencl.tools # noqa +import pyopencl.clrandom # noqa +import loopy as lp # noqa + import sys import logging import pytest +from pytest import approx import pyopencl as cl from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl @@ -12,7 +21,25 @@ import program_fixtures as program import transform_fixtures as transform import setup_fixtures as setup import kernel_fixtures as kernel -import comparison_fixtures as compare + + +def compare_arrays(a, b): + assert a == approx(b) + + +def compare_roe_identity(states, R, Rinv): + dState = states[:,1] - states[:,0] + compare_arrays(R@(Rinv@dState), dState) + + +def compare_roe_property(states, fluxes, R, Rinv, lam): + dState = states[:,1] - states[:,0] + dFlux = fluxes[:,1] - fluxes[:,0] + + temp = Rinv@dState + temp = np.multiply(lam, temp) + compare_arrays(R@temp, dFlux) + @pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ @@ -35,10 +62,10 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): metrics_frozen = setup.identity(params.ndim) R, Rinv, lam = kernel.roe_eigensystem(queue, prg, params, states, metrics_frozen) - compare.roe_identity(states, R, Rinv) + compare_roe_identity(states, R, Rinv) fluxes = setup.array_from_string(fluxes_str) - compare.roe_property(states, fluxes, R, Rinv, lam) + compare_roe_property(states, fluxes, R, Rinv, lam) def test_matvec(ctx_factory): @@ -50,7 +77,7 @@ def test_matvec(ctx_factory): c = kernel.mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) - compare.arrays(a@b, c) + compare_arrays(a@b, c) #@pytest.mark.slow -- GitLab From 440ecc0f045eb2fa94091e0ff805000d858386f4 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 17 Jun 2019 12:05:08 -0500 Subject: [PATCH 03/21] put transformation fixtures in test.py --- test.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/test.py b/test.py index d48f18a..6b42fdd 100644 --- a/test.py +++ b/test.py @@ -18,7 +18,6 @@ from pyopencl.tools import ( # noqa import device_fixtures as device import program_fixtures as program -import transform_fixtures as transform import setup_fixtures as setup import kernel_fixtures as kernel @@ -41,6 +40,70 @@ def compare_roe_property(states, fluxes, R, Rinv, lam): compare_arrays(R@temp, dFlux) +def transform_compute_flux_derivative_basic(prg): + cfd = prg["compute_flux_derivatives"] + + cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") + + cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", + lp.AddressSpace.GLOBAL) + + return prg.with_kernel(cfd) + + +def transform_weno_for_gpu(prg): + prg = transform_compute_flux_derivative_basic(prg) + + cfd = prg["compute_flux_derivatives"] + + for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: + cfd = lp.split_iname(cfd, "i"+suffix, 16, + outer_tag="g.0", inner_tag="l.0") + cfd = lp.split_iname(cfd, "j"+suffix, 16, + outer_tag="g.1", inner_tag="l.1") + + for var_name in ["delta_xi", "delta_eta", "delta_zeta"]: + cfd = lp.assignment_to_subst(cfd, var_name) + + cfd = lp.add_barrier(cfd, "tag:to_generalized", "tag:flux_x_compute") + cfd = lp.add_barrier(cfd, "tag:flux_x_compute", "tag:flux_x_diff") + cfd = lp.add_barrier(cfd, "tag:flux_x_diff", "tag:flux_y_compute") + cfd = lp.add_barrier(cfd, "tag:flux_y_compute", "tag:flux_y_diff") + cfd = lp.add_barrier(cfd, "tag:flux_y_diff", "tag:flux_z_compute") + cfd = lp.add_barrier(cfd, "tag:flux_z_compute", "tag:flux_z_diff") + cfd = lp.add_barrier(cfd, "tag:flux_z_diff", "tag:from_generalized") + + prg = prg.with_kernel(cfd) + + # FIXME: These should work, but don't + # FIXME: Undo the hand-inlining in WENO.F90 + #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") + #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") + + if 0: + print(prg["convert_to_generalized_frozen"]) + 1/0 + + return prg + + +def transform_compute_flux_derivative_gpu(queue, prg): + prg = transform_weno_for_gpu(prg) + + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + + if 1: + with open("gen-code.cl", "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + + prg = lp.set_options(prg, no_numpy=True) + return prg + + @pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"), @@ -84,7 +147,7 @@ def test_matvec(ctx_factory): def test_compute_flux_derivatives(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() - prg = transform.compute_flux_derivative_basic(prg) + prg = transform_compute_flux_derivative_basic(prg) params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup.random_flux_derivative_arrays(params) @@ -96,7 +159,7 @@ def test_compute_flux_derivatives(ctx_factory): def test_compute_flux_derivatives_gpu(ctx_factory): queue = device.get_queue(ctx_factory) prg = program.get_weno() - prg = transform.compute_flux_derivative_gpu(queue, prg) + prg = transform_compute_flux_derivative_gpu(queue, prg) params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup.random_flux_derivative_arrays_on_device(ctx_factory, params) -- GitLab From a4df56a052e0f5d01cf29844d4d9f8538096363f Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 17 Jun 2019 12:11:21 -0500 Subject: [PATCH 04/21] move program/device fixtures inside test.py --- test.py | 51 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/test.py b/test.py index 6b42fdd..3db55b6 100644 --- a/test.py +++ b/test.py @@ -16,12 +16,43 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -import device_fixtures as device -import program_fixtures as program import setup_fixtures as setup import kernel_fixtures as kernel +_QUEUE = [] + + +def get_queue(ctx_factory): + if not _QUEUE: + setup_queue(ctx_factory) + return _QUEUE[0] + + +def setup_queue(ctx_factory): + ctx = ctx_factory() + _QUEUE.append(cl.CommandQueue(ctx)) + + +_WENO_PRG = [] + + +def parse_weno(): + fn = "WENO.F90" + + with open(fn, "r") as infile: + infile_content = infile.read() + + prg = lp.parse_transformed_fortran(infile_content, filename=fn) + _WENO_PRG.append(prg) + + +def get_weno_program(): + if not _WENO_PRG: + parse_weno() + return _WENO_PRG[0] + + def compare_arrays(a, b): assert a == approx(b) @@ -117,8 +148,8 @@ def transform_compute_flux_derivative_gpu(queue, prg): ("2 1,4 1,8 2,12 3,64 11", "12 3,24 3,48 6,75.2 10.6,403.2 37.8", "z") ]) def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): - queue = device.get_queue(ctx_factory) - prg = program.get_weno() + queue = get_queue(ctx_factory) + prg = get_weno_program() params = setup.roe_params(nvars=5, ndim=3, direction=direction) states = setup.array_from_string(states_str) @@ -132,8 +163,8 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def test_matvec(ctx_factory): - queue = device.get_queue(ctx_factory) - prg = program.get_weno() + queue = get_queue(ctx_factory) + prg = get_weno_program() a = setup.random_array(10, 10) b = setup.random_array(10) @@ -145,8 +176,8 @@ def test_matvec(ctx_factory): #@pytest.mark.slow def test_compute_flux_derivatives(ctx_factory): - queue = device.get_queue(ctx_factory) - prg = program.get_weno() + queue = get_queue(ctx_factory) + prg = get_weno_program() prg = transform_compute_flux_derivative_basic(prg) params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) @@ -157,8 +188,8 @@ def test_compute_flux_derivatives(ctx_factory): #@pytest.mark.slow def test_compute_flux_derivatives_gpu(ctx_factory): - queue = device.get_queue(ctx_factory) - prg = program.get_weno() + queue = get_queue(ctx_factory) + prg = get_weno_program() prg = transform_compute_flux_derivative_gpu(queue, prg) params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) -- GitLab From d5afdfbb66d8ceb87b8917d316715d61dc6db5a8 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 17 Jun 2019 12:26:51 -0500 Subject: [PATCH 05/21] put setup fixtures in test.py --- test.py | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 143 insertions(+), 11 deletions(-) diff --git a/test.py b/test.py index 3db55b6..dbc6261 100644 --- a/test.py +++ b/test.py @@ -16,7 +16,6 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -import setup_fixtures as setup import kernel_fixtures as kernel @@ -53,6 +52,139 @@ def get_weno_program(): return _WENO_PRG[0] +class RoeParams: + def __init__(self, nvars, ndim, d): + self.nvars = nvars + self.ndim = ndim + self.d = d + + def mat_bounds(self): + return self.nvars, self.nvars + + def vec_bounds(self): + return self.nvars + + +class FluxDerivativeParams: + def __init__(self, nvars, ndim, nx, ny, nz): + self.nvars = nvars + self.ndim = ndim + + self.nx = nx + self.ny = ny + self.nz = nz + + self.nhalo = 3 + self.nx_halo = self.nx + 2*self.nhalo + self.ny_halo = self.ny + 2*self.nhalo + self.nz_halo = self.nz + 2*self.nhalo + + def state_bounds(self): + return self.nvars, self.nx_halo, self.ny_halo, self.nz_halo + + def flux_bounds(self): + return self.nvars, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo + + def metric_bounds(self): + return self.ndim, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo + + def jacobian_bounds(self): + return self.nx_halo, self.ny_halo, self.nz_halo + + +class FluxDerivativeArrays: + def __init__(self, states, fluxes, metrics, metric_jacobians): + self.states = states + self.fluxes = fluxes + self.metrics = metrics + self.metric_jacobians = metric_jacobians + + +def setup_roe_params(nvars, ndim, direction): + dirs = {"x" : 1, "y" : 2, "z" : 3} + return RoeParams(nvars, ndim, dirs[direction]) + + +def setup_flux_derivative_params(nvars, ndim, n): + return FluxDerivativeParams(nvars, ndim, n, n, n) + + +def setup_empty_array_on_device(queue, shape): + return cl.array.empty(queue, shape, dtype=np.float32, order="F") + + +def setup_identity(n): + return np.identity(n).astype(np.float32).copy(order="F") + + +def setup_random_array(*shape): + return np.random.random_sample(shape).astype(np.float32).copy(order="F") + + +def setup_random_array_on_device(queue, *shape): + return cl.array.to_device(queue, setup_random_array(*shape)) + + +def setup_random_flux_derivative_arrays(params): + states = setup_random_array(*params.state_bounds()) + fluxes = setup_random_array(*params.flux_bounds()) + metrics = setup_random_array(*params.metric_bounds()) + metric_jacobians = setup_random_array(*params.jacobian_bounds()) + + return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) + + +def setup_random_flux_derivative_arrays_on_device(ctx_factory, params): + queue = get_queue(ctx_factory) + + states = setup_random_array_on_device(queue, *params.state_bounds()) + fluxes = setup_random_array_on_device(queue, *params.flux_bounds()) + metrics = setup_random_array_on_device(queue, *params.metric_bounds()) + metric_jacobians = setup_random_array_on_device(queue, *params.jacobian_bounds()) + + return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) + + +def arrays_from_string(string_arrays): + return split_map_to_list(string_arrays, array_from_string, ":") + + +def array_from_string(string_array): + if ";" not in string_array: + if "," not in string_array: + array = array_from_string_1d(string_array) + else: + array = array_from_string_2d(string_array) + else: + array = array_from_string_3d(string_array) + return array.copy(order="F") + + +def array_from_string_3d(string_array): + if string_array[0] == ";": + return array_from_string_1d(string_array[1:]).reshape((-1, 1, 1)) + else: + return np.array(split_map_to_list(string_array, array_from_string_2d, ";")) + + +def array_from_string_2d(string_array): + if string_array[0] == ",": + return array_from_string_1d(string_array[1:]).reshape((-1, 1)) + else: + return np.array(split_map_to_list(string_array, array_from_string_1d, ",")) + + +def array_from_string_1d(string_array): + if string_array[0] == "i": + return np.array(split_map_to_list(string_array[1:], int, " ")) + else: + return np.array(split_map_to_list(string_array, float, " "), dtype=np.float32) + + +def split_map_to_list(string, map_func, splitter): + return list(map(map_func, string.split(splitter))) + + def compare_arrays(a, b): assert a == approx(b) @@ -151,14 +283,14 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): queue = get_queue(ctx_factory) prg = get_weno_program() - params = setup.roe_params(nvars=5, ndim=3, direction=direction) - states = setup.array_from_string(states_str) - metrics_frozen = setup.identity(params.ndim) + params = setup_roe_params(nvars=5, ndim=3, direction=direction) + states = array_from_string(states_str) + metrics_frozen = setup_identity(params.ndim) R, Rinv, lam = kernel.roe_eigensystem(queue, prg, params, states, metrics_frozen) compare_roe_identity(states, R, Rinv) - fluxes = setup.array_from_string(fluxes_str) + fluxes = array_from_string(fluxes_str) compare_roe_property(states, fluxes, R, Rinv, lam) @@ -166,8 +298,8 @@ def test_matvec(ctx_factory): queue = get_queue(ctx_factory) prg = get_weno_program() - a = setup.random_array(10, 10) - b = setup.random_array(10) + a = setup_random_array(10, 10) + b = setup_random_array(10) c = kernel.mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) @@ -180,8 +312,8 @@ def test_compute_flux_derivatives(ctx_factory): prg = get_weno_program() prg = transform_compute_flux_derivative_basic(prg) - params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) - arrays = setup.random_flux_derivative_arrays(params) + params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) + arrays = setup_random_flux_derivative_arrays(params) kernel.compute_flux_derivatives(queue, prg, params, arrays) @@ -192,8 +324,8 @@ def test_compute_flux_derivatives_gpu(ctx_factory): prg = get_weno_program() prg = transform_compute_flux_derivative_gpu(queue, prg) - params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) - arrays = setup.random_flux_derivative_arrays_on_device(ctx_factory, params) + params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) + arrays = setup_random_flux_derivative_arrays_on_device(ctx_factory, params) kernel.compute_flux_derivatives(queue, prg, params, arrays) -- GitLab From 2797f50236c512939a604175a6e7572ec1303f72 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Mon, 17 Jun 2019 12:33:37 -0500 Subject: [PATCH 06/21] move kernel fixtures into test.py --- test.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/test.py b/test.py index dbc6261..17e7f8f 100644 --- a/test.py +++ b/test.py @@ -16,8 +16,6 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -import kernel_fixtures as kernel - _QUEUE = [] @@ -185,6 +183,52 @@ def split_map_to_list(string, map_func, splitter): return list(map(map_func, string.split(splitter))) +def with_root_kernel(prg, root_name): + # FIXME This is a little less beautiful than it could be + new_prg = prg.copy(name=root_name) + for name in prg: + clbl = new_prg[name] + if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: + new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) + + new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) + return new_prg + + +def kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen): + R_dev = setup_empty_array_on_device(queue, params.mat_bounds()) + Rinv_dev = setup_empty_array_on_device(queue, params.mat_bounds()) + lam_dev = setup_empty_array_on_device(queue, params.vec_bounds()) + + prg = with_root_kernel(prg, "roe_eigensystem") + prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, + states=states, metrics_frozen=metrics_frozen, + R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) + + return R_dev.get(), Rinv_dev.get(), lam_dev.get() + + +def kernel_mult_mat_vec(queue, prg, alpha, a, b): + c_dev = setup_empty_array_on_device(queue, b.shape) + + prg = with_root_kernel(prg, "mult_mat_vec") + prg(queue, a=a, b=b, c=c_dev, alpha=alpha) + + return c_dev.get() + + +def kernel_compute_flux_derivatives(queue, prg, params, arrays): + flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim, + params.nx_halo, params.ny_halo, params.nz_halo)) + + prg(queue, nvars=params.nvars, ndim=params.ndim, + states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, + metric_jacobians=arrays.metric_jacobians, + flux_derivatives=flux_derivatives_dev) + + return flux_derivatives_dev.get() + + def compare_arrays(a, b): assert a == approx(b) @@ -286,7 +330,7 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): params = setup_roe_params(nvars=5, ndim=3, direction=direction) states = array_from_string(states_str) metrics_frozen = setup_identity(params.ndim) - R, Rinv, lam = kernel.roe_eigensystem(queue, prg, params, states, metrics_frozen) + R, Rinv, lam = kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen) compare_roe_identity(states, R, Rinv) @@ -301,7 +345,7 @@ def test_matvec(ctx_factory): a = setup_random_array(10, 10) b = setup_random_array(10) - c = kernel.mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) + c = kernel_mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) compare_arrays(a@b, c) @@ -315,7 +359,7 @@ def test_compute_flux_derivatives(ctx_factory): params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup_random_flux_derivative_arrays(params) - kernel.compute_flux_derivatives(queue, prg, params, arrays) + kernel_compute_flux_derivatives(queue, prg, params, arrays) #@pytest.mark.slow @@ -327,7 +371,7 @@ def test_compute_flux_derivatives_gpu(ctx_factory): params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup_random_flux_derivative_arrays_on_device(ctx_factory, params) - kernel.compute_flux_derivatives(queue, prg, params, arrays) + kernel_compute_flux_derivatives(queue, prg, params, arrays) # This lets you run 'python test.py test_case(cl._csc)' without pytest. -- GitLab From 59267775b2d303af5f6882704a511d7cec770e64 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Wed, 19 Jun 2019 21:34:20 -0500 Subject: [PATCH 07/21] create utilities.py for all utilities, remove fixture files, update benchmark script to use utilities --- benchmark.py | 28 ++++-- device_fixtures.py | 15 --- kernel_fixtures.py | 49 --------- program_fixtures.py | 20 ---- setup_fixtures.py | 138 -------------------------- test.py | 98 +----------------- transform_fixtures.py => utilities.py | 51 ++++++++-- 7 files changed, 63 insertions(+), 336 deletions(-) delete mode 100644 device_fixtures.py delete mode 100644 kernel_fixtures.py delete mode 100644 program_fixtures.py delete mode 100644 setup_fixtures.py rename transform_fixtures.py => utilities.py (67%) diff --git a/benchmark.py b/benchmark.py index 00034a7..444b689 100644 --- a/benchmark.py +++ b/benchmark.py @@ -14,18 +14,24 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -import device_fixtures as device -import program_fixtures as program -import transform_fixtures as transform -import setup_fixtures as setup +from utilities import * + + +def setup_random_array(*shape): + return np.random.random_sample(shape).astype(np.float32).copy(order="F") + + +def setup_random_array_on_device(queue, *shape): + return cl.array.to_device(queue, setup_random_array(*shape)) + def benchmark_compute_flux_derivatives_gpu(ctx_factory): logging.basicConfig(level="INFO") - prg = program.get_weno() - prg = transform.weno_for_gpu(prg) + prg = get_weno_program() + prg = transform_weno_for_gpu(prg) - queue = device.get_queue(ctx_factory) + queue = get_queue(ctx_factory) ndim = 3 nvars = 5 @@ -35,10 +41,10 @@ def benchmark_compute_flux_derivatives_gpu(ctx_factory): nz = n print("ARRAY GEN") - states = setup.random_array_on_device(queue, nvars, nx+6, ny+6, nz+6) - fluxes = setup.random_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) - metrics = setup.random_array_on_device(queue, ndim, ndim, nx+6, ny+6, nz+6) - metric_jacobians = setup.random_array_on_device(queue, nx+6, ny+6, nz+6) + states = setup_random_array_on_device(queue, nvars, nx+6, ny+6, nz+6) + fluxes = setup_random_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) + metrics = setup_random_array_on_device(queue, ndim, ndim, nx+6, ny+6, nz+6) + metric_jacobians = setup_random_array_on_device(queue, nx+6, ny+6, nz+6) print("END ARRAY GEN") flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, diff --git a/device_fixtures.py b/device_fixtures.py deleted file mode 100644 index d0dbc59..0000000 --- a/device_fixtures.py +++ /dev/null @@ -1,15 +0,0 @@ -import pyopencl as cl - - -_QUEUE = [] - - -def get_queue(ctx_factory): - if not _QUEUE: - setup_queue(ctx_factory) - return _QUEUE[0] - - -def setup_queue(ctx_factory): - ctx = ctx_factory() - _QUEUE.append(cl.CommandQueue(ctx)) diff --git a/kernel_fixtures.py b/kernel_fixtures.py deleted file mode 100644 index 7f3dff4..0000000 --- a/kernel_fixtures.py +++ /dev/null @@ -1,49 +0,0 @@ -import loopy as lp # noqa - -import setup_fixtures as setup - - -def with_root_kernel(prg, root_name): - # FIXME This is a little less beautiful than it could be - new_prg = prg.copy(name=root_name) - for name in prg: - clbl = new_prg[name] - if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: - new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) - - new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) - return new_prg - - -def roe_eigensystem(queue, prg, params, states, metrics_frozen): - R_dev = setup.empty_array_on_device(queue, params.mat_bounds()) - Rinv_dev = setup.empty_array_on_device(queue, params.mat_bounds()) - lam_dev = setup.empty_array_on_device(queue, params.vec_bounds()) - - prg = with_root_kernel(prg, "roe_eigensystem") - prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, - states=states, metrics_frozen=metrics_frozen, - R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) - - return R_dev.get(), Rinv_dev.get(), lam_dev.get() - - -def mult_mat_vec(queue, prg, alpha, a, b): - c_dev = setup.empty_array_on_device(queue, b.shape) - - prg = with_root_kernel(prg, "mult_mat_vec") - prg(queue, a=a, b=b, c=c_dev, alpha=alpha) - - return c_dev.get() - - -def compute_flux_derivatives(queue, prg, params, arrays): - flux_derivatives_dev = setup.empty_array_on_device(queue, (params.nvars, params.ndim, - params.nx_halo, params.ny_halo, params.nz_halo)) - - prg(queue, nvars=params.nvars, ndim=params.ndim, - states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, - metric_jacobians=arrays.metric_jacobians, - flux_derivatives=flux_derivatives_dev) - - return flux_derivatives_dev.get() diff --git a/program_fixtures.py b/program_fixtures.py deleted file mode 100644 index 0f50ff1..0000000 --- a/program_fixtures.py +++ /dev/null @@ -1,20 +0,0 @@ -import loopy as lp - - -_WENO_PRG = [] - - -def parse_weno(): - fn = "WENO.F90" - - with open(fn, "r") as infile: - infile_content = infile.read() - - prg = lp.parse_transformed_fortran(infile_content, filename=fn) - _WENO_PRG.append(prg) - - -def get_weno(): - if not _WENO_PRG: - parse_weno() - return _WENO_PRG[0] diff --git a/setup_fixtures.py b/setup_fixtures.py deleted file mode 100644 index 6f1debc..0000000 --- a/setup_fixtures.py +++ /dev/null @@ -1,138 +0,0 @@ -import numpy as np -import pyopencl as cl -import pyopencl.array # noqa - -import device_fixtures as device - - -class RoeParams: - def __init__(self, nvars, ndim, d): - self.nvars = nvars - self.ndim = ndim - self.d = d - - def mat_bounds(self): - return self.nvars, self.nvars - - def vec_bounds(self): - return self.nvars - - -class FluxDerivativeParams: - def __init__(self, nvars, ndim, nx, ny, nz): - self.nvars = nvars - self.ndim = ndim - - self.nx = nx - self.ny = ny - self.nz = nz - - self.nhalo = 3 - self.nx_halo = self.nx + 2*self.nhalo - self.ny_halo = self.ny + 2*self.nhalo - self.nz_halo = self.nz + 2*self.nhalo - - def state_bounds(self): - return self.nvars, self.nx_halo, self.ny_halo, self.nz_halo - - def flux_bounds(self): - return self.nvars, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo - - def metric_bounds(self): - return self.ndim, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo - - def jacobian_bounds(self): - return self.nx_halo, self.ny_halo, self.nz_halo - - -class FluxDerivativeArrays: - def __init__(self, states, fluxes, metrics, metric_jacobians): - self.states = states - self.fluxes = fluxes - self.metrics = metrics - self.metric_jacobians = metric_jacobians - - -def roe_params(nvars, ndim, direction): - dirs = {"x" : 1, "y" : 2, "z" : 3} - return RoeParams(nvars, ndim, dirs[direction]) - - -def flux_derivative_params(nvars, ndim, n): - return FluxDerivativeParams(nvars, ndim, n, n, n) - - -def empty_array_on_device(queue, shape): - return cl.array.empty(queue, shape, dtype=np.float32, order="F") - - -def identity(n): - return np.identity(n).astype(np.float32).copy(order="F") - - -def random_array(*shape): - return np.random.random_sample(shape).astype(np.float32).copy(order="F") - - -def random_array_on_device(queue, *shape): - return cl.array.to_device(queue, random_array(*shape)) - - -def random_flux_derivative_arrays(params): - states = random_array(*params.state_bounds()) - fluxes = random_array(*params.flux_bounds()) - metrics = random_array(*params.metric_bounds()) - metric_jacobians = random_array(*params.jacobian_bounds()) - - return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) - - -def random_flux_derivative_arrays_on_device(ctx_factory, params): - queue = device.get_queue(ctx_factory) - - states = random_array_on_device(queue, *params.state_bounds()) - fluxes = random_array_on_device(queue, *params.flux_bounds()) - metrics = random_array_on_device(queue, *params.metric_bounds()) - metric_jacobians = random_array_on_device(queue, *params.jacobian_bounds()) - - return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) - - -def arrays_from_string(string_arrays): - return split_map_to_list(string_arrays, array_from_string, ":") - - -def array_from_string(string_array): - if ";" not in string_array: - if "," not in string_array: - array = array_from_string_1d(string_array) - else: - array = array_from_string_2d(string_array) - else: - array = array_from_string_3d(string_array) - return array.copy(order="F") - - -def array_from_string_3d(string_array): - if string_array[0] == ";": - return array_from_string_1d(string_array[1:]).reshape((-1, 1, 1)) - else: - return np.array(split_map_to_list(string_array, array_from_string_2d, ";")) - - -def array_from_string_2d(string_array): - if string_array[0] == ",": - return array_from_string_1d(string_array[1:]).reshape((-1, 1)) - else: - return np.array(split_map_to_list(string_array, array_from_string_1d, ",")) - - -def array_from_string_1d(string_array): - if string_array[0] == "i": - return np.array(split_map_to_list(string_array[1:], int, " ")) - else: - return np.array(split_map_to_list(string_array, float, " "), dtype=np.float32) - - -def split_map_to_list(string, map_func, splitter): - return list(map(map_func, string.split(splitter))) diff --git a/test.py b/test.py index 17e7f8f..514ba6f 100644 --- a/test.py +++ b/test.py @@ -11,43 +11,11 @@ import logging import pytest from pytest import approx -import pyopencl as cl from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) - -_QUEUE = [] - - -def get_queue(ctx_factory): - if not _QUEUE: - setup_queue(ctx_factory) - return _QUEUE[0] - - -def setup_queue(ctx_factory): - ctx = ctx_factory() - _QUEUE.append(cl.CommandQueue(ctx)) - - -_WENO_PRG = [] - - -def parse_weno(): - fn = "WENO.F90" - - with open(fn, "r") as infile: - infile_content = infile.read() - - prg = lp.parse_transformed_fortran(infile_content, filename=fn) - _WENO_PRG.append(prg) - - -def get_weno_program(): - if not _WENO_PRG: - parse_weno() - return _WENO_PRG[0] +from utilities import * class RoeParams: @@ -247,70 +215,6 @@ def compare_roe_property(states, fluxes, R, Rinv, lam): compare_arrays(R@temp, dFlux) -def transform_compute_flux_derivative_basic(prg): - cfd = prg["compute_flux_derivatives"] - - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") - - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) - - return prg.with_kernel(cfd) - - -def transform_weno_for_gpu(prg): - prg = transform_compute_flux_derivative_basic(prg) - - cfd = prg["compute_flux_derivatives"] - - for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: - cfd = lp.split_iname(cfd, "i"+suffix, 16, - outer_tag="g.0", inner_tag="l.0") - cfd = lp.split_iname(cfd, "j"+suffix, 16, - outer_tag="g.1", inner_tag="l.1") - - for var_name in ["delta_xi", "delta_eta", "delta_zeta"]: - cfd = lp.assignment_to_subst(cfd, var_name) - - cfd = lp.add_barrier(cfd, "tag:to_generalized", "tag:flux_x_compute") - cfd = lp.add_barrier(cfd, "tag:flux_x_compute", "tag:flux_x_diff") - cfd = lp.add_barrier(cfd, "tag:flux_x_diff", "tag:flux_y_compute") - cfd = lp.add_barrier(cfd, "tag:flux_y_compute", "tag:flux_y_diff") - cfd = lp.add_barrier(cfd, "tag:flux_y_diff", "tag:flux_z_compute") - cfd = lp.add_barrier(cfd, "tag:flux_z_compute", "tag:flux_z_diff") - cfd = lp.add_barrier(cfd, "tag:flux_z_diff", "tag:from_generalized") - - prg = prg.with_kernel(cfd) - - # FIXME: These should work, but don't - # FIXME: Undo the hand-inlining in WENO.F90 - #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") - #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") - - if 0: - print(prg["convert_to_generalized_frozen"]) - 1/0 - - return prg - - -def transform_compute_flux_derivative_gpu(queue, prg): - prg = transform_weno_for_gpu(prg) - - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - - prg = lp.set_options(prg, no_numpy=True) - return prg - - @pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"), diff --git a/transform_fixtures.py b/utilities.py similarity index 67% rename from transform_fixtures.py rename to utilities.py index f69581a..a188dce 100644 --- a/transform_fixtures.py +++ b/utilities.py @@ -1,7 +1,46 @@ -import loopy as lp +import numpy as np +import numpy.linalg as la +import pyopencl as cl +import pyopencl.array # noqa +import pyopencl.tools # noqa +import pyopencl.clrandom # noqa +import loopy as lp # noqa -def compute_flux_derivative_basic(prg): +_QUEUE = [] + + +def get_queue(ctx_factory): + if not _QUEUE: + setup_queue(ctx_factory) + return _QUEUE[0] + + +def setup_queue(ctx_factory): + ctx = ctx_factory() + _QUEUE.append(cl.CommandQueue(ctx)) + + +_WENO_PRG = [] + + +def parse_weno(): + fn = "WENO.F90" + + with open(fn, "r") as infile: + infile_content = infile.read() + + prg = lp.parse_transformed_fortran(infile_content, filename=fn) + _WENO_PRG.append(prg) + + +def get_weno_program(): + if not _WENO_PRG: + parse_weno() + return _WENO_PRG[0] + + +def transform_compute_flux_derivative_basic(prg): cfd = prg["compute_flux_derivatives"] cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") @@ -16,8 +55,8 @@ def compute_flux_derivative_basic(prg): return prg.with_kernel(cfd) -def weno_for_gpu(prg): - prg = compute_flux_derivative_basic(prg) +def transform_weno_for_gpu(prg): + prg = transform_compute_flux_derivative_basic(prg) cfd = prg["compute_flux_derivatives"] @@ -52,8 +91,8 @@ def weno_for_gpu(prg): return prg -def compute_flux_derivative_gpu(queue, prg): - prg = weno_for_gpu(prg) +def transform_compute_flux_derivative_gpu(queue, prg): + prg = transform_weno_for_gpu(prg) prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) -- GitLab From 68f74b816c6f6e489a3d348cd34153c2cf240942 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Wed, 19 Jun 2019 22:08:12 -0500 Subject: [PATCH 08/21] move benchmark array generation to test-local functions --- benchmark.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/benchmark.py b/benchmark.py index 444b689..b48c19c 100644 --- a/benchmark.py +++ b/benchmark.py @@ -17,15 +17,13 @@ from pyopencl.tools import ( # noqa from utilities import * -def setup_random_array(*shape): - return np.random.random_sample(shape).astype(np.float32).copy(order="F") - - -def setup_random_array_on_device(queue, *shape): - return cl.array.to_device(queue, setup_random_array(*shape)) - - def benchmark_compute_flux_derivatives_gpu(ctx_factory): + def random_array_on_device(queue, *shape): + return cl.array.to_device(queue, random_array(*shape)) + + def random_array(*shape): + return np.random.random_sample(shape).astype(np.float32).copy(order="F") + logging.basicConfig(level="INFO") prg = get_weno_program() @@ -41,10 +39,10 @@ def benchmark_compute_flux_derivatives_gpu(ctx_factory): nz = n print("ARRAY GEN") - states = setup_random_array_on_device(queue, nvars, nx+6, ny+6, nz+6) - fluxes = setup_random_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) - metrics = setup_random_array_on_device(queue, ndim, ndim, nx+6, ny+6, nz+6) - metric_jacobians = setup_random_array_on_device(queue, nx+6, ny+6, nz+6) + states = random_array_on_device(queue, nvars, nx+6, ny+6, nz+6) + fluxes = random_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) + metrics = random_array_on_device(queue, ndim, ndim, nx+6, ny+6, nz+6) + metric_jacobians = random_array_on_device(queue, nx+6, ny+6, nz+6) print("END ARRAY GEN") flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, -- GitLab From 13e2a49d25d7c6d892934e77e374b0003395b672 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 09:17:57 -0500 Subject: [PATCH 09/21] refactor out kernel_compute_flux_derivatives interface function --- test.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/test.py b/test.py index 514ba6f..75b6e16 100644 --- a/test.py +++ b/test.py @@ -185,18 +185,6 @@ def kernel_mult_mat_vec(queue, prg, alpha, a, b): return c_dev.get() -def kernel_compute_flux_derivatives(queue, prg, params, arrays): - flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim, - params.nx_halo, params.ny_halo, params.nz_halo)) - - prg(queue, nvars=params.nvars, ndim=params.ndim, - states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, - metric_jacobians=arrays.metric_jacobians, - flux_derivatives=flux_derivatives_dev) - - return flux_derivatives_dev.get() - - def compare_arrays(a, b): assert a == approx(b) @@ -263,7 +251,13 @@ def test_compute_flux_derivatives(ctx_factory): params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup_random_flux_derivative_arrays(params) - kernel_compute_flux_derivatives(queue, prg, params, arrays) + flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim, + params.nx_halo, params.ny_halo, params.nz_halo)) + + prg(queue, nvars=params.nvars, ndim=params.ndim, + states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, + metric_jacobians=arrays.metric_jacobians, + flux_derivatives=flux_derivatives_dev) #@pytest.mark.slow @@ -275,7 +269,13 @@ def test_compute_flux_derivatives_gpu(ctx_factory): params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup_random_flux_derivative_arrays_on_device(ctx_factory, params) - kernel_compute_flux_derivatives(queue, prg, params, arrays) + flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim, + params.nx_halo, params.ny_halo, params.nz_halo)) + + prg(queue, nvars=params.nvars, ndim=params.ndim, + states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, + metric_jacobians=arrays.metric_jacobians, + flux_derivatives=flux_derivatives_dev) # This lets you run 'python test.py test_case(cl._csc)' without pytest. -- GitLab From b8979ede9f64ef8d23d39453292f9157c225a3e3 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 10:19:57 -0500 Subject: [PATCH 10/21] refactor test_compute_flux_derivatives to use lp.auto_test_vs_ref --- test.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/test.py b/test.py index 75b6e16..2e810e9 100644 --- a/test.py +++ b/test.py @@ -244,20 +244,11 @@ def test_matvec(ctx_factory): #@pytest.mark.slow def test_compute_flux_derivatives(ctx_factory): - queue = get_queue(ctx_factory) prg = get_weno_program() prg = transform_compute_flux_derivative_basic(prg) - params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) - arrays = setup_random_flux_derivative_arrays(params) - - flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim, - params.nx_halo, params.ny_halo, params.nz_halo)) - - prg(queue, nvars=params.nvars, ndim=params.ndim, - states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, - metric_jacobians=arrays.metric_jacobians, - flux_derivatives=flux_derivatives_dev) + lp.auto_test_vs_ref(prg, ctx_factory(), + parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) #@pytest.mark.slow -- GitLab From 2ac94b6fddb6726b63130127f25c623df4144f5e Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 10:31:40 -0500 Subject: [PATCH 11/21] refactor test_compute_flux_derivatives to use lp.auto_test_vs_ref --- test.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/test.py b/test.py index 2e810e9..2b694fb 100644 --- a/test.py +++ b/test.py @@ -253,20 +253,11 @@ def test_compute_flux_derivatives(ctx_factory): #@pytest.mark.slow def test_compute_flux_derivatives_gpu(ctx_factory): - queue = get_queue(ctx_factory) prg = get_weno_program() - prg = transform_compute_flux_derivative_gpu(queue, prg) - - params = setup_flux_derivative_params(ndim=3, nvars=5, n=10) - arrays = setup_random_flux_derivative_arrays_on_device(ctx_factory, params) - - flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim, - params.nx_halo, params.ny_halo, params.nz_halo)) + prg = transform_compute_flux_derivative_gpu(get_queue(ctx_factory), prg) - prg(queue, nvars=params.nvars, ndim=params.ndim, - states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, - metric_jacobians=arrays.metric_jacobians, - flux_derivatives=flux_derivatives_dev) + lp.auto_test_vs_ref(prg, ctx_factory(), + parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) # This lets you run 'python test.py test_case(cl._csc)' without pytest. -- GitLab From c2d76477b408a2cc7f7276fe41e44ca7db541d5b Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 11:00:12 -0500 Subject: [PATCH 12/21] heavy reorganization to put things in utilities.py or as test-local functions --- benchmark.py | 6 -- test.py | 249 ++++++++++++--------------------------------------- utilities.py | 63 +++++++++++++ 3 files changed, 122 insertions(+), 196 deletions(-) diff --git a/benchmark.py b/benchmark.py index b48c19c..f65cd58 100644 --- a/benchmark.py +++ b/benchmark.py @@ -18,12 +18,6 @@ from utilities import * def benchmark_compute_flux_derivatives_gpu(ctx_factory): - def random_array_on_device(queue, *shape): - return cl.array.to_device(queue, random_array(*shape)) - - def random_array(*shape): - return np.random.random_sample(shape).astype(np.float32).copy(order="F") - logging.basicConfig(level="INFO") prg = get_weno_program() diff --git a/test.py b/test.py index 2b694fb..7d84ace 100644 --- a/test.py +++ b/test.py @@ -18,191 +18,6 @@ from pyopencl.tools import ( # noqa from utilities import * -class RoeParams: - def __init__(self, nvars, ndim, d): - self.nvars = nvars - self.ndim = ndim - self.d = d - - def mat_bounds(self): - return self.nvars, self.nvars - - def vec_bounds(self): - return self.nvars - - -class FluxDerivativeParams: - def __init__(self, nvars, ndim, nx, ny, nz): - self.nvars = nvars - self.ndim = ndim - - self.nx = nx - self.ny = ny - self.nz = nz - - self.nhalo = 3 - self.nx_halo = self.nx + 2*self.nhalo - self.ny_halo = self.ny + 2*self.nhalo - self.nz_halo = self.nz + 2*self.nhalo - - def state_bounds(self): - return self.nvars, self.nx_halo, self.ny_halo, self.nz_halo - - def flux_bounds(self): - return self.nvars, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo - - def metric_bounds(self): - return self.ndim, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo - - def jacobian_bounds(self): - return self.nx_halo, self.ny_halo, self.nz_halo - - -class FluxDerivativeArrays: - def __init__(self, states, fluxes, metrics, metric_jacobians): - self.states = states - self.fluxes = fluxes - self.metrics = metrics - self.metric_jacobians = metric_jacobians - - -def setup_roe_params(nvars, ndim, direction): - dirs = {"x" : 1, "y" : 2, "z" : 3} - return RoeParams(nvars, ndim, dirs[direction]) - - -def setup_flux_derivative_params(nvars, ndim, n): - return FluxDerivativeParams(nvars, ndim, n, n, n) - - -def setup_empty_array_on_device(queue, shape): - return cl.array.empty(queue, shape, dtype=np.float32, order="F") - - -def setup_identity(n): - return np.identity(n).astype(np.float32).copy(order="F") - - -def setup_random_array(*shape): - return np.random.random_sample(shape).astype(np.float32).copy(order="F") - - -def setup_random_array_on_device(queue, *shape): - return cl.array.to_device(queue, setup_random_array(*shape)) - - -def setup_random_flux_derivative_arrays(params): - states = setup_random_array(*params.state_bounds()) - fluxes = setup_random_array(*params.flux_bounds()) - metrics = setup_random_array(*params.metric_bounds()) - metric_jacobians = setup_random_array(*params.jacobian_bounds()) - - return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) - - -def setup_random_flux_derivative_arrays_on_device(ctx_factory, params): - queue = get_queue(ctx_factory) - - states = setup_random_array_on_device(queue, *params.state_bounds()) - fluxes = setup_random_array_on_device(queue, *params.flux_bounds()) - metrics = setup_random_array_on_device(queue, *params.metric_bounds()) - metric_jacobians = setup_random_array_on_device(queue, *params.jacobian_bounds()) - - return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) - - -def arrays_from_string(string_arrays): - return split_map_to_list(string_arrays, array_from_string, ":") - - -def array_from_string(string_array): - if ";" not in string_array: - if "," not in string_array: - array = array_from_string_1d(string_array) - else: - array = array_from_string_2d(string_array) - else: - array = array_from_string_3d(string_array) - return array.copy(order="F") - - -def array_from_string_3d(string_array): - if string_array[0] == ";": - return array_from_string_1d(string_array[1:]).reshape((-1, 1, 1)) - else: - return np.array(split_map_to_list(string_array, array_from_string_2d, ";")) - - -def array_from_string_2d(string_array): - if string_array[0] == ",": - return array_from_string_1d(string_array[1:]).reshape((-1, 1)) - else: - return np.array(split_map_to_list(string_array, array_from_string_1d, ",")) - - -def array_from_string_1d(string_array): - if string_array[0] == "i": - return np.array(split_map_to_list(string_array[1:], int, " ")) - else: - return np.array(split_map_to_list(string_array, float, " "), dtype=np.float32) - - -def split_map_to_list(string, map_func, splitter): - return list(map(map_func, string.split(splitter))) - - -def with_root_kernel(prg, root_name): - # FIXME This is a little less beautiful than it could be - new_prg = prg.copy(name=root_name) - for name in prg: - clbl = new_prg[name] - if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: - new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) - - new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) - return new_prg - - -def kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen): - R_dev = setup_empty_array_on_device(queue, params.mat_bounds()) - Rinv_dev = setup_empty_array_on_device(queue, params.mat_bounds()) - lam_dev = setup_empty_array_on_device(queue, params.vec_bounds()) - - prg = with_root_kernel(prg, "roe_eigensystem") - prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, - states=states, metrics_frozen=metrics_frozen, - R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) - - return R_dev.get(), Rinv_dev.get(), lam_dev.get() - - -def kernel_mult_mat_vec(queue, prg, alpha, a, b): - c_dev = setup_empty_array_on_device(queue, b.shape) - - prg = with_root_kernel(prg, "mult_mat_vec") - prg(queue, a=a, b=b, c=c_dev, alpha=alpha) - - return c_dev.get() - - -def compare_arrays(a, b): - assert a == approx(b) - - -def compare_roe_identity(states, R, Rinv): - dState = states[:,1] - states[:,0] - compare_arrays(R@(Rinv@dState), dState) - - -def compare_roe_property(states, fluxes, R, Rinv, lam): - dState = states[:,1] - states[:,0] - dFlux = fluxes[:,1] - fluxes[:,0] - - temp = Rinv@dState - temp = np.multiply(lam, temp) - compare_arrays(R@temp, dFlux) - - @pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"), @@ -216,26 +31,80 @@ def compare_roe_property(states, fluxes, R, Rinv, lam): ("2 1,4 1,8 2,12 3,64 11", "12 3,24 3,48 6,75.2 10.6,403.2 37.8", "z") ]) def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): + class RoeParams: + def __init__(self, nvars, ndim, d): + self.nvars = nvars + self.ndim = ndim + self.d = d + + def mat_bounds(self): + return self.nvars, self.nvars + + def vec_bounds(self): + return self.nvars + + def setup_roe_params(nvars, ndim, direction): + dirs = {"x" : 1, "y" : 2, "z" : 3} + return RoeParams(nvars, ndim, dirs[direction]) + + def identity_matrix(n): + return np.identity(n).astype(np.float32).copy(order="F") + + def kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen): + R_dev = empty_array_on_device(queue, params.mat_bounds()) + Rinv_dev = empty_array_on_device(queue, params.mat_bounds()) + lam_dev = empty_array_on_device(queue, params.vec_bounds()) + + prg = with_root_kernel(prg, "roe_eigensystem") + prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, + states=states, metrics_frozen=metrics_frozen, + R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) + + return R_dev.get(), Rinv_dev.get(), lam_dev.get() + + def check_roe_identity(states, R, Rinv): + dState = states[:,1] - states[:,0] + compare_arrays(R@(Rinv@dState), dState) + + def check_roe_property(states, fluxes, R, Rinv, lam): + dState = states[:,1] - states[:,0] + dFlux = fluxes[:,1] - fluxes[:,0] + + temp = Rinv@dState + temp = np.multiply(lam, temp) + compare_arrays(R@temp, dFlux) + queue = get_queue(ctx_factory) prg = get_weno_program() params = setup_roe_params(nvars=5, ndim=3, direction=direction) states = array_from_string(states_str) - metrics_frozen = setup_identity(params.ndim) + metrics_frozen = identity_matrix(params.ndim) R, Rinv, lam = kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen) - compare_roe_identity(states, R, Rinv) + check_roe_identity(states, R, Rinv) fluxes = array_from_string(fluxes_str) - compare_roe_property(states, fluxes, R, Rinv, lam) + check_roe_property(states, fluxes, R, Rinv, lam) def test_matvec(ctx_factory): + def kernel_mult_mat_vec(queue, prg, alpha, a, b): + c_dev = empty_array_on_device(queue, b.shape) + + prg = with_root_kernel(prg, "mult_mat_vec") + prg(queue, a=a, b=b, c=c_dev, alpha=alpha) + + return c_dev.get() + + def random_array(*shape): + return np.random.random_sample(shape).astype(np.float32).copy(order="F") + queue = get_queue(ctx_factory) prg = get_weno_program() - a = setup_random_array(10, 10) - b = setup_random_array(10) + a = random_array(10, 10) + b = random_array(10) c = kernel_mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) diff --git a/utilities.py b/utilities.py index a188dce..8c942d7 100644 --- a/utilities.py +++ b/utilities.py @@ -5,6 +5,69 @@ import pyopencl.array # noqa import pyopencl.tools # noqa import pyopencl.clrandom # noqa import loopy as lp # noqa +from pytest import approx + + +def split_map_to_list(string, map_func, splitter): + return list(map(map_func, string.split(splitter))) + + +def arrays_from_string(string_arrays): + return split_map_to_list(string_arrays, array_from_string, ":") + + +def array_from_string(string_array): + def array_from_string_1d(string_array): + if string_array[0] == "i": + return np.array(split_map_to_list(string_array[1:], int, " ")) + else: + return np.array(split_map_to_list(string_array, float, " "), dtype=np.float32) + + def array_from_string_2d(string_array): + if string_array[0] == ",": + return array_from_string_1d(string_array[1:]).reshape((-1, 1)) + else: + return np.array(split_map_to_list(string_array, array_from_string_1d, ",")) + + def array_from_string_3d(string_array): + if string_array[0] == ";": + return array_from_string_1d(string_array[1:]).reshape((-1, 1, 1)) + else: + return np.array(split_map_to_list(string_array, array_from_string_2d, ";")) + + if ";" not in string_array: + if "," not in string_array: + array = array_from_string_1d(string_array) + else: + array = array_from_string_2d(string_array) + else: + array = array_from_string_3d(string_array) + return array.copy(order="F") + + +def with_root_kernel(prg, root_name): + # FIXME This is a little less beautiful than it could be + new_prg = prg.copy(name=root_name) + for name in prg: + clbl = new_prg[name] + if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: + new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) + + new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) + return new_prg + + +def compare_arrays(a, b): + assert a == approx(b) + + +def random_array_on_device(queue, *shape): + empty = empty_array_on_device(queue, shape) + return cl.clrandom.fill_rand(empty) + + +def empty_array_on_device(queue, shape): + return cl.array.empty(queue, shape, dtype=np.float32, order="F") _QUEUE = [] -- GitLab From a8f1ddbca390464c3db951c4802168dc9a29e268 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 11:07:21 -0500 Subject: [PATCH 13/21] reorganize order of utility functions --- utilities.py | 100 +++++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 46 deletions(-) diff --git a/utilities.py b/utilities.py index 8c942d7..2ada8c7 100644 --- a/utilities.py +++ b/utilities.py @@ -8,8 +8,19 @@ import loopy as lp # noqa from pytest import approx -def split_map_to_list(string, map_func, splitter): - return list(map(map_func, string.split(splitter))) +### Arrays ### + +def compare_arrays(a, b): + assert a == approx(b) + + +def random_array_on_device(queue, *shape): + empty = empty_array_on_device(queue, shape) + return cl.clrandom.fill_rand(empty) + + +def empty_array_on_device(queue, shape): + return cl.array.empty(queue, shape, dtype=np.float32, order="F") def arrays_from_string(string_arrays): @@ -45,30 +56,11 @@ def array_from_string(string_array): return array.copy(order="F") -def with_root_kernel(prg, root_name): - # FIXME This is a little less beautiful than it could be - new_prg = prg.copy(name=root_name) - for name in prg: - clbl = new_prg[name] - if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: - new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) - - new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) - return new_prg - - -def compare_arrays(a, b): - assert a == approx(b) - - -def random_array_on_device(queue, *shape): - empty = empty_array_on_device(queue, shape) - return cl.clrandom.fill_rand(empty) - +def split_map_to_list(string, map_func, splitter): + return list(map(map_func, string.split(splitter))) -def empty_array_on_device(queue, shape): - return cl.array.empty(queue, shape, dtype=np.float32, order="F") +### Device ### _QUEUE = [] @@ -84,9 +76,17 @@ def setup_queue(ctx_factory): _QUEUE.append(cl.CommandQueue(ctx)) +### Program / Kernel ### + _WENO_PRG = [] +def get_weno_program(): + if not _WENO_PRG: + parse_weno() + return _WENO_PRG[0] + + def parse_weno(): fn = "WENO.F90" @@ -97,25 +97,29 @@ def parse_weno(): _WENO_PRG.append(prg) -def get_weno_program(): - if not _WENO_PRG: - parse_weno() - return _WENO_PRG[0] +def with_root_kernel(prg, root_name): + # FIXME This is a little less beautiful than it could be + new_prg = prg.copy(name=root_name) + for name in prg: + clbl = new_prg[name] + if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: + new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) + new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) + return new_prg -def transform_compute_flux_derivative_basic(prg): - cfd = prg["compute_flux_derivatives"] - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") +def transform_compute_flux_derivative_gpu(queue, prg): + prg = transform_weno_for_gpu(prg) - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - return prg.with_kernel(cfd) + if 1: + with open("gen-code.cl", "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + + prg = lp.set_options(prg, no_numpy=True) + return prg def transform_weno_for_gpu(prg): @@ -154,14 +158,18 @@ def transform_weno_for_gpu(prg): return prg -def transform_compute_flux_derivative_gpu(queue, prg): - prg = transform_weno_for_gpu(prg) +def transform_compute_flux_derivative_basic(prg): + cfd = prg["compute_flux_derivatives"] - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") + + cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", + lp.AddressSpace.GLOBAL) + cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", + lp.AddressSpace.GLOBAL) + + return prg.with_kernel(cfd) - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - prg = lp.set_options(prg, no_numpy=True) - return prg -- GitLab From 23da6d6d0ca1ad89d6cc46ddce72fad0546a4bbb Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 13:55:57 -0500 Subject: [PATCH 14/21] use empty_array utility for benchmark code, refactor it to have same interface as random_array utility --- benchmark.py | 3 +-- test.py | 10 +++++----- utilities.py | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/benchmark.py b/benchmark.py index f65cd58..df06f97 100644 --- a/benchmark.py +++ b/benchmark.py @@ -39,8 +39,7 @@ def benchmark_compute_flux_derivatives_gpu(ctx_factory): metric_jacobians = random_array_on_device(queue, nx+6, ny+6, nz+6) print("END ARRAY GEN") - flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, - nz+6), dtype=np.float32, order="F") + flux_derivatives_dev = empty_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) diff --git a/test.py b/test.py index 7d84ace..a79a1f3 100644 --- a/test.py +++ b/test.py @@ -40,7 +40,7 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def mat_bounds(self): return self.nvars, self.nvars - def vec_bounds(self): + def vec_bound(self): return self.nvars def setup_roe_params(nvars, ndim, direction): @@ -51,9 +51,9 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): return np.identity(n).astype(np.float32).copy(order="F") def kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen): - R_dev = empty_array_on_device(queue, params.mat_bounds()) - Rinv_dev = empty_array_on_device(queue, params.mat_bounds()) - lam_dev = empty_array_on_device(queue, params.vec_bounds()) + R_dev = empty_array_on_device(queue, *params.mat_bounds()) + Rinv_dev = empty_array_on_device(queue, *params.mat_bounds()) + lam_dev = empty_array_on_device(queue, params.vec_bound()) prg = with_root_kernel(prg, "roe_eigensystem") prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, @@ -90,7 +90,7 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def test_matvec(ctx_factory): def kernel_mult_mat_vec(queue, prg, alpha, a, b): - c_dev = empty_array_on_device(queue, b.shape) + c_dev = empty_array_on_device(queue, *b.shape) prg = with_root_kernel(prg, "mult_mat_vec") prg(queue, a=a, b=b, c=c_dev, alpha=alpha) diff --git a/utilities.py b/utilities.py index 2ada8c7..45b1012 100644 --- a/utilities.py +++ b/utilities.py @@ -15,11 +15,11 @@ def compare_arrays(a, b): def random_array_on_device(queue, *shape): - empty = empty_array_on_device(queue, shape) + empty = empty_array_on_device(queue, *shape) return cl.clrandom.fill_rand(empty) -def empty_array_on_device(queue, shape): +def empty_array_on_device(queue, *shape): return cl.array.empty(queue, shape, dtype=np.float32, order="F") -- GitLab From f925d9be89ed91a6ceb27adb59f78ecf0f3fc4f5 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 14:08:09 -0500 Subject: [PATCH 15/21] refactor out utility we don't need --- test.py | 12 +++++++++++- utilities.py | 13 ------------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/test.py b/test.py index a79a1f3..7283726 100644 --- a/test.py +++ b/test.py @@ -123,7 +123,17 @@ def test_compute_flux_derivatives(ctx_factory): #@pytest.mark.slow def test_compute_flux_derivatives_gpu(ctx_factory): prg = get_weno_program() - prg = transform_compute_flux_derivative_gpu(get_queue(ctx_factory), prg) + prg = transform_weno_for_gpu(prg) + + queue = get_queue(ctx_factory) + + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + + if 1: + with open("gen-code.cl", "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + + prg = lp.set_options(prg, no_numpy=True) lp.auto_test_vs_ref(prg, ctx_factory(), parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) diff --git a/utilities.py b/utilities.py index 45b1012..333eb4b 100644 --- a/utilities.py +++ b/utilities.py @@ -109,19 +109,6 @@ def with_root_kernel(prg, root_name): return new_prg -def transform_compute_flux_derivative_gpu(queue, prg): - prg = transform_weno_for_gpu(prg) - - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - - prg = lp.set_options(prg, no_numpy=True) - return prg - - def transform_weno_for_gpu(prg): prg = transform_compute_flux_derivative_basic(prg) -- GitLab From f76f38b09496b0f14a3f7b99fcdbc27738fa8a3a Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 14:13:41 -0500 Subject: [PATCH 16/21] add flag for printing compute_flux_derivative kernel --- utilities.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utilities.py b/utilities.py index 333eb4b..f911066 100644 --- a/utilities.py +++ b/utilities.py @@ -109,7 +109,7 @@ def with_root_kernel(prg, root_name): return new_prg -def transform_weno_for_gpu(prg): +def transform_weno_for_gpu(prg, print_kernel=False): prg = transform_compute_flux_derivative_basic(prg) cfd = prg["compute_flux_derivatives"] @@ -138,7 +138,7 @@ def transform_weno_for_gpu(prg): #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") - if 0: + if print_kernel: print(prg["convert_to_generalized_frozen"]) 1/0 -- GitLab From aa931210051ec87ad13cb7d76efa495adf5c6ea5 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 15:29:55 -0500 Subject: [PATCH 17/21] refactor out a utility and move the code to the diff loopy block in WENO.F90 --- WENO.F90 | 14 ++++++++++++++ test.py | 5 +++-- utilities.py | 17 ----------------- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/WENO.F90 b/WENO.F90 index 995d3b7..6bfad5c 100644 --- a/WENO.F90 +++ b/WENO.F90 @@ -951,6 +951,20 @@ end subroutine ! ! prg = lp.parse_fortran(lp.c_preprocess(SOURCE), FILENAME) ! prg = lp.fix_parameters(prg, ndim=3, nvars=5, _remove=False) +! +! cfd = prg["compute_flux_derivatives"] +! +! cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") +! +! cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", +! lp.AddressSpace.GLOBAL) +! cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", +! lp.AddressSpace.GLOBAL) +! cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", +! lp.AddressSpace.GLOBAL) +! +! prg = prg.with_kernel(cfd) +! ! RESULT = prg ! !$loopy end diff --git a/test.py b/test.py index 7283726..b5db8c7 100644 --- a/test.py +++ b/test.py @@ -114,7 +114,9 @@ def test_matvec(ctx_factory): #@pytest.mark.slow def test_compute_flux_derivatives(ctx_factory): prg = get_weno_program() - prg = transform_compute_flux_derivative_basic(prg) + + queue = get_queue(ctx_factory) + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) lp.auto_test_vs_ref(prg, ctx_factory(), parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) @@ -126,7 +128,6 @@ def test_compute_flux_derivatives_gpu(ctx_factory): prg = transform_weno_for_gpu(prg) queue = get_queue(ctx_factory) - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) if 1: diff --git a/utilities.py b/utilities.py index f911066..1f21f09 100644 --- a/utilities.py +++ b/utilities.py @@ -110,8 +110,6 @@ def with_root_kernel(prg, root_name): def transform_weno_for_gpu(prg, print_kernel=False): - prg = transform_compute_flux_derivative_basic(prg) - cfd = prg["compute_flux_derivatives"] for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: @@ -145,18 +143,3 @@ def transform_weno_for_gpu(prg, print_kernel=False): return prg -def transform_compute_flux_derivative_basic(prg): - cfd = prg["compute_flux_derivatives"] - - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") - - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) - - return prg.with_kernel(cfd) - - -- GitLab From 21995d88aae4f3c5877e36bdda2b2392fb99588c Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 15:43:16 -0500 Subject: [PATCH 18/21] refactor out a utility to write CL code --- benchmark.py | 22 +++++++++------------- test.py | 10 ++++------ utilities.py | 4 ++++ 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/benchmark.py b/benchmark.py index df06f97..5f48726 100644 --- a/benchmark.py +++ b/benchmark.py @@ -17,13 +17,19 @@ from pyopencl.tools import ( # noqa from utilities import * -def benchmark_compute_flux_derivatives_gpu(ctx_factory): +def benchmark_compute_flux_derivatives_gpu(ctx_factory, write_code=False): logging.basicConfig(level="INFO") prg = get_weno_program() prg = transform_weno_for_gpu(prg) queue = get_queue(ctx_factory) + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + prg = lp.set_options(prg, no_numpy=True) + prg = lp.set_options(prg, ignore_boostable_into=True) + #prg = lp.set_options(prg, write_wrapper=True) + #op_map = lp.get_op_map(prg, count_redundant_work=False) + #print(op_map) ndim = 3 nvars = 5 @@ -41,18 +47,8 @@ def benchmark_compute_flux_derivatives_gpu(ctx_factory): flux_derivatives_dev = empty_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 0: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - prg = lp.set_options(prg, ignore_boostable_into=True) - prg = lp.set_options(prg, no_numpy=True) - #prg = lp.set_options(prg, write_wrapper=True) - #op_map = lp.get_op_map(prg, count_redundant_work=False) - #print(op_map) + if write_code: + write_to_cl(prg) allocator = pyopencl.tools.MemoryPool(pyopencl.tools.ImmediateAllocator(queue)) diff --git a/test.py b/test.py index b5db8c7..4d2a764 100644 --- a/test.py +++ b/test.py @@ -123,19 +123,17 @@ def test_compute_flux_derivatives(ctx_factory): #@pytest.mark.slow -def test_compute_flux_derivatives_gpu(ctx_factory): +def test_compute_flux_derivatives_gpu(ctx_factory, write_code=False): prg = get_weno_program() prg = transform_weno_for_gpu(prg) queue = get_queue(ctx_factory) prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - prg = lp.set_options(prg, no_numpy=True) + if write_code: + write_to_cl(prg) + lp.auto_test_vs_ref(prg, ctx_factory(), parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) diff --git a/utilities.py b/utilities.py index 1f21f09..d68ab87 100644 --- a/utilities.py +++ b/utilities.py @@ -143,3 +143,7 @@ def transform_weno_for_gpu(prg, print_kernel=False): return prg +def write_to_cl(prg, outfilename="gen-code.cl"): + with open(outfilename, "w") as outf: + outf.write(lp.generate_code_v2(prg).device_code()) + -- GitLab From fdbef6e721fd9c7bc264b1d6f96a9d731aa90fd3 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 16:13:15 -0500 Subject: [PATCH 19/21] using one warmup round only for auto_test_vs_ref --- test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test.py b/test.py index 4d2a764..8a59001 100644 --- a/test.py +++ b/test.py @@ -118,7 +118,7 @@ def test_compute_flux_derivatives(ctx_factory): queue = get_queue(ctx_factory) prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - lp.auto_test_vs_ref(prg, ctx_factory(), + lp.auto_test_vs_ref(prg, ctx_factory(), warmup_rounds=1, parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) @@ -134,7 +134,7 @@ def test_compute_flux_derivatives_gpu(ctx_factory, write_code=False): if write_code: write_to_cl(prg) - lp.auto_test_vs_ref(prg, ctx_factory(), + lp.auto_test_vs_ref(prg, ctx_factory(), warmup_rounds=1, parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) -- GitLab From 28b9a3fe2402426323b732e23cf85d3ab21e19d0 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 17:24:05 -0500 Subject: [PATCH 20/21] refactor out kernel_mult_mat_vec --- test.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/test.py b/test.py index 8a59001..81dff66 100644 --- a/test.py +++ b/test.py @@ -89,29 +89,24 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def test_matvec(ctx_factory): - def kernel_mult_mat_vec(queue, prg, alpha, a, b): - c_dev = empty_array_on_device(queue, *b.shape) - - prg = with_root_kernel(prg, "mult_mat_vec") - prg(queue, a=a, b=b, c=c_dev, alpha=alpha) - - return c_dev.get() - def random_array(*shape): return np.random.random_sample(shape).astype(np.float32).copy(order="F") - queue = get_queue(ctx_factory) prg = get_weno_program() + queue = get_queue(ctx_factory) a = random_array(10, 10) b = random_array(10) - c = kernel_mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) + c_dev = empty_array_on_device(queue, *b.shape) + + prg = with_root_kernel(prg, "mult_mat_vec") + prg(queue, alpha=1.0, a=a, b=b, c=c_dev) - compare_arrays(a@b, c) + compare_arrays(a@b, c_dev.get()) -#@pytest.mark.slow +@pytest.mark.slow def test_compute_flux_derivatives(ctx_factory): prg = get_weno_program() @@ -122,7 +117,7 @@ def test_compute_flux_derivatives(ctx_factory): parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) -#@pytest.mark.slow +@pytest.mark.slow def test_compute_flux_derivatives_gpu(ctx_factory, write_code=False): prg = get_weno_program() prg = transform_weno_for_gpu(prg) -- GitLab From 633a80a41da443b04a98ab913b6ce89b9ca41731 Mon Sep 17 00:00:00 2001 From: "Timothy A. Smith" Date: Thu, 20 Jun 2019 17:34:07 -0500 Subject: [PATCH 21/21] bugfix in random_array_on_device --- test.py | 13 +++++-------- utilities.py | 5 +++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/test.py b/test.py index 81dff66..e972734 100644 --- a/test.py +++ b/test.py @@ -89,21 +89,18 @@ def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): def test_matvec(ctx_factory): - def random_array(*shape): - return np.random.random_sample(shape).astype(np.float32).copy(order="F") - prg = get_weno_program() queue = get_queue(ctx_factory) - a = random_array(10, 10) - b = random_array(10) + a = random_array_on_device(queue, 10, 10) + b = random_array_on_device(queue, 10) - c_dev = empty_array_on_device(queue, *b.shape) + c = empty_array_on_device(queue, 10) prg = with_root_kernel(prg, "mult_mat_vec") - prg(queue, alpha=1.0, a=a, b=b, c=c_dev) + prg(queue, alpha=1.0, a=a, b=b, c=c) - compare_arrays(a@b, c_dev.get()) + compare_arrays(a.get()@b.get(), c.get()) @pytest.mark.slow diff --git a/utilities.py b/utilities.py index d68ab87..306c28e 100644 --- a/utilities.py +++ b/utilities.py @@ -15,8 +15,9 @@ def compare_arrays(a, b): def random_array_on_device(queue, *shape): - empty = empty_array_on_device(queue, *shape) - return cl.clrandom.fill_rand(empty) + ary = empty_array_on_device(queue, *shape) + cl.clrandom.fill_rand(ary) + return ary def empty_array_on_device(queue, *shape): -- GitLab