diff --git a/benchmark.py b/benchmark.py index 00034a7648dc8d7e31ecf85ad36040db2ba02ed5..444b689e43a3a794ece2162c0511aceadb9f66c5 100644 --- a/benchmark.py +++ b/benchmark.py @@ -14,18 +14,24 @@ from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -import device_fixtures as device -import program_fixtures as program -import transform_fixtures as transform -import setup_fixtures as setup +from utilities import * + + +def setup_random_array(*shape): + return np.random.random_sample(shape).astype(np.float32).copy(order="F") + + +def setup_random_array_on_device(queue, *shape): + return cl.array.to_device(queue, setup_random_array(*shape)) + def benchmark_compute_flux_derivatives_gpu(ctx_factory): logging.basicConfig(level="INFO") - prg = program.get_weno() - prg = transform.weno_for_gpu(prg) + prg = get_weno_program() + prg = transform_weno_for_gpu(prg) - queue = device.get_queue(ctx_factory) + queue = get_queue(ctx_factory) ndim = 3 nvars = 5 @@ -35,10 +41,10 @@ def benchmark_compute_flux_derivatives_gpu(ctx_factory): nz = n print("ARRAY GEN") - states = setup.random_array_on_device(queue, nvars, nx+6, ny+6, nz+6) - fluxes = setup.random_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) - metrics = setup.random_array_on_device(queue, ndim, ndim, nx+6, ny+6, nz+6) - metric_jacobians = setup.random_array_on_device(queue, nx+6, ny+6, nz+6) + states = setup_random_array_on_device(queue, nvars, nx+6, ny+6, nz+6) + fluxes = setup_random_array_on_device(queue, nvars, ndim, nx+6, ny+6, nz+6) + metrics = setup_random_array_on_device(queue, ndim, ndim, nx+6, ny+6, nz+6) + metric_jacobians = setup_random_array_on_device(queue, nx+6, ny+6, nz+6) print("END ARRAY GEN") flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6, diff --git a/device_fixtures.py b/device_fixtures.py deleted file mode 100644 index d0dbc5929dc744a0fed6354f3aa8aa34897bf57c..0000000000000000000000000000000000000000 --- a/device_fixtures.py +++ /dev/null @@ -1,15 +0,0 @@ -import pyopencl as cl - - -_QUEUE = [] - - -def get_queue(ctx_factory): - if not _QUEUE: - setup_queue(ctx_factory) - return _QUEUE[0] - - -def setup_queue(ctx_factory): - ctx = ctx_factory() - _QUEUE.append(cl.CommandQueue(ctx)) diff --git a/kernel_fixtures.py b/kernel_fixtures.py deleted file mode 100644 index 7f3dff465c30ab8fc9101ab092ef4c134bcfa951..0000000000000000000000000000000000000000 --- a/kernel_fixtures.py +++ /dev/null @@ -1,49 +0,0 @@ -import loopy as lp # noqa - -import setup_fixtures as setup - - -def with_root_kernel(prg, root_name): - # FIXME This is a little less beautiful than it could be - new_prg = prg.copy(name=root_name) - for name in prg: - clbl = new_prg[name] - if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host: - new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False)) - - new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True)) - return new_prg - - -def roe_eigensystem(queue, prg, params, states, metrics_frozen): - R_dev = setup.empty_array_on_device(queue, params.mat_bounds()) - Rinv_dev = setup.empty_array_on_device(queue, params.mat_bounds()) - lam_dev = setup.empty_array_on_device(queue, params.vec_bounds()) - - prg = with_root_kernel(prg, "roe_eigensystem") - prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d, - states=states, metrics_frozen=metrics_frozen, - R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev) - - return R_dev.get(), Rinv_dev.get(), lam_dev.get() - - -def mult_mat_vec(queue, prg, alpha, a, b): - c_dev = setup.empty_array_on_device(queue, b.shape) - - prg = with_root_kernel(prg, "mult_mat_vec") - prg(queue, a=a, b=b, c=c_dev, alpha=alpha) - - return c_dev.get() - - -def compute_flux_derivatives(queue, prg, params, arrays): - flux_derivatives_dev = setup.empty_array_on_device(queue, (params.nvars, params.ndim, - params.nx_halo, params.ny_halo, params.nz_halo)) - - prg(queue, nvars=params.nvars, ndim=params.ndim, - states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics, - metric_jacobians=arrays.metric_jacobians, - flux_derivatives=flux_derivatives_dev) - - return flux_derivatives_dev.get() diff --git a/program_fixtures.py b/program_fixtures.py deleted file mode 100644 index 0f50ff1c364bc59f201fb1e551a34fde1602c6f2..0000000000000000000000000000000000000000 --- a/program_fixtures.py +++ /dev/null @@ -1,20 +0,0 @@ -import loopy as lp - - -_WENO_PRG = [] - - -def parse_weno(): - fn = "WENO.F90" - - with open(fn, "r") as infile: - infile_content = infile.read() - - prg = lp.parse_transformed_fortran(infile_content, filename=fn) - _WENO_PRG.append(prg) - - -def get_weno(): - if not _WENO_PRG: - parse_weno() - return _WENO_PRG[0] diff --git a/setup_fixtures.py b/setup_fixtures.py deleted file mode 100644 index 6f1debcb3e2bc1b8acb16f45d58bd3a16771a25a..0000000000000000000000000000000000000000 --- a/setup_fixtures.py +++ /dev/null @@ -1,138 +0,0 @@ -import numpy as np -import pyopencl as cl -import pyopencl.array # noqa - -import device_fixtures as device - - -class RoeParams: - def __init__(self, nvars, ndim, d): - self.nvars = nvars - self.ndim = ndim - self.d = d - - def mat_bounds(self): - return self.nvars, self.nvars - - def vec_bounds(self): - return self.nvars - - -class FluxDerivativeParams: - def __init__(self, nvars, ndim, nx, ny, nz): - self.nvars = nvars - self.ndim = ndim - - self.nx = nx - self.ny = ny - self.nz = nz - - self.nhalo = 3 - self.nx_halo = self.nx + 2*self.nhalo - self.ny_halo = self.ny + 2*self.nhalo - self.nz_halo = self.nz + 2*self.nhalo - - def state_bounds(self): - return self.nvars, self.nx_halo, self.ny_halo, self.nz_halo - - def flux_bounds(self): - return self.nvars, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo - - def metric_bounds(self): - return self.ndim, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo - - def jacobian_bounds(self): - return self.nx_halo, self.ny_halo, self.nz_halo - - -class FluxDerivativeArrays: - def __init__(self, states, fluxes, metrics, metric_jacobians): - self.states = states - self.fluxes = fluxes - self.metrics = metrics - self.metric_jacobians = metric_jacobians - - -def roe_params(nvars, ndim, direction): - dirs = {"x" : 1, "y" : 2, "z" : 3} - return RoeParams(nvars, ndim, dirs[direction]) - - -def flux_derivative_params(nvars, ndim, n): - return FluxDerivativeParams(nvars, ndim, n, n, n) - - -def empty_array_on_device(queue, shape): - return cl.array.empty(queue, shape, dtype=np.float32, order="F") - - -def identity(n): - return np.identity(n).astype(np.float32).copy(order="F") - - -def random_array(*shape): - return np.random.random_sample(shape).astype(np.float32).copy(order="F") - - -def random_array_on_device(queue, *shape): - return cl.array.to_device(queue, random_array(*shape)) - - -def random_flux_derivative_arrays(params): - states = random_array(*params.state_bounds()) - fluxes = random_array(*params.flux_bounds()) - metrics = random_array(*params.metric_bounds()) - metric_jacobians = random_array(*params.jacobian_bounds()) - - return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) - - -def random_flux_derivative_arrays_on_device(ctx_factory, params): - queue = device.get_queue(ctx_factory) - - states = random_array_on_device(queue, *params.state_bounds()) - fluxes = random_array_on_device(queue, *params.flux_bounds()) - metrics = random_array_on_device(queue, *params.metric_bounds()) - metric_jacobians = random_array_on_device(queue, *params.jacobian_bounds()) - - return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians) - - -def arrays_from_string(string_arrays): - return split_map_to_list(string_arrays, array_from_string, ":") - - -def array_from_string(string_array): - if ";" not in string_array: - if "," not in string_array: - array = array_from_string_1d(string_array) - else: - array = array_from_string_2d(string_array) - else: - array = array_from_string_3d(string_array) - return array.copy(order="F") - - -def array_from_string_3d(string_array): - if string_array[0] == ";": - return array_from_string_1d(string_array[1:]).reshape((-1, 1, 1)) - else: - return np.array(split_map_to_list(string_array, array_from_string_2d, ";")) - - -def array_from_string_2d(string_array): - if string_array[0] == ",": - return array_from_string_1d(string_array[1:]).reshape((-1, 1)) - else: - return np.array(split_map_to_list(string_array, array_from_string_1d, ",")) - - -def array_from_string_1d(string_array): - if string_array[0] == "i": - return np.array(split_map_to_list(string_array[1:], int, " ")) - else: - return np.array(split_map_to_list(string_array, float, " "), dtype=np.float32) - - -def split_map_to_list(string, map_func, splitter): - return list(map(map_func, string.split(splitter))) diff --git a/test.py b/test.py index 17e7f8f5bf84b352ca3bc5980dae57729865a929..514ba6ff78548fe7fbf3af1744146ed0a6c7ac58 100644 --- a/test.py +++ b/test.py @@ -11,43 +11,11 @@ import logging import pytest from pytest import approx -import pyopencl as cl from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) - -_QUEUE = [] - - -def get_queue(ctx_factory): - if not _QUEUE: - setup_queue(ctx_factory) - return _QUEUE[0] - - -def setup_queue(ctx_factory): - ctx = ctx_factory() - _QUEUE.append(cl.CommandQueue(ctx)) - - -_WENO_PRG = [] - - -def parse_weno(): - fn = "WENO.F90" - - with open(fn, "r") as infile: - infile_content = infile.read() - - prg = lp.parse_transformed_fortran(infile_content, filename=fn) - _WENO_PRG.append(prg) - - -def get_weno_program(): - if not _WENO_PRG: - parse_weno() - return _WENO_PRG[0] +from utilities import * class RoeParams: @@ -247,70 +215,6 @@ def compare_roe_property(states, fluxes, R, Rinv, lam): compare_arrays(R@temp, dFlux) -def transform_compute_flux_derivative_basic(prg): - cfd = prg["compute_flux_derivatives"] - - cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") - - cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", - lp.AddressSpace.GLOBAL) - cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", - lp.AddressSpace.GLOBAL) - - return prg.with_kernel(cfd) - - -def transform_weno_for_gpu(prg): - prg = transform_compute_flux_derivative_basic(prg) - - cfd = prg["compute_flux_derivatives"] - - for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: - cfd = lp.split_iname(cfd, "i"+suffix, 16, - outer_tag="g.0", inner_tag="l.0") - cfd = lp.split_iname(cfd, "j"+suffix, 16, - outer_tag="g.1", inner_tag="l.1") - - for var_name in ["delta_xi", "delta_eta", "delta_zeta"]: - cfd = lp.assignment_to_subst(cfd, var_name) - - cfd = lp.add_barrier(cfd, "tag:to_generalized", "tag:flux_x_compute") - cfd = lp.add_barrier(cfd, "tag:flux_x_compute", "tag:flux_x_diff") - cfd = lp.add_barrier(cfd, "tag:flux_x_diff", "tag:flux_y_compute") - cfd = lp.add_barrier(cfd, "tag:flux_y_compute", "tag:flux_y_diff") - cfd = lp.add_barrier(cfd, "tag:flux_y_diff", "tag:flux_z_compute") - cfd = lp.add_barrier(cfd, "tag:flux_z_compute", "tag:flux_z_diff") - cfd = lp.add_barrier(cfd, "tag:flux_z_diff", "tag:from_generalized") - - prg = prg.with_kernel(cfd) - - # FIXME: These should work, but don't - # FIXME: Undo the hand-inlining in WENO.F90 - #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") - #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") - - if 0: - print(prg["convert_to_generalized_frozen"]) - 1/0 - - return prg - - -def transform_compute_flux_derivative_gpu(queue, prg): - prg = transform_weno_for_gpu(prg) - - prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) - - if 1: - with open("gen-code.cl", "w") as outf: - outf.write(lp.generate_code_v2(prg).device_code()) - - prg = lp.set_options(prg, no_numpy=True) - return prg - - @pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"), diff --git a/transform_fixtures.py b/utilities.py similarity index 67% rename from transform_fixtures.py rename to utilities.py index f69581a045eae6d0289910fc635b2b289c0d5178..a188dcef2726ebbc0a4e325b96f7b06abd23412b 100644 --- a/transform_fixtures.py +++ b/utilities.py @@ -1,7 +1,46 @@ -import loopy as lp +import numpy as np +import numpy.linalg as la +import pyopencl as cl +import pyopencl.array # noqa +import pyopencl.tools # noqa +import pyopencl.clrandom # noqa +import loopy as lp # noqa -def compute_flux_derivative_basic(prg): +_QUEUE = [] + + +def get_queue(ctx_factory): + if not _QUEUE: + setup_queue(ctx_factory) + return _QUEUE[0] + + +def setup_queue(ctx_factory): + ctx = ctx_factory() + _QUEUE.append(cl.CommandQueue(ctx)) + + +_WENO_PRG = [] + + +def parse_weno(): + fn = "WENO.F90" + + with open(fn, "r") as infile: + infile_content = infile.read() + + prg = lp.parse_transformed_fortran(infile_content, filename=fn) + _WENO_PRG.append(prg) + + +def get_weno_program(): + if not _WENO_PRG: + parse_weno() + return _WENO_PRG[0] + + +def transform_compute_flux_derivative_basic(prg): cfd = prg["compute_flux_derivatives"] cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") @@ -16,8 +55,8 @@ def compute_flux_derivative_basic(prg): return prg.with_kernel(cfd) -def weno_for_gpu(prg): - prg = compute_flux_derivative_basic(prg) +def transform_weno_for_gpu(prg): + prg = transform_compute_flux_derivative_basic(prg) cfd = prg["compute_flux_derivatives"] @@ -52,8 +91,8 @@ def weno_for_gpu(prg): return prg -def compute_flux_derivative_gpu(queue, prg): - prg = weno_for_gpu(prg) +def transform_compute_flux_derivative_gpu(queue, prg): + prg = transform_weno_for_gpu(prg) prg = prg.copy(target=lp.PyOpenCLTarget(queue.device))