import numpy as np import numpy.linalg as la import pyopencl as cl import pyopencl.array # noqa import pyopencl.tools # noqa import pyopencl.clrandom # noqa import loopy as lp # noqa import sys import logging import pytest from pytest import approx import pyopencl as cl from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) import setup_fixtures as setup import kernel_fixtures as kernel _QUEUE = [] def get_queue(ctx_factory): if not _QUEUE: setup_queue(ctx_factory) return _QUEUE[0] def setup_queue(ctx_factory): ctx = ctx_factory() _QUEUE.append(cl.CommandQueue(ctx)) _WENO_PRG = [] def parse_weno(): fn = "WENO.F90" with open(fn, "r") as infile: infile_content = infile.read() prg = lp.parse_transformed_fortran(infile_content, filename=fn) _WENO_PRG.append(prg) def get_weno_program(): if not _WENO_PRG: parse_weno() return _WENO_PRG[0] def compare_arrays(a, b): assert a == approx(b) def compare_roe_identity(states, R, Rinv): dState = states[:,1] - states[:,0] compare_arrays(R@(Rinv@dState), dState) def compare_roe_property(states, fluxes, R, Rinv, lam): dState = states[:,1] - states[:,0] dFlux = fluxes[:,1] - fluxes[:,0] temp = Rinv@dState temp = np.multiply(lam, temp) compare_arrays(R@temp, dFlux) def transform_compute_flux_derivative_basic(prg): cfd = prg["compute_flux_derivatives"] cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0") cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized", lp.AddressSpace.GLOBAL) cfd = lp.set_temporary_scope(cfd, "generalized_fluxes", lp.AddressSpace.GLOBAL) cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp", lp.AddressSpace.GLOBAL) return prg.with_kernel(cfd) def transform_weno_for_gpu(prg): prg = transform_compute_flux_derivative_basic(prg) cfd = prg["compute_flux_derivatives"] for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: cfd = lp.split_iname(cfd, "i"+suffix, 16, outer_tag="g.0", inner_tag="l.0") cfd = lp.split_iname(cfd, "j"+suffix, 16, outer_tag="g.1", inner_tag="l.1") for var_name in ["delta_xi", "delta_eta", "delta_zeta"]: cfd = lp.assignment_to_subst(cfd, var_name) cfd = lp.add_barrier(cfd, "tag:to_generalized", "tag:flux_x_compute") cfd = lp.add_barrier(cfd, "tag:flux_x_compute", "tag:flux_x_diff") cfd = lp.add_barrier(cfd, "tag:flux_x_diff", "tag:flux_y_compute") cfd = lp.add_barrier(cfd, "tag:flux_y_compute", "tag:flux_y_diff") cfd = lp.add_barrier(cfd, "tag:flux_y_diff", "tag:flux_z_compute") cfd = lp.add_barrier(cfd, "tag:flux_z_compute", "tag:flux_z_diff") cfd = lp.add_barrier(cfd, "tag:flux_z_diff", "tag:from_generalized") prg = prg.with_kernel(cfd) # FIXME: These should work, but don't # FIXME: Undo the hand-inlining in WENO.F90 #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") if 0: print(prg["convert_to_generalized_frozen"]) 1/0 return prg def transform_compute_flux_derivative_gpu(queue, prg): prg = transform_weno_for_gpu(prg) prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) if 1: with open("gen-code.cl", "w") as outf: outf.write(lp.generate_code_v2(prg).device_code()) prg = lp.set_options(prg, no_numpy=True) return prg @pytest.mark.xfail @pytest.mark.parametrize("states_str,fluxes_str,direction", [ ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"), ("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,11.2 2.6,8 1,46.4 7.1", "y"), ("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,8 1,11.2 2.6,46.4 7.1", "z"), ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,2.6 11.2,1 8,1 8,-7.1 -46.4", "x"), ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,1 8,2.6 11.2,1 8,-7.1 -46.4", "y"), ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,1 8,1 8,2.6 11.2,-7.1 -46.4", "z"), ("2 1,4 1,8 2,12 3,64 11", "4 1,11.2 2.6,16 2,24 3,134.4 12.6", "x"), ("2 1,4 1,8 2,12 3,64 11", "8 2,16 2,35.2 5.6,48 6,268.8 25.2", "y"), ("2 1,4 1,8 2,12 3,64 11", "12 3,24 3,48 6,75.2 10.6,403.2 37.8", "z") ]) def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction): queue = get_queue(ctx_factory) prg = get_weno_program() params = setup.roe_params(nvars=5, ndim=3, direction=direction) states = setup.array_from_string(states_str) metrics_frozen = setup.identity(params.ndim) R, Rinv, lam = kernel.roe_eigensystem(queue, prg, params, states, metrics_frozen) compare_roe_identity(states, R, Rinv) fluxes = setup.array_from_string(fluxes_str) compare_roe_property(states, fluxes, R, Rinv, lam) def test_matvec(ctx_factory): queue = get_queue(ctx_factory) prg = get_weno_program() a = setup.random_array(10, 10) b = setup.random_array(10) c = kernel.mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b) compare_arrays(a@b, c) #@pytest.mark.slow def test_compute_flux_derivatives(ctx_factory): queue = get_queue(ctx_factory) prg = get_weno_program() prg = transform_compute_flux_derivative_basic(prg) params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup.random_flux_derivative_arrays(params) kernel.compute_flux_derivatives(queue, prg, params, arrays) #@pytest.mark.slow def test_compute_flux_derivatives_gpu(ctx_factory): queue = get_queue(ctx_factory) prg = get_weno_program() prg = transform_compute_flux_derivative_gpu(queue, prg) params = setup.flux_derivative_params(ndim=3, nvars=5, n=10) arrays = setup.random_flux_derivative_arrays_on_device(ctx_factory, params) kernel.compute_flux_derivatives(queue, prg, params, arrays) # This lets you run 'python test.py test_case(cl._csc)' without pytest. if __name__ == "__main__": if len(sys.argv) > 1: logging.basicConfig(level="INFO") exec(sys.argv[1]) else: pytest.main([__file__])