Newer
Older
import numpy as np
import numpy.linalg as la
import pyopencl as cl
import pyopencl.array # noqa
import pyopencl.tools # noqa
import pyopencl.clrandom # noqa
import loopy as lp # noqa
Timothy A. Smith
committed
import pytest
import pyopencl as cl
from pyopencl.tools import ( # noqa
pytest_generate_tests_for_pyopencl
as pytest_generate_tests)
import device_fixtures as device
import program_fixtures as program
import setup_fixtures as setup
import kernel_fixtures as kernel
def compare_arrays(a, b):
    """Assert that *a* equals *b* up to the tolerance applied by ``approx``."""
    expected = approx(b)
    assert a == expected
def compare_roe_identity(states, R, Rinv):
    """Check that ``R @ Rinv`` leaves the state difference unchanged.

    The state difference is column 1 of *states* minus column 0.  It is
    multiplied by *Rinv*, then by *R*, and the result is compared against
    the original difference with ``compare_arrays``.
    """
    state_jump = states[:, 1] - states[:, 0]
    round_trip = R @ (Rinv @ state_jump)
    compare_arrays(round_trip, state_jump)
def compare_roe_property(states, fluxes, R, Rinv, lam):
    """Check that ``R @ (lam * (Rinv @ dState))`` matches the flux difference.

    Both differences are taken as column 1 minus column 0 of *states* and
    *fluxes* respectively.  *lam* is applied as an elementwise multiplier
    to ``Rinv @ dState`` before mapping back through *R*; the result is
    compared against the flux difference with ``compare_arrays``.
    """
    state_jump = states[:, 1] - states[:, 0]
    flux_jump = fluxes[:, 1] - fluxes[:, 0]

    # project the state jump with Rinv, then scale each component by lam
    scaled = np.multiply(lam, Rinv @ state_jump)

    compare_arrays(R @ scaled, flux_jump)
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
def transform_compute_flux_derivative_basic(prg):
    """Apply the baseline transformations to ``compute_flux_derivatives``.

    The kernel is looked up in *prg*, given the assumption that ``nx``,
    ``ny`` and ``nz`` are all positive, and has three of its temporaries
    assigned to ``lp.AddressSpace.GLOBAL``.

    Returns *prg* with the transformed kernel substituted in via
    ``prg.with_kernel``.
    """
    knl = lp.assume(prg["compute_flux_derivatives"],
                    "nx > 0 and ny > 0 and nz > 0")

    # same temporaries, same order as the explicit call sequence
    for tmp_name in ("flux_derivatives_generalized",
                     "generalized_fluxes",
                     "weno_flux_tmp"):
        knl = lp.set_temporary_scope(knl, tmp_name, lp.AddressSpace.GLOBAL)

    return prg.with_kernel(knl)
def transform_weno_for_gpu(prg):
    """Transform ``compute_flux_derivatives`` in *prg* for GPU execution.

    Starts from ``transform_compute_flux_derivative_basic`` and then:

    * splits every ``i``/``j`` iname (plain and suffixed ``_1`` .. ``_7``)
      by 16, tagging the outer/inner parts ``g.0``/``l.0`` for ``i`` and
      ``g.1``/``l.1`` for ``j``;
    * turns the ``delta_xi``, ``delta_eta`` and ``delta_zeta`` assignments
      into substitution rules;
    * adds a barrier between each consecutive pair of tagged stages.

    Returns the program with the transformed kernel substituted in.
    """
    prg = transform_compute_flux_derivative_basic(prg)
    knl = prg["compute_flux_derivatives"]

    split_specs = (("i", "g.0", "l.0"), ("j", "g.1", "l.1"))
    for sfx in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]:
        for base, outer, inner in split_specs:
            knl = lp.split_iname(knl, base+sfx, 16,
                                 outer_tag=outer, inner_tag=inner)

    for subst_name in ["delta_xi", "delta_eta", "delta_zeta"]:
        knl = lp.assignment_to_subst(knl, subst_name)

    # one barrier between each stage and the one that follows it
    stage_tags = [
        "tag:to_generalized",
        "tag:flux_x_compute",
        "tag:flux_x_diff",
        "tag:flux_y_compute",
        "tag:flux_y_diff",
        "tag:flux_z_compute",
        "tag:flux_z_diff",
        "tag:from_generalized",
    ]
    for earlier, later in zip(stage_tags[:-1], stage_tags[1:]):
        knl = lp.add_barrier(knl, earlier, later)

    prg = prg.with_kernel(knl)

    # FIXME: These should work, but don't
    # FIXME: Undo the hand-inlining in WENO.F90
    #prg = lp.inline_callable_kernel(prg, "convert_to_generalized")
    #prg = lp.inline_callable_kernel(prg, "convert_from_generalized")

    # disabled debugging aid: print one kernel, then stop via ZeroDivisionError
    if 0:
        print(prg["convert_to_generalized_frozen"])
        1/0

    return prg
def transform_compute_flux_derivative_gpu(queue, prg):
    """Build the GPU variant of *prg* targeted at ``queue.device``.

    Applies ``transform_weno_for_gpu``, retargets the program to a
    ``lp.PyOpenCLTarget`` for the queue's device, writes the generated
    device code to ``gen-code.cl`` in the current working directory, and
    returns the program with the ``no_numpy`` option set.
    """
    gpu_prg = transform_weno_for_gpu(prg)
    gpu_prg = gpu_prg.copy(target=lp.PyOpenCLTarget(queue.device))

    # the generated device code is always dumped for inspection
    with open("gen-code.cl", "w") as outf:
        device_src = lp.generate_code_v2(gpu_prg).device_code()
        outf.write(device_src)

    return lp.set_options(gpu_prg, no_numpy=True)
@pytest.mark.parametrize("states_str,fluxes_str,direction", [
    ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"),
    ("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,11.2 2.6,8 1,46.4 7.1", "y"),
    ("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,8 1,11.2 2.6,46.4 7.1", "z"),
    ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,2.6 11.2,1 8,1 8,-7.1 -46.4", "x"),
    ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,1 8,2.6 11.2,1 8,-7.1 -46.4", "y"),
    ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,1 8,1 8,2.6 11.2,-7.1 -46.4", "z"),
    ("2 1,4 1,8 2,12 3,64 11", "4 1,11.2 2.6,16 2,24 3,134.4 12.6", "x"),
    ("2 1,4 1,8 2,12 3,64 11", "8 2,16 2,35.2 5.6,48 6,268.8 25.2", "y"),
    ("2 1,4 1,8 2,12 3,64 11", "12 3,24 3,48 6,75.2 10.6,403.2 37.8", "z")
    ])
def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction):
    """Check the Roe eigensystem against its identity and flux properties.

    Each case supplies the states and fluxes as strings (parsed by
    ``setup.array_from_string``) together with a direction.  The
    eigensystem is computed with metrics from ``setup.identity`` and then
    checked with ``compare_roe_identity`` and ``compare_roe_property``.
    """
    cl_queue = device.get_queue(ctx_factory)
    weno_prg = program.get_weno()

    roe_params = setup.roe_params(nvars=5, ndim=3, direction=direction)
    state_pair = setup.array_from_string(states_str)
    frozen_metrics = setup.identity(roe_params.ndim)

    R, Rinv, lam = kernel.roe_eigensystem(
        cl_queue, weno_prg, roe_params, state_pair, frozen_metrics)

    compare_roe_identity(state_pair, R, Rinv)

    flux_pair = setup.array_from_string(fluxes_str)
    compare_roe_property(state_pair, flux_pair, R, Rinv, lam)
queue = device.get_queue(ctx_factory)
a = setup.random_array(10, 10)
b = setup.random_array(10)
c = kernel.mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b)
Kaushik Kulkarni
committed
def test_compute_flux_derivatives(ctx_factory):
    """Run ``compute_flux_derivatives`` with only the basic transformations.

    Uses random input arrays for a 3-dimensional, 5-variable problem with
    ``n=10``.  The test makes no assertion on the result; it passes as
    long as the kernel call completes without raising.
    """
    cl_queue = device.get_queue(ctx_factory)
    weno_prg = transform_compute_flux_derivative_basic(program.get_weno())

    fd_params = setup.flux_derivative_params(ndim=3, nvars=5, n=10)
    fd_arrays = setup.random_flux_derivative_arrays(fd_params)

    kernel.compute_flux_derivatives(cl_queue, weno_prg, fd_params, fd_arrays)
Kaushik Kulkarni
committed
def test_compute_flux_derivatives_gpu(ctx_factory):
    """Run ``compute_flux_derivatives`` with the GPU transformations applied.

    The program is transformed by ``transform_compute_flux_derivative_gpu``
    for the queue's device, and the random input arrays are created by
    ``setup.random_flux_derivative_arrays_on_device`` for a 3-dimensional,
    5-variable problem with ``n=10``.  The test makes no assertion on the
    result; it passes as long as the kernel call completes without raising.
    """
    queue = device.get_queue(ctx_factory)
    prg = program.get_weno()
    prg = transform_compute_flux_derivative_gpu(queue, prg)

    params = setup.flux_derivative_params(ndim=3, nvars=5, n=10)
    arrays = setup.random_flux_derivative_arrays_on_device(ctx_factory, params)

    kernel.compute_flux_derivatives(queue, prg, params, arrays)
# This lets you run 'python test.py test_case(cl._csc)' without pytest.
if __name__ == "__main__":
if len(sys.argv) > 1:
Timothy A. Smith
committed
logging.basicConfig(level="INFO")
exec(sys.argv[1])
else: