Newer
Older
import numpy as np
import numpy.linalg as la
import pyopencl as cl
import pyopencl.array # noqa
import pyopencl.tools # noqa
import pyopencl.clrandom # noqa
import loopy as lp # noqa
Timothy A. Smith
committed
import pytest
import pyopencl as cl
from pyopencl.tools import ( # noqa
pytest_generate_tests_for_pyopencl
as pytest_generate_tests)
import setup_fixtures as setup
import kernel_fixtures as kernel
# Module-level, single-slot cache: holds the command queue once it has been
# created so that every test reuses the same one.
_QUEUE = []


def get_queue(ctx_factory):
    """Return the shared command queue, creating it on the first call.

    *ctx_factory* is only forwarded to :func:`setup_queue` when the cache is
    still empty; on later calls it is ignored and the cached queue is
    returned.
    """
    if _QUEUE:
        return _QUEUE[0]

    setup_queue(ctx_factory)
    return _QUEUE[0]
def setup_queue(ctx_factory):
    """Create a command queue from *ctx_factory* and append it to ``_QUEUE``.

    *ctx_factory* is called with no arguments and its result is handed to
    ``cl.CommandQueue``.
    """
    context = ctx_factory()
    command_queue = cl.CommandQueue(context)
    _QUEUE.append(command_queue)
# Module-level, single-slot cache for the parsed WENO program.
_WENO_PRG = []


def parse_weno():
    """Parse ``WENO.F90`` with loopy and append the result to ``_WENO_PRG``.

    The file name carries no directory component, so it is resolved against
    the current working directory.
    """
    source_name = "WENO.F90"
    with open(source_name, "r") as fortran_file:
        fortran_source = fortran_file.read()

    _WENO_PRG.append(
        lp.parse_transformed_fortran(fortran_source, filename=source_name))
def get_weno_program():
    """Return the cached WENO program, parsing it on the first call."""
    if _WENO_PRG:
        return _WENO_PRG[0]

    parse_weno()
    return _WENO_PRG[0]
def compare_arrays(a, b):
    """Assert that *a* equals *b* within ``pytest.approx`` default tolerances.

    :arg a: the computed value.
    :arg b: the reference value; it is wrapped in ``approx`` for the
        comparison.
    :raises AssertionError: if the comparison ``a == approx(b)`` is false.
    """
    # Resolve ``approx`` here so the helper does not depend on a bare
    # module-level ``approx`` binding being present.
    from pytest import approx

    assert a == approx(b)
def compare_roe_identity(states, R, Rinv):
    """Check that ``R @ (Rinv @ d)`` reproduces ``d``.

    ``d`` is the difference between column 1 and column 0 of *states*.
    The comparison is delegated to :func:`compare_arrays`.
    """
    state_jump = states[:, 1] - states[:, 0]
    projected = Rinv @ state_jump
    reconstructed = R @ projected
    compare_arrays(reconstructed, state_jump)
def compare_roe_property(states, fluxes, R, Rinv, lam):
    """Check that ``R @ (lam * (Rinv @ dState))`` matches ``dFlux``.

    ``dState`` and ``dFlux`` are the differences between column 1 and
    column 0 of *states* and *fluxes* respectively; the product with *lam*
    is the elementwise ``np.multiply``.  The comparison is delegated to
    :func:`compare_arrays`.
    """
    state_jump = states[:, 1] - states[:, 0]
    flux_jump = fluxes[:, 1] - fluxes[:, 0]

    projected = Rinv @ state_jump
    scaled = np.multiply(lam, projected)

    compare_arrays(R @ scaled, flux_jump)
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def transform_compute_flux_derivative_basic(prg):
    """Apply the baseline transformations to ``compute_flux_derivatives``.

    Adds the assumption that ``nx``, ``ny`` and ``nz`` are positive and puts
    three temporaries into the global address space, then returns *prg* with
    the updated kernel swapped in via ``prg.with_kernel``.
    """
    knl = prg["compute_flux_derivatives"]
    knl = lp.assume(knl, "nx > 0 and ny > 0 and nz > 0")

    # Same order as the temporaries are handled one by one.
    global_temporaries = (
        "flux_derivatives_generalized",
        "generalized_fluxes",
        "weno_flux_tmp",
    )
    for temp_name in global_temporaries:
        knl = lp.set_temporary_scope(knl, temp_name, lp.AddressSpace.GLOBAL)

    return prg.with_kernel(knl)
def transform_weno_for_gpu(prg):
    """Transform ``compute_flux_derivatives`` in *prg* for GPU execution.

    Starts from :func:`transform_compute_flux_derivative_basic`, then

    * splits every ``i*``/``j*`` iname by 16, tagging the outer/inner parts
      ``g.0``/``l.0`` (for ``i``) and ``g.1``/``l.1`` (for ``j``),
    * turns the ``delta_xi``/``delta_eta``/``delta_zeta`` assignments into
      substitution rules,
    * inserts a barrier between each pair of consecutive tagged stages.

    Returns the program with the transformed kernel swapped in.
    """
    prg = transform_compute_flux_derivative_basic(prg)
    knl = prg["compute_flux_derivatives"]

    # Iname suffixes "", "_1", ..., "_7".
    iname_suffixes = [""] + ["_%d" % n for n in range(1, 8)]
    for sfx in iname_suffixes:
        knl = lp.split_iname(
            knl, "i" + sfx, 16, outer_tag="g.0", inner_tag="l.0")
        knl = lp.split_iname(
            knl, "j" + sfx, 16, outer_tag="g.1", inner_tag="l.1")

    for subst_name in ("delta_xi", "delta_eta", "delta_zeta"):
        knl = lp.assignment_to_subst(knl, subst_name)

    # One barrier between every two consecutive stages, in this order.
    stage_tags = [
        "to_generalized",
        "flux_x_compute",
        "flux_x_diff",
        "flux_y_compute",
        "flux_y_diff",
        "flux_z_compute",
        "flux_z_diff",
        "from_generalized",
    ]
    for earlier, later in zip(stage_tags, stage_tags[1:]):
        knl = lp.add_barrier(knl, "tag:" + earlier, "tag:" + later)

    prg = prg.with_kernel(knl)

    # FIXME: These should work, but don't
    # FIXME: Undo the hand-inlining in WENO.F90
    #prg = lp.inline_callable_kernel(prg, "convert_to_generalized")
    #prg = lp.inline_callable_kernel(prg, "convert_from_generalized")

    # Disabled debugging aid: when switched on, prints the kernel and then
    # aborts through a deliberate ZeroDivisionError.
    if 0:
        print(prg["convert_to_generalized_frozen"])
        1/0

    return prg
def transform_compute_flux_derivative_gpu(queue, prg):
    """Return the GPU-transformed program targeted at ``queue.device``.

    Applies :func:`transform_weno_for_gpu`, retargets the program with
    ``lp.PyOpenCLTarget(queue.device)``, writes the generated device code to
    ``gen-code.cl`` in the current working directory, and finally sets the
    ``no_numpy`` option.
    """
    gpu_prg = transform_weno_for_gpu(prg)
    gpu_prg = gpu_prg.copy(target=lp.PyOpenCLTarget(queue.device))

    # Always-on switch: dump the generated device code for inspection.
    if 1:
        with open("gen-code.cl", "w") as code_file:
            device_code = lp.generate_code_v2(gpu_prg).device_code()
            code_file.write(device_code)

    return lp.set_options(gpu_prg, no_numpy=True)
@pytest.mark.parametrize("states_str,fluxes_str,direction", [
    ("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"),
    ("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,11.2 2.6,8 1,46.4 7.1", "y"),
    ("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,8 1,11.2 2.6,46.4 7.1", "z"),
    ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,2.6 11.2,1 8,1 8,-7.1 -46.4", "x"),
    ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,1 8,2.6 11.2,1 8,-7.1 -46.4", "y"),
    ("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,1 8,1 8,2.6 11.2,-7.1 -46.4", "z"),
    ("2 1,4 1,8 2,12 3,64 11", "4 1,11.2 2.6,16 2,24 3,134.4 12.6", "x"),
    ("2 1,4 1,8 2,12 3,64 11", "8 2,16 2,35.2 5.6,48 6,268.8 25.2", "y"),
    ("2 1,4 1,8 2,12 3,64 11", "12 3,24 3,48 6,75.2 10.6,403.2 37.8", "z"),
])
def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction):
    """Check the Roe eigensystem computed with identity frozen metrics.

    For each parametrized states/fluxes pair and direction, the result of
    ``kernel.roe_eigensystem`` is checked with :func:`compare_roe_identity`
    and then :func:`compare_roe_property`.
    """
    queue = get_queue(ctx_factory)
    prg = get_weno_program()

    params = setup.roe_params(nvars=5, ndim=3, direction=direction)
    states = setup.array_from_string(states_str)
    identity_metrics = setup.identity(params.ndim)

    R, Rinv, lam = kernel.roe_eigensystem(
        queue, prg, params, states, identity_metrics)

    compare_roe_identity(states, R, Rinv)

    # Fluxes are only parsed once the identity check has passed.
    fluxes = setup.array_from_string(fluxes_str)
    compare_roe_property(states, fluxes, R, Rinv, lam)
queue = get_queue(ctx_factory)
prg = get_weno_program()
a = setup.random_array(10, 10)
b = setup.random_array(10)
c = kernel.mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b)
Kaushik Kulkarni
committed
def test_compute_flux_derivatives(ctx_factory):
    """Run ``compute_flux_derivatives`` with only the basic transformations.

    Uses random input arrays built by ``setup`` for ``ndim=3``, ``nvars=5``
    and ``n=10``; the test makes no assertion of its own beyond the kernel
    call completing.
    """
    queue = get_queue(ctx_factory)
    prg = transform_compute_flux_derivative_basic(get_weno_program())

    params = setup.flux_derivative_params(ndim=3, nvars=5, n=10)
    arrays = setup.random_flux_derivative_arrays(params)

    kernel.compute_flux_derivatives(queue, prg, params, arrays)
Kaushik Kulkarni
committed
def test_compute_flux_derivatives_gpu(ctx_factory):
    """Run ``compute_flux_derivatives`` with the GPU transformations applied.

    Input arrays come from ``setup.random_flux_derivative_arrays_on_device``
    for ``ndim=3``, ``nvars=5`` and ``n=10``; the test makes no assertion of
    its own beyond the kernel call completing.
    """
    queue = get_queue(ctx_factory)
    gpu_prg = transform_compute_flux_derivative_gpu(queue, get_weno_program())

    params = setup.flux_derivative_params(ndim=3, nvars=5, n=10)
    arrays = setup.random_flux_derivative_arrays_on_device(
        ctx_factory, params)

    kernel.compute_flux_derivatives(queue, gpu_prg, params, arrays)
# This lets you run 'python test.py test_case(cl._csc)' without pytest.
if __name__ == "__main__":
if len(sys.argv) > 1:
Timothy A. Smith
committed
logging.basicConfig(level="INFO")
exec(sys.argv[1])
else: