Newer
Older
import numpy as np
import numpy.linalg as la
import pyopencl as cl
import pyopencl.array # noqa
import pyopencl.tools # noqa
import pyopencl.clrandom # noqa
import loopy as lp # noqa
Timothy A. Smith
committed
import pytest
import pyopencl as cl
from pyopencl.tools import ( # noqa
pytest_generate_tests_for_pyopencl
as pytest_generate_tests)
_QUEUE = []
def get_queue(ctx_factory):
if not _QUEUE:
setup_queue(ctx_factory)
return _QUEUE[0]
def setup_queue(ctx_factory):
ctx = ctx_factory()
_QUEUE.append(cl.CommandQueue(ctx))
_WENO_PRG = []
def parse_weno():
fn = "WENO.F90"
with open(fn, "r") as infile:
infile_content = infile.read()
prg = lp.parse_transformed_fortran(infile_content, filename=fn)
_WENO_PRG.append(prg)
def get_weno_program():
if not _WENO_PRG:
parse_weno()
return _WENO_PRG[0]
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
class RoeParams:
def __init__(self, nvars, ndim, d):
self.nvars = nvars
self.ndim = ndim
self.d = d
def mat_bounds(self):
return self.nvars, self.nvars
def vec_bounds(self):
return self.nvars
class FluxDerivativeParams:
def __init__(self, nvars, ndim, nx, ny, nz):
self.nvars = nvars
self.ndim = ndim
self.nx = nx
self.ny = ny
self.nz = nz
self.nhalo = 3
self.nx_halo = self.nx + 2*self.nhalo
self.ny_halo = self.ny + 2*self.nhalo
self.nz_halo = self.nz + 2*self.nhalo
def state_bounds(self):
return self.nvars, self.nx_halo, self.ny_halo, self.nz_halo
def flux_bounds(self):
return self.nvars, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo
def metric_bounds(self):
return self.ndim, self.ndim, self.nx_halo, self.ny_halo, self.nz_halo
def jacobian_bounds(self):
return self.nx_halo, self.ny_halo, self.nz_halo
class FluxDerivativeArrays:
def __init__(self, states, fluxes, metrics, metric_jacobians):
self.states = states
self.fluxes = fluxes
self.metrics = metrics
self.metric_jacobians = metric_jacobians
def setup_roe_params(nvars, ndim, direction):
dirs = {"x" : 1, "y" : 2, "z" : 3}
return RoeParams(nvars, ndim, dirs[direction])
def setup_flux_derivative_params(nvars, ndim, n):
return FluxDerivativeParams(nvars, ndim, n, n, n)
def setup_empty_array_on_device(queue, shape):
return cl.array.empty(queue, shape, dtype=np.float32, order="F")
def setup_identity(n):
return np.identity(n).astype(np.float32).copy(order="F")
def setup_random_array(*shape):
return np.random.random_sample(shape).astype(np.float32).copy(order="F")
def setup_random_array_on_device(queue, *shape):
return cl.array.to_device(queue, setup_random_array(*shape))
def setup_random_flux_derivative_arrays(params):
states = setup_random_array(*params.state_bounds())
fluxes = setup_random_array(*params.flux_bounds())
metrics = setup_random_array(*params.metric_bounds())
metric_jacobians = setup_random_array(*params.jacobian_bounds())
return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians)
def setup_random_flux_derivative_arrays_on_device(ctx_factory, params):
queue = get_queue(ctx_factory)
states = setup_random_array_on_device(queue, *params.state_bounds())
fluxes = setup_random_array_on_device(queue, *params.flux_bounds())
metrics = setup_random_array_on_device(queue, *params.metric_bounds())
metric_jacobians = setup_random_array_on_device(queue, *params.jacobian_bounds())
return FluxDerivativeArrays(states, fluxes, metrics, metric_jacobians)
def arrays_from_string(string_arrays):
return split_map_to_list(string_arrays, array_from_string, ":")
def array_from_string(string_array):
if ";" not in string_array:
if "," not in string_array:
array = array_from_string_1d(string_array)
else:
array = array_from_string_2d(string_array)
else:
array = array_from_string_3d(string_array)
return array.copy(order="F")
def array_from_string_3d(string_array):
if string_array[0] == ";":
return array_from_string_1d(string_array[1:]).reshape((-1, 1, 1))
else:
return np.array(split_map_to_list(string_array, array_from_string_2d, ";"))
def array_from_string_2d(string_array):
if string_array[0] == ",":
return array_from_string_1d(string_array[1:]).reshape((-1, 1))
else:
return np.array(split_map_to_list(string_array, array_from_string_1d, ","))
def array_from_string_1d(string_array):
if string_array[0] == "i":
return np.array(split_map_to_list(string_array[1:], int, " "))
else:
return np.array(split_map_to_list(string_array, float, " "), dtype=np.float32)
def split_map_to_list(string, map_func, splitter):
return list(map(map_func, string.split(splitter)))
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
def with_root_kernel(prg, root_name):
# FIXME This is a little less beautiful than it could be
new_prg = prg.copy(name=root_name)
for name in prg:
clbl = new_prg[name]
if isinstance(clbl, lp.LoopKernel) and clbl.is_called_from_host:
new_prg = new_prg.with_kernel(clbl.copy(is_called_from_host=False))
new_prg = new_prg.with_kernel(prg[root_name].copy(is_called_from_host=True))
return new_prg
def kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen):
R_dev = setup_empty_array_on_device(queue, params.mat_bounds())
Rinv_dev = setup_empty_array_on_device(queue, params.mat_bounds())
lam_dev = setup_empty_array_on_device(queue, params.vec_bounds())
prg = with_root_kernel(prg, "roe_eigensystem")
prg(queue, nvars=params.nvars, ndim=params.ndim, d=params.d,
states=states, metrics_frozen=metrics_frozen,
R=R_dev, R_inv=Rinv_dev, lambda_roe=lam_dev)
return R_dev.get(), Rinv_dev.get(), lam_dev.get()
def kernel_mult_mat_vec(queue, prg, alpha, a, b):
c_dev = setup_empty_array_on_device(queue, b.shape)
prg = with_root_kernel(prg, "mult_mat_vec")
prg(queue, a=a, b=b, c=c_dev, alpha=alpha)
return c_dev.get()
def kernel_compute_flux_derivatives(queue, prg, params, arrays):
flux_derivatives_dev = setup_empty_array_on_device(queue, (params.nvars, params.ndim,
params.nx_halo, params.ny_halo, params.nz_halo))
prg(queue, nvars=params.nvars, ndim=params.ndim,
states=arrays.states, fluxes=arrays.fluxes, metrics=arrays.metrics,
metric_jacobians=arrays.metric_jacobians,
flux_derivatives=flux_derivatives_dev)
return flux_derivatives_dev.get()
def compare_arrays(a, b):
assert a == approx(b)
def compare_roe_identity(states, R, Rinv):
dState = states[:,1] - states[:,0]
compare_arrays(R@(Rinv@dState), dState)
def compare_roe_property(states, fluxes, R, Rinv, lam):
dState = states[:,1] - states[:,0]
dFlux = fluxes[:,1] - fluxes[:,0]
temp = Rinv@dState
temp = np.multiply(lam, temp)
compare_arrays(R@temp, dFlux)
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
def transform_compute_flux_derivative_basic(prg):
cfd = prg["compute_flux_derivatives"]
cfd = lp.assume(cfd, "nx > 0 and ny > 0 and nz > 0")
cfd = lp.set_temporary_scope(cfd, "flux_derivatives_generalized",
lp.AddressSpace.GLOBAL)
cfd = lp.set_temporary_scope(cfd, "generalized_fluxes",
lp.AddressSpace.GLOBAL)
cfd = lp.set_temporary_scope(cfd, "weno_flux_tmp",
lp.AddressSpace.GLOBAL)
return prg.with_kernel(cfd)
def transform_weno_for_gpu(prg):
prg = transform_compute_flux_derivative_basic(prg)
cfd = prg["compute_flux_derivatives"]
for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]:
cfd = lp.split_iname(cfd, "i"+suffix, 16,
outer_tag="g.0", inner_tag="l.0")
cfd = lp.split_iname(cfd, "j"+suffix, 16,
outer_tag="g.1", inner_tag="l.1")
for var_name in ["delta_xi", "delta_eta", "delta_zeta"]:
cfd = lp.assignment_to_subst(cfd, var_name)
cfd = lp.add_barrier(cfd, "tag:to_generalized", "tag:flux_x_compute")
cfd = lp.add_barrier(cfd, "tag:flux_x_compute", "tag:flux_x_diff")
cfd = lp.add_barrier(cfd, "tag:flux_x_diff", "tag:flux_y_compute")
cfd = lp.add_barrier(cfd, "tag:flux_y_compute", "tag:flux_y_diff")
cfd = lp.add_barrier(cfd, "tag:flux_y_diff", "tag:flux_z_compute")
cfd = lp.add_barrier(cfd, "tag:flux_z_compute", "tag:flux_z_diff")
cfd = lp.add_barrier(cfd, "tag:flux_z_diff", "tag:from_generalized")
prg = prg.with_kernel(cfd)
# FIXME: These should work, but don't
# FIXME: Undo the hand-inlining in WENO.F90
#prg = lp.inline_callable_kernel(prg, "convert_to_generalized")
#prg = lp.inline_callable_kernel(prg, "convert_from_generalized")
if 0:
print(prg["convert_to_generalized_frozen"])
1/0
return prg
def transform_compute_flux_derivative_gpu(queue, prg):
prg = transform_weno_for_gpu(prg)
prg = prg.copy(target=lp.PyOpenCLTarget(queue.device))
if 1:
with open("gen-code.cl", "w") as outf:
outf.write(lp.generate_code_v2(prg).device_code())
prg = lp.set_options(prg, no_numpy=True)
return prg
@pytest.mark.parametrize("states_str,fluxes_str,direction", [
("2 1,4 1,4 1,4 1,20 5.5", "4 1,11.2 2.6,8 1,8 1,46.4 7.1", "x"),
("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,11.2 2.6,8 1,46.4 7.1", "y"),
("2 1,4 1,4 1,4 1,20 5.5", "4 1,8 1,8 1,11.2 2.6,46.4 7.1", "z"),
("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,2.6 11.2,1 8,1 8,-7.1 -46.4", "x"),
("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,1 8,2.6 11.2,1 8,-7.1 -46.4", "y"),
("1 2,-1 -4,-1 -4,-1 -4,5.5 20", "-1 -4,1 8,1 8,2.6 11.2,-7.1 -46.4", "z"),
("2 1,4 1,8 2,12 3,64 11", "4 1,11.2 2.6,16 2,24 3,134.4 12.6", "x"),
("2 1,4 1,8 2,12 3,64 11", "8 2,16 2,35.2 5.6,48 6,268.8 25.2", "y"),
("2 1,4 1,8 2,12 3,64 11", "12 3,24 3,48 6,75.2 10.6,403.2 37.8", "z")
])
def test_roe_uniform_grid(ctx_factory, states_str, fluxes_str, direction):
queue = get_queue(ctx_factory)
prg = get_weno_program()
params = setup_roe_params(nvars=5, ndim=3, direction=direction)
states = array_from_string(states_str)
metrics_frozen = setup_identity(params.ndim)
R, Rinv, lam = kernel_roe_eigensystem(queue, prg, params, states, metrics_frozen)
compare_roe_identity(states, R, Rinv)
compare_roe_property(states, fluxes, R, Rinv, lam)
queue = get_queue(ctx_factory)
prg = get_weno_program()
a = setup_random_array(10, 10)
b = setup_random_array(10)
c = kernel_mult_mat_vec(queue, prg, alpha=1.0, a=a, b=b)
Kaushik Kulkarni
committed
def test_compute_flux_derivatives(ctx_factory):
queue = get_queue(ctx_factory)
prg = get_weno_program()
prg = transform_compute_flux_derivative_basic(prg)
params = setup_flux_derivative_params(ndim=3, nvars=5, n=10)
arrays = setup_random_flux_derivative_arrays(params)
kernel_compute_flux_derivatives(queue, prg, params, arrays)
Kaushik Kulkarni
committed
def test_compute_flux_derivatives_gpu(ctx_factory):
queue = get_queue(ctx_factory)
prg = get_weno_program()
prg = transform_compute_flux_derivative_gpu(queue, prg)
Timothy A. Smith
committed
params = setup_flux_derivative_params(ndim=3, nvars=5, n=10)
arrays = setup_random_flux_derivative_arrays_on_device(ctx_factory, params)
kernel_compute_flux_derivatives(queue, prg, params, arrays)
# This lets you run 'python test.py test_case(cl._csc)' without pytest.
if __name__ == "__main__":
if len(sys.argv) > 1:
Timothy A. Smith
committed
logging.basicConfig(level="INFO")
exec(sys.argv[1])
else: