Skip to content
Snippets Groups Projects
Commit 055f2835 authored by Andreas Klöckner
Browse files

Add benchmark code

parent 8091ec43
No related branches found
No related tags found
No related merge requests found
......@@ -47,22 +47,7 @@ def test_compute_flux_derivatives(ctx_factory):
metric_jacobians=metric_jacobians)
def test_compute_flux_derivatives_gpu(ctx_factory):
logging.basicConfig(level="INFO")
queue = f.get_queue(ctx_factory)
ndim = 3
nvars = 5
nx = 10
ny = 10
nz = 10
states = f.random_array(nvars, nx+6, ny+6, nz+6)
fluxes = f.random_array(nvars, ndim, nx+6, ny+6, nz+6)
metrics = f.random_array(ndim, ndim, nx+6, ny+6, nz+6)
metric_jacobians = f.random_array(nx+6, ny+6, nz+6)
def get_gpu_transformed_weno():
prg = f.prg
cfd = prg["compute_flux_derivatives"]
......@@ -95,6 +80,8 @@ def test_compute_flux_derivatives_gpu(ctx_factory):
prg = prg.with_kernel(cfd)
# FIXME: These should work, but don't
# FIXME: Undo the hand-inlining in WENO.F90
#prg = lp.inline_callable_kernel(prg, "convert_to_generalized")
#prg = lp.inline_callable_kernel(prg, "convert_from_generalized")
......@@ -102,10 +89,31 @@ def test_compute_flux_derivatives_gpu(ctx_factory):
print(prg["convert_to_generalized_frozen"])
1/0
return prg
def test_compute_flux_derivatives_gpu(ctx_factory):
logging.basicConfig(level="INFO")
prg = get_gpu_transformed_weno()
queue = f.get_queue(ctx_factory)
ndim = 3
nvars = 5
nx = 10
ny = 10
nz = 10
states = f.random_array(nvars, nx+6, ny+6, nz+6)
fluxes = f.random_array(nvars, ndim, nx+6, ny+6, nz+6)
metrics = f.random_array(ndim, ndim, nx+6, ny+6, nz+6)
metric_jacobians = f.random_array(nx+6, ny+6, nz+6)
flux_derivatives_dev = cl.array.empty(queue, (nvars, ndim, nx+6, ny+6,
nz+6), dtype=np.float32, order="F")
if 0:
if 1:
with open("gen-code.cl", "w") as outf:
outf.write(lp.generate_code_v2(prg).device_code())
......@@ -113,11 +121,66 @@ def test_compute_flux_derivatives_gpu(ctx_factory):
states=states, fluxes=fluxes, metrics=metrics,
metric_jacobians=metric_jacobians,
flux_derivatives=flux_derivatives_dev)
return flux_derivatives_dev.get()
# This lets you run 'python test.py test_case(cl._csc)' without pytest.
def benchmark_compute_flux_derivatives_gpu(ctx_factory):
    """Time repeated runs of the GPU-transformed WENO program.

    Builds random inputs for an ``n x n x n`` grid (each spatial extent is
    allocated as ``n + 6``), runs the program twice as a warm-up, then
    times ``nrounds`` back-to-back invocations and prints the throughput
    (``n**3`` divided by the per-round time) and the per-round time.

    :arg ctx_factory: passed through to ``f.get_queue`` to obtain the
        command queue on which the program runs.
    """
    logging.basicConfig(level="INFO")

    prg = get_gpu_transformed_weno()

    queue = f.get_queue(ctx_factory)

    ndim = 3
    nvars = 5
    n = 100
    nx = ny = nz = n

    states = f.random_array(nvars, nx+6, ny+6, nz+6)
    fluxes = f.random_array(nvars, ndim, nx+6, ny+6, nz+6)
    metrics = f.random_array(ndim, ndim, nx+6, ny+6, nz+6)
    metric_jacobians = f.random_array(nx+6, ny+6, nz+6)
    flux_derivatives_dev = cl.array.empty(
            queue, (nvars, ndim, nx+6, ny+6, nz+6),
            dtype=np.float32, order="F")

    # Debugging aid: flip to 1 to dump the generated device code to a file.
    if 0:
        with open("gen-code.cl", "w") as outf:
            outf.write(lp.generate_code_v2(prg).device_code())

    prg = prg.copy(target=lp.PyOpenCLTarget(queue.device))
    prg = lp.set_options(prg, ignore_boostable_into=True, write_wrapper=True)

    #op_map = lp.get_op_map(prg, count_redundant_work=False)
    #print(op_map)

    from functools import partial
    run = partial(prg, queue, nvars=nvars, ndim=ndim,
            states=states, fluxes=fluxes, metrics=metrics,
            metric_jacobians=metric_jacobians,
            flux_derivatives=flux_derivatives_dev)

    # Untimed runs so that one-time setup cost does not land in the
    # measured interval.
    print("warmup")
    for _ in range(2):
        run()

    nrounds = 10

    # Drain the queue so previously enqueued work is not attributed to the
    # timed region.
    queue.finish()
    print("timing")

    # perf_counter is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted and may be coarse on some platforms.
    from time import perf_counter
    start = perf_counter()
    for _ in range(nrounds):
        run()

    # Wait for all enqueued rounds to complete before stopping the clock.
    queue.finish()
    one_round = (perf_counter() - start)/nrounds

    print(f"DOFs/s: {n**3/one_round}, elapsed per round: {one_round} s")
# This lets you run 'python test.py test_case(cl._csc)' without pytest.
if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment