Skip to content
Snippets Groups Projects
Commit de8e8429 authored by Andreas Klöckner's avatar Andreas Klöckner
Browse files

Improvements to FEM assembly.

parent 7f257cfb
No related branches found
No related tags found
No related merge requests found
......@@ -443,7 +443,8 @@ def precompute(kernel, subst_name, dtype, sweep_inames=[],
sub_map = SubstitutionCallbackMapper([subst_name], do_substs)
for insn in kernel.instructions:
new_insns.append(insn.copy(expression=sub_map(insn.expression)))
new_insn = insn.copy(expression=sub_map(insn.expression))
new_insns.append(new_insn)
new_substs = dict(
(s.name, s.copy(expression=sub_map(s.expression)))
......
......@@ -167,6 +167,8 @@ def dump_schedule(schedule):
entries.append("</%s>" % sched_item.iname)
elif isinstance(sched_item, RunInstruction):
entries.append(sched_item.insn_id)
elif isinstance(sched_item, Barrier):
entries.append("|")
else:
assert False
......@@ -270,6 +272,7 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b
and len(schedule) >= debug.debug_length):
debug_mode = True
#print dump_schedule(schedule), len(schedule)
if debug_mode:
print kernel
print "--------------------------------------------"
......@@ -293,8 +296,6 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b
reachable_insn_ids = set()
for insn_id in all_insn_ids - scheduled_insn_ids:
if debug_mode:
print insn_id
insn = kernel.id_to_insn[insn_id]
schedule_now = set(insn.insn_deps) <= scheduled_insn_ids
......
from __future__ import division
import numpy as np
import numpy.linalg as la
import pyopencl as cl
import pyopencl.array as cl_array
import loopy as lp
from pyopencl.tools import pytest_generate_tests_for_pyopencl \
......@@ -17,8 +15,6 @@ def test_laplacian_stiffness(ctx_factory):
ctx = ctx_factory()
order = "C"
# FIXME: make dim-independent
dim = 2
Nq = 40 # num. quadrature points
......@@ -32,14 +28,97 @@ def test_laplacian_stiffness(ctx_factory):
knl = lp.make_kernel(ctx.devices[0],
"[Nc] -> {[K,i,j,q, ax_a, ax_b, ax_c]: 0<=K<Nc and 0<=i,j<%(Nb)d and 0<=q<%(Nq)d "
"and 0<= ax_a, ax_b, ax_c < %(dim)d}"
"and 0<= ax_c < %(dim)d}"
% dict(Nb=Nb, Nq=Nq, dim=dim),
[
"dPsi(a, dxi) := sum_float32(ax_c,"
" jacInv[ax_c,dxi,K,q] * DPsi[ax_c,a,q])",
"A[K, i, j] = sum_float32(q, w[q] * jacDet[K,q] * ("
"dPsi(0,0)*dPsi(1,0) + dPsi(0,1)*dPsi(1,1)))"
],
[
lp.ArrayArg("jacInv", dtype, shape=(dim, dim, Nc_sym, Nq), order=order),
lp.ConstantArrayArg("DPsi", dtype, shape=(dim, Nb, Nq), order=order),
lp.ArrayArg("jacDet", dtype, shape=(Nc_sym, Nq), order=order),
lp.ConstantArrayArg("w", dtype, shape=(Nq, dim), order=order),
lp.ArrayArg("A", dtype, shape=(Nc_sym, Nb, Nb), order=order),
lp.ScalarArg("Nc", np.int32, approximately=1000),
],
name="lapquad", assumptions="Nc>=1")
#knl = lp.tag_dimensions(knl, dict(ax_c="unr"))
seq_knl = knl
#print lp.preprocess_kernel(seq_knl)
#1/0
def variant_1(knl):
# no ILP across elements
knl = lp.split_dimension(knl, "K", 16, outer_tag="g.0", slabs=(0,1))
knl = lp.tag_dimensions(knl, {"i": "l.0", "j": "l.1"})
knl = lp.add_prefetch(knl, 'jacInv',
["jacInv_dim_0", "jacInv_dim_1", "K_inner", "q"])
return knl
def variant_2(knl):
# with ILP across elements
knl = lp.split_dimension(knl, "K", 16, outer_tag="g.0", slabs=(0,1))
knl = lp.split_dimension(knl, "K_inner", 4, inner_tag="ilp")
knl = lp.tag_dimensions(knl, {"i": "l.0", "j": "l.1"})
knl = lp.add_prefetch(knl, "jacInv",
["jacInv_dim_0", "jacInv_dim_1", "K_inner_inner", "K_inner_outer", "q"])
return knl
def variant_3(knl):
# no ILP across elements, precompute dPsiTransf
knl = lp.split_dimension(knl, "K", 16, outer_tag="g.0", slabs=(0,1))
knl = lp.tag_dimensions(knl, {"i": "l.0", "j": "l.1"})
knl = lp.precompute(knl, "dPsi", np.float32,)
#default_tag=None)
knl = lp.add_prefetch(knl, "jacInv",
["jacInv_dim_0", "jacInv_dim_1", "K_inner", "q"])
return knl
#for variant in [variant_1, variant_2]:
for variant in [variant_3]:
kernel_gen = lp.generate_loop_schedules(variant(knl),
loop_priority=["jacInv_dim_0", "jacInv_dim_1"])
kernel_gen = lp.check_kernels(kernel_gen, dict(Nc=Nc))
lp.auto_test_vs_seq(seq_knl, ctx, kernel_gen,
op_count=0, op_label="GFlops",
parameters={"Nc": Nc}, print_seq_code=True,
timing_rounds=30)
def test_laplacian_stiffness_nd(ctx_factory):
dtype = np.float32
ctx = ctx_factory()
order = "C"
1/0 # still encounters a dependency bug
dim = 2
Nq = 40 # num. quadrature points
Nc = 1000 # num. cells
Nb = 20 # num. basis functions
# K - run-time symbolic
from pymbolic import var
Nc_sym = var("Nc")
knl = lp.make_kernel(ctx.devices[0],
"[Nc] -> {[K,i,j,q, ax_a, ax_b, ax_c]: 0<=K<Nc and 0<=i,j<%(Nb)d and 0<=q<%(Nq)d "
"and 0<= ax_a, ax_b, ax_c < %(dim)d}"
% dict(Nb=Nb, Nq=Nq, dim=dim),
[
"dPsi(a, dxi) := sum_float32(@ax_c,"
" jacInv[ax_c,dxi,K,q] * DPsi[ax_c,a,q])",
"A[K, i, j] = sum_float32(q, w[q] * jacDet[K,q] * ("
"sum_float32(ax_b, dPsi(0,ax_b)*dPsi(1,ax_b))))"
],
[
lp.ArrayArg("jacInv", dtype, shape=(dim, dim, Nc_sym, Nq), order=order),
......@@ -58,7 +137,7 @@ def test_laplacian_stiffness(ctx_factory):
# no ILP across elements
knl = lp.split_dimension(knl, "K", 16, outer_tag="g.0", slabs=(0,1))
knl = lp.tag_dimensions(knl, {"i": "l.0", "j": "l.1"})
knl = lp.add_prefetch(knl, 'jacInv',
knl = lp.add_prefetch(knl, 'jacInv',
["jacInv_dim_0", "jacInv_dim_1", "K_inner", "q"])
return knl
......@@ -67,7 +146,7 @@ def test_laplacian_stiffness(ctx_factory):
knl = lp.split_dimension(knl, "K", 16, outer_tag="g.0", slabs=(0,1))
knl = lp.split_dimension(knl, "K_inner", 4, inner_tag="ilp")
knl = lp.tag_dimensions(knl, {"i": "l.0", "j": "l.1"})
knl = lp.add_prefetch(knl, "jacInv",
knl = lp.add_prefetch(knl, "jacInv",
["jacInv_dim_0", "jacInv_dim_1", "K_inner_inner", "K_inner_outer", "q"])
return knl
......@@ -77,14 +156,17 @@ def test_laplacian_stiffness(ctx_factory):
knl = lp.tag_dimensions(knl, {"i": "l.0", "j": "l.1"})
knl = lp.precompute(knl, "dPsi",
["a", "dxi"])
knl = lp.add_prefetch(knl, "jacInv",
knl = lp.add_prefetch(knl, "jacInv",
["jacInv_dim_0", "jacInv_dim_1", "K_inner", "q"])
return knl
for variant in [variant_1, variant_2]:
#for variant in [variant_3]:
kernel_gen = lp.generate_loop_schedules(variant(knl),
loop_priority=["jacInv_dim_0", "jacInv_dim_1"])
kernel_gen = lp.generate_loop_schedules(seq_knl,#variant(knl),
loop_priority=["jacInv_dim_0", "jacInv_dim_1"],
debug=15)
for knl in kernel_gen:
pass
kernel_gen = lp.check_kernels(kernel_gen, dict(Nc=Nc))
lp.auto_test_vs_seq(seq_knl, ctx, kernel_gen,
......@@ -94,6 +176,7 @@ def test_laplacian_stiffness(ctx_factory):
if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment