# NOTE(review): two stray lines ("Newer"/"Older") of diff-viewer residue were
# neutralized here; they were not part of the program.
from __future__ import absolute_import, division, print_function
import numpy as np
from scipy.interpolate import BarycentricInterpolator as Interpolator
import volumential.list1_gallery as gallery
import volumential.singular_integral_2d as squad
import loopy as lp
import islpy as isl
import six
# NOTE: Cannot use sumpy kernels at this moment since they
# do not work with multiprocess.
# e.g. with LaplaceKernel it will complain "AttributeError:
# 'LaplaceKernel' object has no attribute 'expression'"
def _self_tp(vec):
assert len(vec.shape) == 1
return vec.reshape([len(vec), 1]) * vec.reshape([1, len(vec)])
def _orthonormal(n, i):
eb = np.zeros(n)
eb[i] = 1
return eb
def constant_one(x, y=None, z=None):
    """Constant-one kernel: returns an array of ones shaped like *x*.

    Extra coordinates *y*, *z* are accepted (and ignored) so this can
    stand in for 2D/3D kernel functions.
    """
    return np.ones(np.shape(x))
"Kernel function Laplace" +
str(dim) +
"D not implemented.")
def get_cahn_hilliard(dim):
    """Return a (placeholder) Cahn-Hilliard kernel function.

    :arg dim: spatial dimension; only ``dim == 2`` is supported.
    :raises NotImplementedError: for any other dimension.
    """
    if dim != 2:
        # FIX: the message previously said "Laplace" -- a copy-paste bug;
        # this factory builds the Cahn-Hilliard kernel.
        raise NotImplementedError(
            "Kernel function Cahn-Hilliard" +
            str(dim) +
            "D not implemented.")
    else:
        def cahn_hilliard(x, y):
            # Placeholder kernel: identically zero (as in the original code).
            return 0

        return cahn_hilliard
def sumpy_kernel_to_lambda(sknl):
    """Wrap a sumpy kernel as a plain callable ``f(x[, y[, z]])``.

    :arg sknl: a sumpy kernel exposing ``dim``, ``get_expression`` and
        ``get_global_scaling_const``.
    :return: a function of ``sknl.dim`` coordinate arguments that
        evaluates the (globally scaled) kernel expression.
    """
    from sympy import Symbol, symbols, lambdify
    var_name_prefix = "x"
    var_names = " ".join([
        var_name_prefix + str(i)
        for i in range(sknl.dim)
    ])
    # FIX: ``arg_names`` was referenced below but never defined (NameError);
    # it is the sympy symbol list matching ``var_names``.
    arg_names = symbols(var_names)
    args = [
        Symbol(var_name_prefix + str(i))
        for i in range(sknl.dim)
    ]

    def func(x, y=None, z=None):
        coord = (x, y, z)
        # NOTE: lambdify is rebuilt on every call; acceptable for the
        # low-volume use this helper sees.
        lmd = lambdify(
            arg_names,
            sknl.get_expression(args)
            * sknl.get_global_scaling_const())
        return lmd(*coord[:sknl.dim])

    return func
# {{{ table data structure
class NearFieldInteractionTable(object):
    """Class for a near-field interaction table.

    A near-field interaction table stores precomputed singular integrals
    on template boxes and supports transforms to actual boxes on lookup.
    The query process is done through scaling the entries based on actual
    box sizes.

    Orientations are ordered counter-clockwise.

    A template box is one of [0,1]^dim
    """
# {{{ constructor
def __init__(
self,
quad_order,
method="gauss-legendre",
dim=2,
kernel_func=None,
kernel_type=None,
sumpy_kernel=None,
build_method="Transform",
"""
kernel_type determines how the kernel is scaled w.r.t. box size.
build_method can be "Transform" or "FractalSum".
"""
self.quad_order = quad_order
self.dim = dim
self.dtype = dtype
self.center = np.ones(
self.dim) * 0.5
self.build_method = build_method
if dim == 2:
# Constant kernel can be used for fun/testing
if kernel_func is None:
kernel_func = constant_one
# Kernel function differs from OpenCL's kernels
self.kernel_func = kernel_func
self.integral_knl = sumpy_kernel
if build_method == "FractalSum":
assert sumpy_kernel is not None
else:
raise NotImplemented
# number of quad points per box
# equals to the number of modes per box
self.n_q_points = self.quad_order**dim
self.mode_normalizers = np.zeros(
self.n_q_points,
dtype=self.dtype)
# number of (source_mode, target_point) pairs between two boxes
self.n_pairs = self.n_q_points**2
self.interaction_case_vecs, self.case_encode, self.case_indices \
= gallery.generate_list1_gallery(self.dim)
self.n_cases = len(
self.interaction_case_vecs)
if method == "gauss-legendre":
# quad points in [-1,1]
import volumential.meshgen as mg
q_points, _, _ = mg.make_uniform_cubic_grid(
0.5 *
(qp + np.array([1, 1]))
for qp in q_points
])
# sort in dictionary order, preserve only the leading
# digits to prevent floating point errors from polluting
# the ordering.
q_points_ordering = sorted(range(len(mapped_q_points)),
key=lambda i: list(
np.floor(
mapped_q_points[i] * 10000)
)
)
else:
raise NotImplemented
self.n_pairs * self.n_cases,
dtype=self.dtype)
total_evals = len(
self.data) + self.n_q_points
from pytools import ProgressBar
self.pb = ProgressBar(
"Building table:",
total_evals)
# }}} End constructor
# {{{ encode to table index
def get_entry_index(self, source_mode_index, target_point_index, case_id):
    """Encode (source mode, target point, case) into a flat table index.

    Entries are laid out case-major: all ``n_pairs`` (mode, point) pairs
    of case 0 first, then case 1, and so on.
    """
    assert (source_mode_index >= 0
            and source_mode_index < self.n_q_points)
    # FIX: the lower-bound half of this assertion was lost in the source;
    # restored to mirror the source-mode check above.
    assert (target_point_index >= 0
            and target_point_index < self.n_q_points)
    pair_id = source_mode_index * self.n_q_points + target_point_index
    return case_id * self.n_pairs + pair_id
# }}} End encode to table index
# {{{ decode table index to entry info
# FIX: the ``def`` line of this method was lost in the source; the name
# is recovered from its call sites (``self.decode_index(entry_id)``).
def decode_index(self, entry_id):
    """This is the inverse function of get_entry_index().

    :return: a dict with keys ``"case_index"``, ``"source_mode_index"``
        and ``"target_point_index"``.
    """
    index_info = dict()
    # Entries are case-major (see get_entry_index), so peel off the case
    # first, then split the pair id into (mode, point).
    case_id = entry_id // self.n_pairs
    pair_id = entry_id % self.n_pairs
    source_mode_index = pair_id // self.n_q_points
    target_point_index = pair_id % self.n_q_points
    index_info["case_index"] = case_id
    index_info["source_mode_index"] = source_mode_index
    index_info["target_point_index"] = target_point_index
    return index_info
# {{{ basis modes in the template box
def get_mode(self, mode_index):
    """Return the Lagrange basis function ("mode") of a quad point.

    The mode is the tensor product of two 1D barycentric Lagrange
    interpolants, each equal to 1 at one quad node and 0 at the others.
    """
    assert (mode_index >= 0 and
            mode_index < self.n_q_points)
    # NOTE: these two lines should be changed
    # in accordance with the mesh generator
    idx0 = mode_index // self.quad_order
    idx1 = mode_index % self.quad_order
    # 1D quad-node coordinates. FIX(review): this assignment was garbled in
    # the source; restored by analogy with the 1D target-node extraction
    # used elsewhere (``pt[1] for pt in self.q_points[:ntgt_points]``),
    # valid because q_points are in ascending dictionary order -- confirm.
    xi = np.array([
        p[1]
        for p in self.q_points[:self.quad_order]
    ])
    assert (len(xi) == self.quad_order)
    y0i = np.zeros(self.quad_order, dtype=self.dtype)
    y1i = np.zeros(self.quad_order, dtype=self.dtype)
    # FIX: the unit value for the first axis was lost in the source;
    # restored to mirror the second axis below.
    y0i[idx0] = 1
    y1i[idx1] = 1
    interp_x = Interpolator(xi, y0i)
    interp_y = Interpolator(xi, y1i)

    def mode(x, y):
        return np.multiply(
            interp_x(np.array(x)),
            interp_y(np.array(y)))

    # FIX: the return statement was lost in the source; callers use the
    # result as a callable (e.g. compute_table_entry, compute_nmlz).
    return mode
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
def get_mode_cheb_coeffs(self, mode_index, cheb_order):
    """Compute the 2D Chebyshev expansion coefficients of a basis mode.

    The mode is sampled on a tensor grid of Chebyshev nodes mapped to the
    template window [0, 1], projected onto the tensor-product Chebyshev
    basis via the discrete orthogonality relation, and coefficients within
    machine precision of the largest one are purged to exactly zero.
    """
    import scipy.special as sps
    nodes, _, node_weights = sps.chebyt(cheb_order).weights.T

    # Affine map of nodes/weights from [-1, 1] to the window [0, 1].
    window = [0, 1]
    half_width = (window[1] - window[0]) / 2
    nodes = nodes * half_width + np.mean(window)
    node_weights = node_weights * half_width

    # Sample the mode on the tensor grid of mapped Chebyshev nodes.
    mode = self.get_mode(mode_index)
    grid_x, grid_y = np.meshgrid(nodes, nodes)
    samples = mode(grid_x, grid_y)

    from numpy.polynomial.chebyshev import Chebyshev

    # Discrete-orthogonality scaling; the degree-0 term gets half weight.
    scale = 2 * np.ones(cheb_order) / cheb_order
    scale[0] = scale[0] / 2

    # Each 1D Chebyshev basis polynomial evaluated at the mapped nodes.
    basis_1d = np.array([
        Chebyshev(
            coef=_orthonormal(cheb_order, k),
            domain=window
        )(nodes) for k in range(cheb_order)])

    # Tensor-product 2D basis evaluations.
    from itertools import product
    basis_2d = np.array([
        u.reshape([cheb_order, 1]) * v.reshape([1, cheb_order])
        for u, v in product(basis_1d, basis_1d)])

    coeffs = np.array([
        np.sum(samples * b2) for b2 in basis_2d
    ]) * _self_tp(scale).reshape(-1)

    # Purge coefficients that are ~15 digits below the largest one.
    peak = np.max(np.abs(coeffs))
    coeffs[np.abs(coeffs) < peak * 1e-15] = 0
    return coeffs
# }}} End basis modes in the template box
# {{{ build table via transform
def get_symmetry_transform(self, source_mode_index):
    """Apply proper transforms to map source mode to a reduced region.

    Returns:
    - a transform that can be applied on the interaction case
      vectors connecting box centers.
    - a transform that can be applied to the mode/point indices.

    NOTE(review): a run of garbled listing residue was removed from the
    middle of this function; the executable statements below are the
    source's own and are kept verbatim -- confirm nothing else was lost.
    """
    # q_points must be sorted in (ascending) dictionary order
    k = np.zeros(self.dim)
    resid = source_mode_index
    # Decode the flat mode index into a per-axis multi-index
    # (last axis varies fastest).
    for d in range(-1, -1 - self.dim, -1):
        k[d] = resid % self.quad_order
        resid = resid // self.quad_order
    # s1: per-axis reflection signs; negative means the index lies in the
    # upper half and gets mirrored below.
    s1 = np.sign((self.quad_order - 0.5) / 2 - k)
    for d in range(self.dim):
        if s1[d] < 0:
            k[d] = self.quad_order - 1 - k[d]
    # s2: axis permutation that sorts the (reflected) indices ascending.
    s2 = sorted(
        range(len(k)),
        key=lambda i: abs(k[i]))

    def symmetry_transform(vec):
        # Reflect, then permute, an interaction case vector.
        nv = np.array(vec) * s1
        return nv[s2]

    def qpoint_index_transform(index):
        # Apply the same reflection/permutation to a flat quad-point index.
        k = np.zeros(self.dim, dtype=int)
        resid = index
        for d in range(-1, -1 - self.dim, -1):
            k[d] = resid % self.quad_order
            resid = resid // self.quad_order
        assert (resid == 0)
        for d in range(self.dim):
            if s1[d] < 0:
                k[d] = self.quad_order - 1 - k[d]
        k = k[s2]
        # Re-encode the transformed multi-index into a flat index.
        new_id = 0
        for d in range(self.dim):
            new_id = new_id * int(self.quad_order) + k[d]
        return new_id

    return (symmetry_transform, qpoint_index_transform)
def find_target_point(self, target_point_index, case_index):
    """Apply proper transforms to find the target point's coordinate.

    Only translations and scalings are allowed in this step, avoiding the
    indices of quad points to be messed up.
    """
    assert (target_point_index >= 0
            and target_point_index < self.n_q_points)
    # rescale to source box with size 1x1
    vec = np.array(
        self.interaction_case_vecs[case_index]) / 4.0
    new_cntr = np.ones(
        self.dim, dtype=self.dtype) * 0.5 + vec
    if int(max(abs(vec)) * 4) == 0:
        # Coincident boxes: target box is the template box itself.
        new_size = 1
    else:
        # FIX: this expression was truncated in the source; restored to
        # match the case-scaling formula used elsewhere in the file
        # (``max([abs(l) - 0.5 for l in vec]) * 2``).
        new_size = max([
            abs(l) - 0.5
            for l in vec
        ]) * 2
    return new_cntr + new_size * (
        self.q_points[target_point_index] - self.center)
def lookup_by_symmetry(self, entry_id):
    """Look up the table entry that is mapped to a region where:

    - k_i <= q/2 in all direction i
    - k_i's are sorted in ascending order

    :return: the flat index of the canonical (representative) entry.
    """
    entry_info = self.decode_index(entry_id)
    # FIX: this call was truncated in the source (it broke off inside
    # ``entry_info[``). The symmetry transform is determined by the
    # source mode alone, so it is restored with the source mode index.
    vec_map, qp_map = self.get_symmetry_transform(
        entry_info["source_mode_index"])
    # mapped (canonical) case_id
    case_vec = self.interaction_case_vecs[
        entry_info["case_index"]]
    cc_vec = vec_map(case_vec)
    cc_id = self.case_indices[
        self.case_encode(cc_vec)]
    # mapped (canonical) source mode and target point ids
    cs_id = qp_map(entry_info["source_mode_index"])
    ct_id = qp_map(entry_info["target_point_index"])
    centry_id = self.get_entry_index(cs_id, ct_id, cc_id)
    return centry_id
def compute_table_entry(self, entry_id):
    """Compute one entry in the table indexed by self.data[entry_id].

    Input kernel function should be centered at origin.

    :return: an ``(entry_id, integral)`` pair, so results coming back
        unordered from a worker pool can be written by index.
    """
    entry_info = self.decode_index(entry_id)
    source_mode = self.get_mode(
        entry_info["source_mode_index"])
    # FIX: the keyword ``target_point_index=entry_info[`` was lost in the
    # source; restored to mirror the ``case_index`` keyword below.
    target_point = self.find_target_point(
        target_point_index=entry_info["target_point_index"],
        case_index=entry_info["case_index"])

    if self.dim == 2:
        # Integrand: (source mode) x (kernel recentered at the target).
        def integrand(x, y):
            return source_mode(
                x, y
            ) * self.kernel_func(
                x - target_point[0],
                y - target_point[1])

        integral, error = squad.box_quad(
            func=integrand,
            a=0,
            b=1,
            c=0,
            d=1,
            # tol=1e-10,
            # rtol=1e-10,
            # miniter=300,
            maxiter=301)
        # FIX: restore the missing return; build_table_via_transform
        # unpacks ``(entry_id, entry_val)`` pairs from imap_unordered.
        return (entry_id, integral)
    else:
        # FIX: was ``raise NotImplemented`` (a TypeError in Python 3);
        # raise the proper exception class.
        raise NotImplementedError
def compute_nmlz(self, mode_id):
    """Compute the normalizer (integral over the template box) of a mode.

    :return: a ``(mode_id, nmlz)`` pair, so results coming back unordered
        from a worker pool can be written by index.
    """
    mode_func = self.get_mode(mode_id)
    # FIX(review): the tail of this call (``d=1`` and the closing paren)
    # and the return statement were truncated in the source; restored so
    # build_normalizer_table's ``for mode_id, nmlz in ...`` loop works.
    # The ``minitero``/``miniteri`` spellings are kept as-is -- presumably
    # outer/inner iteration bounds of squad.qquad; confirm against squad.
    nmlz, _ = squad.qquad(
        func=mode_func,
        a=0,
        b=1,
        c=0,
        d=1,
        tol=1e-15,
        rtol=1e-15,
        minitero=25,
        miniteri=25)
    return (mode_id, nmlz)
def build_normalizer_table(self, pool=None, pb=None):
    """Build normalizers only.

    :arg pool: an optional multiprocess ``Pool`` to reuse; a fresh one
        (default process count) is created when not given.
    :arg pb: an optional progress bar exposing ``progress(n)``.
    """
    if pool is None:
        from multiprocess import Pool
        pool = Pool(processes=None)
    # Idiom fix: pass the range directly instead of wrapping it in a
    # redundant ``[i for i in range(...)]`` list comprehension.
    for mode_id, nmlz in pool.imap_unordered(
            self.compute_nmlz, range(self.n_q_points)):
        self.mode_normalizers[mode_id] = nmlz
        if pb is not None:
            pb.progress(1)
def build_table_via_transform(self):
"""
Build the full data table using transforms to
remove the singularity.
# multiprocessing cannot handle member functions
from multiprocess import Pool
self.build_normalizer_table(pool, pb=self.pb)
# First compute entries that are invariant under
# symmetry lookup
invariant_entry_ids = [
i
for i in range(
len(self.data))
if self.lookup_by_symmetry(
i) == i
]
for entry_id, entry_val in pool.imap_unordered(
self.
compute_table_entry,
invariant_entry_ids):
self.data[
entry_id] = entry_val
self.pb.progress(1)
# Then complete the table via symmetry lookup
for entry_id, centry_id in enumerate(
i
for i in range(
len(
self.
data))
# }}} End build table via transform
# {{{ build table via adding up fractal bricks
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
def get_sumpy_kernel_insns(self):
    """Generate loopy instructions evaluating the integral kernel.

    The kernel expression is post-processed at source and target over a
    symbolic distance vector ``dist``, run through global CSE, and then
    converted into loopy instructions retaining the kernel value.
    """
    from sumpy.symbolic import make_sym_vector
    from sumpy.assignment_collection import SymbolicAssignmentCollection
    from sumpy.codegen import to_loopy_insns

    dvec = make_sym_vector("dist", self.dim)
    sac = SymbolicAssignmentCollection()
    kernel_expr = self.integral_knl.postprocess_at_source(
        self.integral_knl.get_expression(dvec), dvec)
    kernel_expr = self.integral_knl.postprocess_at_target(
        kernel_expr, dvec)
    result_name = sac.assign_unique("knl_val", kernel_expr)
    sac.run_global_cse()

    loopy_insns = to_loopy_insns(
        six.iteritems(sac.assignments),
        vector_names=set(["dist"]),
        pymbolic_expr_maps=[self.integral_knl.get_code_transformer()],
        retain_names=[result_name],
        complex_dtype=np.complex128  # FIXME
        )
    return loopy_insns
def build_table_via_fractal_bricks(self,
n_brick_quad_points=15, alpha=0.5, queue=None):
# FIXME: take advantage of symmetry? (Yet bad for vectorization)
if queue == None:
import pyopencl as cl
cl_ctx = cl.create_some_context(interactive=True)
queue = cl.CommandQueue(cl_ctx)
d = self.dim
integral_knl = self.integral_knl # sumpy kernel, the integral kernel
nfunctions = max(self.quad_order, 3) # number of box-wide 1D Cheby modes
ntgt_points = self.quad_order # number of 1D target points
nquad_points = n_brick_quad_points # number of 1D brick quad points
case_vecs = np.array([
[v[0] for v in self.interaction_case_vecs],
[v[1] for v in self.interaction_case_vecs]])
case_scls = np.array([
1 if int(max(abs(np.array(vec)))) == 0
else max([abs(l) - 0.5 for l in np.array(vec)/4]) * 2
for vec in self.interaction_case_vecs])
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
# print(case_vecs, case_scls)
# {{{ build loop domain
tgt_vars = ["t%d" % i for i in range(d)]
basis_vars = ["f%d" % i for i in range(d)]
quad_vars = ["q%d" % i for i in range(d)]
basis_eval_vars = ["p%d" % i for i in range(d)]
pwaffs = isl.make_zero_and_vars(
tgt_vars + basis_vars + basis_eval_vars + quad_vars
+ ["iside", "iaxis", "ibrick_side", "ibrick_axis", "ilevel", "icase"],
["nlevels"])
def make_brick_domain(pwaffs, variables, n, lbound=0):
if isinstance(n, str):
ubound_pwaff = pwaffs[n]
else:
ubound_pwaff = pwaffs[0] + n
from functools import reduce
import operator
return reduce(operator.and_, [
(pwaffs[0]+lbound).le_set(pwaffs[var]) for var in variables
] + [
pwaffs[var].lt_set(ubound_pwaff) for var in variables
])
domain = (
make_brick_domain(pwaffs, tgt_vars, ntgt_points)
& make_brick_domain(pwaffs, quad_vars, nquad_points)
& make_brick_domain(pwaffs, basis_vars, nfunctions)
& make_brick_domain(pwaffs, basis_eval_vars, nfunctions, lbound=2)
& make_brick_domain(pwaffs, ["iside"], 2)
& make_brick_domain(pwaffs, ["iaxis"], d)
& make_brick_domain(pwaffs, ["ibrick_side"], 2)
& make_brick_domain(pwaffs, ["ibrick_axis"], d)
& make_brick_domain(pwaffs, ["ilevel"], "nlevels")
& make_brick_domain(pwaffs, ["icase"], len(case_vecs[0]))
)
# print(domain)
# }}} End build loop domain
# {{{ sumpy kernel eval
# integral kernel evaluation
knl_loopy_insns = self.get_sumpy_kernel_insns()
quad_inames = frozenset(["icase", "t0", "t1",
"f0", "f1", "ilevel", "ibrick_axis", "ibrick_side", "q0", "q1"])
quad_kernel_insns = [
insn.copy(
within_inames=insn.within_inames | quad_inames
)
for insn in knl_loopy_insns]
from sumpy.symbolic import SympyToPymbolicMapper
sympy_conv = SympyToPymbolicMapper()
scaling_assignment = lp.Assignment(id=None,
assignee="knl_scaling",
expression=sympy_conv(integral_knl.get_global_scaling_const()),
temp_var_type=lp.auto)
# }}} End sumpy kernel eval
# {{{ loopy kernel for heavy-lifting
brick_map_knl = lp.make_kernel(
[domain],
["""
for iaxis
<> root_center[iaxis] = 0.5 * (
root_brick[iaxis, 1] + root_brick[iaxis, 0]) {dup=iaxis}
<> root_extent[iaxis] = (root_brick[iaxis, 1]
- root_brick[iaxis, 0]) {dup=iaxis}
end
for ilevel, f0, f1, t0, t1, icase
# Targets outside projected onto the boundary
for iaxis
if iaxis == 0
<> template_target[iaxis] = target_nodes[t0] {id=tplt_tgt1,dup=iaxis}
else
template_target[iaxis] = target_nodes[t1] {id=tplt_tgt2,dup=iaxis}
end
end
... nop {id=tplt_tgt_pre,dep=tplt_tgt1:tplt_tgt2}
# True targets are used for kernel evaluation
for iaxis
<> true_target[iaxis] = ( root_center[iaxis]
+ interaction_case_vecs[iaxis, icase]
* 0.25 * root_extent[iaxis]
+ interaction_case_scls[icase]
* (template_target[iaxis] - 0.5)
* root_extent[iaxis]
) {id=true_targets,dup=iaxis,dep=tplt_tgt_pre}
end
# Projected targets are used for brick construction
for iaxis
<> target[iaxis] = if(
true_target[iaxis] > root_brick[iaxis, 1],
root_brick[iaxis, 1],
if(
true_target[iaxis] < root_brick[iaxis, 0],
root_brick[iaxis, 0],
true_target[iaxis])) {dup=iaxis,dep=true_targets}
end
for iaxis
template_target[iaxis] = (0.5
+ interaction_case_vecs[iaxis, icase] * 0.25
+ interaction_case_scls[icase] * (template_target[iaxis] - 0.5)
) {id=tplt_tgt3,dup=iaxis,dep=tplt_tgt_pre:true_targets}
end
for iaxis
template_target[iaxis] = if(
template_target[iaxis] > 1,
1,
if(
template_target[iaxis] < 0,
0,
template_target[iaxis])) {id=tplt_tgt4,dup=iaxis,dep=tplt_tgt3}
end
... nop {id=tplt_tgt,dep=tplt_tgt_pre:tplt_tgt4:true_targets}
# FIXME: commenting out either of the 2 below ruins the result
# Debug output
for iaxis
projected_template_targets[t0, t1, icase, iaxis] = \
template_target[iaxis] {dup=iaxis,dep=tplt_tgt}
end
# Debug output
for iaxis
target_points[ilevel, f0, f1, t0, t1, icase, iaxis
] = true_target[iaxis] {dup=iaxis,dep=true_targets}
end
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
for iaxis, iside
<> outer_brick[iaxis, iside] = (
(alpha**ilevel)*root_brick[iaxis,iside]
+ (1-alpha**ilevel) * target[iaxis]) {dup=iaxis:iside}
<> inner_brick[iaxis, iside] = (
alpha*outer_brick[iaxis,iside]
+ (1-alpha) * target[iaxis]) {dup=iaxis:iside}
end
for iaxis
<> ob_ext[iaxis] = (outer_brick[iaxis, 1]
- outer_brick[iaxis, 0]) {dup=iaxis}
end
for ibrick_axis, ibrick_side, q0, q1
for iaxis
<> point[iaxis] = \
if(iaxis == 0,
quadrature_nodes[q0],
quadrature_nodes[q1]) {dup=iaxis}
end
<> deform = point[ibrick_axis]*(1-alpha)
for iaxis
if iaxis == ibrick_axis
<> mapped_point_tmp[iaxis] = (
point[ibrick_axis]*(
inner_brick[ibrick_axis, ibrick_side]
- outer_brick[ibrick_axis, ibrick_side])
+ outer_brick[ibrick_axis, ibrick_side]) \
{id=mpoint1}
else
<> pre_scale = (
point[iaxis]
+ deform*(template_target[iaxis]-point[iaxis])
) {dep=tplt_tgt}
mapped_point_tmp[iaxis] = \
ob_ext[iaxis] * pre_scale + outer_brick[iaxis, 0] \
{id=mpoint2}
end
# Re-map the mapped points to [-1,1]^2 for Chebyshev evals
<> template_mapped_point_tmp[iaxis] = 0.0 + (
mapped_point_tmp[iaxis] - root_center[iaxis]
) / root_extent[iaxis] * 2
... nop {id=mpoint,dep=mpoint1:mpoint2}
#mapped_points[ilevel, ibrick_axis,
# ibrick_side, t0, t1, icase, iaxis, q0, q1] = \
# mapped_point_tmp[iaxis] {dep=mpoint}
#mapped_target[t0, t1, icase, iaxis] = target[iaxis]
end
for iaxis
if iaxis == ibrick_axis
<> jac_part = (
inner_brick[ibrick_axis, ibrick_side]
- outer_brick[ibrick_axis, ibrick_side]
) {id=jpart1}
else
jac_part = ob_ext[iaxis] * (1-deform) {id=jpart2}
end
end
<> jacobian = abs(product(iaxis,
jac_part)) {id=jac,dep=jpart1:jpart2}
<> dist[iaxis] = (true_target[iaxis]
- mapped_point_tmp[iaxis]) {dep=mpoint:true_targets}
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
end
end
for f0, f1, t0, t1, icase
for ilevel
for ibrick_axis, ibrick_side, q0, q1
<> T0_0 = 1
<> T1_0 = template_mapped_point_tmp[0] {dep=mpoint}
<> Tprev_0 = T0_0 {id=t0_0}
<> Tcur_0 = T1_0 {id=t1_0,dep=t0_0}
for p0
<> Tnext_0 = (2 * template_mapped_point_tmp[0] * Tcur_0
- Tprev_0) {id=tnext0,dep=t1_0}
Tprev_0 = Tcur_0 {id=tprev_update0,dep=tnext0}
Tcur_0 = Tnext_0 {id=tcur_update0,dep=tprev_update0}
end
<> T0_1 = 1
<> T1_1 = template_mapped_point_tmp[1] {dep=mpoint}
<> Tprev_1 = T0_1 {id=t0_1}
<> Tcur_1 = T1_1 {id=t1_1,dep=t0_1}
for p1
<> Tnext_1 = (2 * template_mapped_point_tmp[1] * Tcur_1
- Tprev_1) {id=tnext1,dep=t1_1}
Tprev_1 = Tcur_1 {id=tprev_update1,dep=tnext1}
Tcur_1 = Tnext_1 {id=tcur_update1,dep=tprev_update1}
end
<> basis_eval0 = (
T0_0 * if(f0 == 0, 1, 0)
+ T1_0 * if(f0 == 1, 1, 0)
+ sum(p0, if(f0 == p0, Tcur_0, 0))
) {id=basis0,dep=tcur_update0}
<> basis_eval1 = (
T0_1 * if(f1 == 0, 1, 0)
+ T1_1 * if(f1 == 1, 1, 0)
+ sum(p1, if(f1 == p1, Tcur_1, 0))
) {id=basis1,dep=tcur_update1}
<> density_val = basis_eval0 * basis_eval1 \
{id=density,dep=basis0:basis1}
end
end
end
for f0, f1, t0, t1, icase
# the first index is contiguous
result[f1, f0, t1, t0, icase] = (sum(
(ilevel, ibrick_axis, ibrick_side, q0, q1),
quadrature_weights[q0]
* quadrature_weights[q1]
* jacobian
* knl_val
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
)
* knl_scaling
) {dep=jac:mpoint:density}
end
"""]
+ quad_kernel_insns + [scaling_assignment]
, [
lp.ValueArg("alpha", np.float64),
lp.ValueArg("n_cases, dim", np.int32),
lp.GlobalArg("interaction_case_vecs", np.float64, "dim, n_cases"),
lp.GlobalArg("interaction_case_scls", np.float64, "n_cases"),
"..."
],
name="brick_map")
brick_map_knl = lp.fix_parameters(brick_map_knl, d=d)
brick_map_knl = lp.set_options(brick_map_knl, write_cl=False)
brick_map_knl = lp.set_options(brick_map_knl, return_dict=True)
# }}} End loopy kernel for heavy-lifting
root_brick = np.zeros((d, 2))
root_brick[:, 1] = 1
root_displacement = 0.5 * sum(root_brick[0])
root_extent = root_brick[0,1] - root_brick[0,0]
# target points in 1D
t = np.array([ pt[1] for pt in self.q_points[:ntgt_points]])
# quad formula for each brick
import scipy.special as sps
legendre_nodes, _, legendre_weights = sps.legendre(
nquad_points).weights.T
legendre_nodes = legendre_nodes*0.5 + 0.5
legendre_weights = legendre_weights * 0.5
# target_nodes, quadrature_nodes and quadrature weights are
# the templates in [0,1] unit interval.
# They will be mapped accordingly to the bricks.
print("Running loopy kernel")
evt, res = brick_map_knl(
queue, alpha=alpha,
root_brick=root_brick,
target_nodes=t.astype(np.float64, copy=True),
quadrature_nodes=legendre_nodes,
quadrature_weights=legendre_weights,
interaction_case_vecs=case_vecs.astype(np.float64, copy=True),
interaction_case_scls=case_scls.reshape(-1).astype(np.float64,
copy=True),
n_cases=len(case_vecs[0]),
nlevels=30,
dim=2)