From 110a0e9e21f7ed86580fd2bd7bfecc9a56fcba76 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 16 Jul 2013 17:28:44 -0400 Subject: [PATCH] Track loopy changes, move toward Helmholtz kernels, modernize code generators --- sumpy/__init__.py | 30 +++++ sumpy/codegen.py | 5 + sumpy/e2e.py | 167 +++++++++++++++++++++++++ sumpy/e2p.py | 23 ++-- sumpy/expansion/__init__.py | 3 - sumpy/kernel.py | 25 ++-- sumpy/p2e.py | 4 +- sumpy/p2p.py | 6 +- sumpy/qbx.py | 3 +- sumpy/tools.py | 27 ++-- test/test_kernels.py | 237 ++++++++++++++++++++---------------- 11 files changed, 365 insertions(+), 165 deletions(-) create mode 100644 sumpy/e2e.py diff --git a/sumpy/__init__.py b/sumpy/__init__.py index e69de29b..279f6683 100644 --- a/sumpy/__init__.py +++ b/sumpy/__init__.py @@ -0,0 +1,30 @@ +from __future__ import division + +__copyright__ = "Copyright (C) 2013 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from sumpy.p2p import P2P +from sumpy.p2e import P2E +from sumpy.e2p import E2P +from sumpy.e2e import E2E + +__all__ = ["P2P", "P2E", "E2P", "E2E"] diff --git a/sumpy/codegen.py b/sumpy/codegen.py index a232bf58..60669057 100644 --- a/sumpy/codegen.py +++ b/sumpy/codegen.py @@ -86,6 +86,11 @@ hank1_01_result hank1_01(cdouble_t z) } """ + +def bessel_preamble_generator(seen_dtypes, seen_functions): + if "hank1_01" in seen_functions: + yield ("sumpy-bessel", BESSEL_PREAMBLE) + hank1_01_result_dtype = cl.tools.get_or_register_dtype("hank1_01_result", np.dtype([ ("order0", np.complex128), diff --git a/sumpy/e2e.py b/sumpy/e2e.py new file mode 100644 index 00000000..f6b3b293 --- /dev/null +++ b/sumpy/e2e.py @@ -0,0 +1,167 @@ +from __future__ import division + +__copyright__ = "Copyright (C) 2013 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import numpy as np +import loopy as lp +import sympy as sp +from pytools import memoize_method + + +class E2E(object): + def __init__(self, ctx, src_expansion, tgt_expansion, + options=[], name="e2e", device=None): + """ + :arg expansion: a subclass of :class:`sympy.expansion.ExpansionBase` + :arg strength_usage: A list of integers indicating which expression + uses which source strength indicator. This implicitly specifies the + number of strength arrays that need to be passed. + Default: all kernels use the same strength. + """ + + if device is None: + device = ctx.devices[0] + + self.ctx = ctx + self.src_expansion = src_expansion + self.tgt_expansion = tgt_expansion + self.options = options + self.name = name + self.device = device + + if src_expansion.dim != tgt_expansion.dim: + raise ValueError("source and target expansions must have " + "same dimensionality") + + self.dim = src_expansion.dim + + @memoize_method + def get_kernel(self): + from sumpy.symbolic import make_sym_vector + dvec = make_sym_vector("d", self.dim) + + ncoeff_src = len(self.src_expansion.get_coefficient_identifiers()) + ncoeff_tgt = len(self.tgt_expansion.get_coefficient_identifiers()) + + src_coeff_exprs = [sp.Symbol("src_coeff%d" % i) + for i in xrange(ncoeff_src)] + + from sumpy.assignment_collection import SymbolicAssignmentCollection + sac = SymbolicAssignmentCollection() + tgt_coeff_names = [ + sac.assign_unique("coeff%d" % i, coeff_i) + for i, coeff_i in enumerate( + self.tgt_expansion.translate_from( + self.src_expansion, src_coeff_exprs, dvec))] + + sac.run_global_cse() + + from sumpy.symbolic import kill_trivial_assignments + assignments = kill_trivial_assignments([ + (name, expr) + for name, expr in sac.assignments.iteritems()], + retain_names=tgt_coeff_names) + + from sumpy.codegen import to_loopy_insns + loopy_insns = to_loopy_insns(assignments, + vector_names=set(["d"]), + pymbolic_expr_maps=[self.tgt_expansion.transform_to_code], + complex_dtype=np.complex128 # FIXME + ) + + from sumpy.tools import gather_arguments + arguments = ( + [ + lp.GlobalArg("centers", None, shape="dim, nboxes"), + lp.GlobalArg("src_expansions", None, + shape=("nboxes", ncoeff_src)), + lp.GlobalArg("tgt_expansions", None, + shape=("nboxes", ncoeff_tgt)), + lp.GlobalArg("src_box_starts, src_box_lists", + None, shape=None, strides=(1,)), + lp.ValueArg("nboxes", np.int32), + "..." + ] + gather_arguments([self.src_expansion, self.tgt_expansion]) + ) + + loopy_knl = lp.make_kernel(self.device, + [ + "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] + <> tgt_center[idim] = centers[idim, tgt_ibox] \ + {id=fetch_tgt_center} + + <> isrc_start = src_box_starts[itgt_box] + <> isrc_stop = src_box_starts[itgt_box+1] + + <> src_ibox = source_boxes[isrc_box] + <> src_center[idim] = centers[idim, src_ibox] \ + {id=fetch_src_center} + <> d[idim] = tgt_center[idim] - src_center[idim] + + src_coeff${SRC_COEFFIDX} = \ + src_expansions[src_ibox, ${SRC_COEFFIDX}]\ + tgt_expansions[tgt_ibox, ${TGT_COEFFIDX}] = \ + coeff${TGT_COEFFIDX} + """, + arguments, + name=self.name, assumptions="nsrc_boxes>=1", + preambles=self.expansion.get_preambles(), + defines=dict( + dim=self.dim, + SRC_COEFFIDX=[str(i) for i in xrange(ncoeff_src)], + TGT_COEFFIDX=[str(i) for i in xrange(ncoeff_tgt)], + ), + silenced_warnings="write_race(write_expn*)") + + for expn in [self.src_expansion, self.tgt_expansion]: + loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + + loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "fetch_tgt_center", + tags={"idim": "unr"}) + loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + + return loopy_knl + + @memoize_method + def get_optimized_kernel(self): + # FIXME + knl = self.get_kernel() + knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0") + return knl + + def __call__(self, queue, **kwargs): + """ + :arg src_expansions: + :arg src_box_starts: + :arg src_box_lists: + :arg centers: + """ + knl = self.get_optimized_kernel() + + return knl(queue, **kwargs) diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 9d8ab4a4..8f40cfd0 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -81,7 +81,6 @@ class E2P(object): for name, expr in sac.assignments.iteritems()], retain_names=result_names) - from sumpy.codegen import to_loopy_insns loopy_insns = to_loopy_insns(assignments, vector_names=set(["b"]), @@ -104,7 +103,7 @@ class E2P(object): None, shape=None), lp.GlobalArg("centers", None, shape="dim, nboxes"), lp.GlobalArg("expansions", None, - shape=(len(coeff_exprs), "nboxes")), + shape=("nboxes", len(coeff_exprs))), lp.ValueArg("nboxes", np.int32), lp.ValueArg("ntargets", np.int32), "..." @@ -120,18 +119,18 @@ class E2P(object): "{[itgt,idim]: itgt_start<=itgt tgt_ibox = target_boxes[itgt_box]", - "<> itgt_start = box_target_starts[tgt_ibox]", - "<> itgt_end = itgt_start+box_target_counts_nonchild[tgt_ibox]", - "<> center[idim] = centers[idim, tgt_ibox] {id=fetch_center}", - "<> b[idim] = targets[idim, itgt] - center[idim]", - "<> coeff${COEFFIDX} = expansions[tgt_ibox, ${COEFFIDX}]", - "result_${RESULTIDX}[itgt] = kernel_scaling * result_${RESULTIDX}_p" - ], + + [""" + <> tgt_ibox = target_boxes[itgt_box] + <> itgt_start = box_target_starts[tgt_ibox] + <> itgt_end = itgt_start+box_target_counts_nonchild[tgt_ibox] + <> center[idim] = centers[idim, tgt_ibox] {id=fetch_center} + <> b[idim] = targets[idim, itgt] - center[idim] + <> coeff${COEFFIDX} = expansions[tgt_ibox, ${COEFFIDX}] + result_${RESULTIDX}[itgt] = \ + kernel_scaling * result_${RESULTIDX}_p + """], arguments, name=self.name, assumptions="ntgt_boxes>=1", - preambles=self.expansion.get_preambles(), defines=dict( dim=self.dim, COEFFIDX=[str(i) for i in xrange(len(coeff_exprs))], diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index 3d4ae792..ad10563b 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -57,9 +57,6 @@ class ExpansionBase(object): def get_args(self): return self.kernel.get_args() - def get_preambles(self): - return self.kernel.get_preambles() - # }}} def coefficients_from_source(self, expr, avec, bvec): diff --git a/sumpy/kernel.py b/sumpy/kernel.py index 1fb00031..d9133300 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -127,9 +127,6 @@ class Kernel(object): """ return [] - def get_preambles(self): - return [] - def __sub__(self, other): return DifferenceKernel(self, other) @@ -195,14 +192,13 @@ class HelmholtzKernel(Kernel): is_complex_valued = True def prepare_loopy_kernel(self, loopy_knl): - # does loopy_knl already know about hank1_01? - mangle_result = loopy_knl.mangle_function( - "hank1_01", (np.dtype(np.complex128),)) - from sumpy.codegen import hank1_01_result_dtype, bessel_mangler - if mangle_result is not hank1_01_result_dtype: - return loopy_knl.register_function_mangler(bessel_mangler) - else: - return loopy_knl + from sumpy.codegen import bessel_preamble_generator, bessel_mangler + loopy_knl = lp.register_function_manglers(loopy_knl, + [bessel_mangler]) + loopy_knl = lp.register_preamble_generators(loopy_knl, + [bessel_preamble_generator]) + + return loopy_knl def get_expression(self, dist_vec): assert self.dim == len(dist_vec) @@ -237,10 +233,6 @@ class HelmholtzKernel(Kernel): return [lp.ValueArg(self.helmholtz_k_name, k_dtype)] - def get_preambles(self): - from sumpy.codegen import BESSEL_PREAMBLE - return [("sumpy-bessel", BESSEL_PREAMBLE)] - mapper_method = "map_helmholtz_kernel" @@ -313,9 +305,6 @@ class KernelWrapper(Kernel): def get_args(self): return self.inner_kernel.get_args() - def get_preambles(self): - return self.inner_kernel.get_preambles() - # }}} diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 547cb4ce..8114845a 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -28,7 +28,7 @@ from pytools import memoize_method class P2E(object): - def __init__(self, ctx, expansion, coeff_dtype=None, + def __init__(self, ctx, expansion, options=[], name="p2e", device=None): """ :arg expansion: a subclass of :class:`sympy.expansion.ExpansionBase` @@ -43,7 +43,6 @@ class P2E(object): self.ctx = ctx self.expansion = expansion - self.coeff_dtype = coeff_dtype self.options = options self.name = name self.device = device @@ -111,7 +110,6 @@ class P2E(object): ], arguments, name=self.name, assumptions="nsrc_boxes>=1", - preambles=self.expansion.get_preambles(), defines=dict( dim=self.dim, COEFFIDX=[str(i) for i in xrange(len(coeff_names))] diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 129591e5..a581a45e 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -83,6 +83,7 @@ class P2P(KernelComputation): * var("strength_%d" % self.strength_usage[i])[var("isrc")] for i, name in enumerate(result_names)] + from sumpy.tools import gather_arguments arguments = ( [ lp.GlobalArg("src", None, @@ -92,12 +93,12 @@ class P2P(KernelComputation): lp.ValueArg("nsrc", None), lp.ValueArg("ntgt", np.int32), ]+[ - lp.GlobalArg("strength_%d" % i, dtype, shape="nsrc", order="C") + lp.GlobalArg("strength_%d" % i, None, shape="nsrc", order="C") for i, dtype in enumerate(self.strength_dtypes) ]+[ lp.GlobalArg("result_%d" % i, dtype, shape="ntgt", order="C") for i, dtype in enumerate(self.value_dtypes) - ] + self.gather_kernel_arguments()) + ] + gather_arguments(self.kernels)) if self.exclude_self: from pymbolic.primitives import If, ComparisonOperator, Variable @@ -125,7 +126,6 @@ class P2P(KernelComputation): ], arguments, name=self.name, assumptions="nsrc>=1 and ntgt>=1", - preambles=self.gather_kernel_preambles(), defines=dict(KNLIDX=range(len(exprs)))) for where in ["compute_a", "compute_b"]: diff --git a/sumpy/qbx.py b/sumpy/qbx.py index e8e03b0a..ad005967 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -118,6 +118,7 @@ class LayerPotentialBase(KernelComputation): * self.get_strength_or_not(isrc_sym, i) for i, name in enumerate(result_names)] + from sumpy.tools import gather_arguments arguments = ( [ lp.GlobalArg("src", None, @@ -129,7 +130,7 @@ class LayerPotentialBase(KernelComputation): lp.ValueArg("nsrc", None), lp.ValueArg("ntgt", None), ] + self.get_input_and_output_arguments() - + self.gather_kernel_arguments()) + + gather_arguments(self.kernels)) loopy_knl = lp.make_kernel(self.device, "{[isrc,itgt,idim]: 0<=itgt order + 0.5 + print expn_class, order + print("POTENTIAL:") + print(eoc_rec_pot) + print("X TARGET DERIVATIVE:") + print(eoc_rec_grad_x) + + tgt_order = order + 1 + + assert eoc_rec_pot.order_estimate() > tgt_order - 0.5 + assert eoc_rec_grad_x.order_estimate() > tgt_order - 1.5 @pytools.test.mark_test.opencl -def no_test_p2m2m2p(ctx_getter): +@pytest.mark.parametrize("order", [2, 3, 4, 5]) +def no_test_translations(ctx_getter, order): + logging.basicConfig(level=logging.INFO) + ctx = ctx_getter() queue = cl.CommandQueue(ctx) - res = 100 - - dimensions = 3 - sources = np.random.rand(dimensions, 5).astype(np.float32) - #targets = np.random.rand(dimensions, 10).astype(np.float32) - targets = np.mgrid[-2:2:res*1j, -2:2:res*1j, 2:2:1j].reshape(3, -1).astype(np.float32) - centers = np.array([[0.5]]*dimensions).astype(np.float32) - - from sumpy.tools import vector_to_device - targets_dev = vector_to_device(queue, targets) - sources_dev = vector_to_device(queue, sources) - centers_dev = vector_to_device(queue, centers) - strengths_dev = cl_array.empty(queue, sources.shape[1], dtype=np.float32) - strengths_dev.fill(1) + np.random.seed(17) - cell_idx_to_particle_offset = np.array([0], dtype=np.uint32) - cell_idx_to_particle_cnt_src = np.array([sources.shape[1]], dtype=np.uint32) - cell_idx_to_particle_cnt_tgt = np.array([targets.shape[1]], dtype=np.uint32) + res = 100 + nsources = 500 - from pyopencl.array import to_device - cell_idx_to_particle_offset_dev = to_device(queue, cell_idx_to_particle_offset) - cell_idx_to_particle_cnt_src_dev = to_device(queue, cell_idx_to_particle_cnt_src) - cell_idx_to_particle_cnt_tgt_dev = to_device(queue, cell_idx_to_particle_cnt_tgt) + dim = 2 - from sumpy.symbolic import make_coulomb_kernel_in - from sumpy.expansion import TaylorMultipoleExpansion - texp = TaylorMultipoleExpansion( - make_coulomb_kernel_in("b", dimensions), - order=3, dimensions=dimensions) + from sumpy.kernel import LaplaceKernel + knl = LaplaceKernel(dim) + out_kernels = [knl] + #m_expn = VolumeTaylorMultipoleExpansion(knl, order=order) + l_expn = VolumeTaylorLocalExpansion(knl, order=order) + + from sumpy import P2E, E2P, P2P, E2E + p2l = P2E(ctx, l_expn, out_kernels) + l2l = E2E(ctx, l_expn, l_expn) + l2p = E2P(ctx, l_expn, out_kernels) + p2p = P2P(ctx, out_kernels, exclude_self=False) - def get_coefficient_expr(idx): - return sp.Symbol("coeff%d" % idx) + from pytools.convergence import EOCRecorder + eoc_rec = EOCRecorder() - for i in texp.m2m_exprs(get_coefficient_expr): - print i + for dist in [3, 5, 7]: + l_centers = np.array( + [ + [0.4+dist, 0.6], + [0.5+dist, 0.5], + ], + dtype=np.float64).T.copy() - print "-------------------------------------" - from sumpy.expansion import TaylorLocalExpansion - locexp = TaylorLocalExpansion(order=2, dimensions=3) + sources = np.random.rand(dim, nsources).astype(np.float64) + strengths = np.ones(nsources, dtype=np.float64) + targets = np.mgrid[dist:dist + 1:res*1j, 0:1:res*1j] \ + .reshape(dim, -1).astype(np.float64) - for i in texp.m2l_exprs(locexp, get_coefficient_expr): - print i + source_boxes = np.array([0], dtype=np.int32) + box_source_starts = np.array([0], dtype=np.int32) + box_source_counts_nonchild = np.array([nsources], dtype=np.int32) - 1/0 + # {{{ apply P2L - coeff_dtype = np.float32 + evt, (mpoles,) = p2l(queue, + source_boxes=source_boxes, + box_source_starts=box_source_starts, + box_source_counts_nonchild=box_source_counts_nonchild, + centers=l_centers, + sources=sources, + strengths=strengths, + #iflags="print_hl_wrapper", + out_host=True) - # {{{ apply P2M + # }}} - from sumpy.p2m import P2MKernel - p2m = P2MKernel(ctx, texp) - mpole_coeff = p2m(cell_idx_to_particle_offset_dev, - cell_idx_to_particle_cnt_src_dev, - sources_dev, strengths_dev, centers_dev, coeff_dtype) + # {{{ apply L2L - # }}} + src_box_starts = np.array([0, 0, 1], dtype=np.int32) + src_box_lists = np.array([0], dtype=np.int32) - # {{{ apply M2P + box_source_starts = np.array([0], dtype=np.int32) + evt, (mpoles,) = l2l(queue, + src_expansions=mpoles, + src_box_starts=src_box_starts, + src_box_lists=src_box_lists, + l_centers=l_centers, + #iflags="print_hl_wrapper", + out_host=True) - output_maps = [ - lambda expr: expr, - lambda expr: sp.diff(expr, sp.Symbol("t0")) - ] + # }}} - m2p_ilist_starts = np.array([0, 1], dtype=np.uint32) - m2p_ilist_mpole_offsets = np.array([0], dtype=np.uint32) + # {{{ apply L2P - m2p_ilist_starts_dev = to_device(queue, m2p_ilist_starts) - m2p_ilist_mpole_offsets_dev = to_device(queue, m2p_ilist_mpole_offsets) + ntargets = targets.shape[-1] - from sumpy.m2p import M2PKernel - m2p = M2PKernel(ctx, texp, output_maps=output_maps) - potential_dev, x_derivative = m2p(targets_dev, m2p_ilist_starts_dev, m2p_ilist_mpole_offsets_dev, mpole_coeff, - cell_idx_to_particle_offset_dev, - cell_idx_to_particle_cnt_tgt_dev) + box_target_starts = np.array([0], dtype=np.int32) + box_target_counts_nonchild = np.array([ntargets], dtype=np.int32) - # }}} + evt, (pot,) = l2p( + queue, + expansions=mpoles, + target_boxes=source_boxes, + box_target_starts=box_target_starts, + box_target_counts_nonchild=box_target_counts_nonchild, + centers=l_centers, + targets=targets, + #iflags="print_hl", + out_host=True, + ) - # {{{ compute (direct) reference solution + # }}} - from sumpy.p2p import P2PKernel - from sumpy.symbolic import make_coulomb_kernel_ts - coulomb_knl = make_coulomb_kernel_ts(dimensions) + # {{{ compute (direct) reference solution - knl = P2PKernel(ctx, dimensions, - exprs=[f(coulomb_knl) for f in output_maps], exclude_self=False) + evt, (pot_direct,) = p2p( + queue, + targets, sources, (strengths,), + out_host=True) - potential_dev_direct, x_derivative_dir = knl(targets_dev, sources_dev, strengths_dev) + err = la.norm((pot - pot_direct)/res**2) - if 0: - import matplotlib.pyplot as pt - pt.imshow((potential_dev-potential_dev_direct).get().reshape(res, res)) - pt.colorbar() - pt.show() + # }}} - assert la.norm((potential_dev-potential_dev_direct).get())/res**2 < 1e-3 + eoc_rec.add_data_point(1/dist, err) - # }}} + print eoc_rec + tgt_order = order + 1 + assert eoc_rec.order_estimate() > tgt_order - 0.5 # You can test individual routines by typing -- GitLab