diff --git a/examples/curve-pot.py b/examples/curve-pot.py index 4a09b403f755d9bdf35a250aff75453a7b60047c..310a11315efb1fe4d2c92bd0ef97fcc20f22d8dc 100644 --- a/examples/curve-pot.py +++ b/examples/curve-pot.py @@ -3,7 +3,6 @@ import pyopencl as cl import numpy as np import numpy.linalg as la import matplotlib.pyplot as pt -from pytools import Record def process_kernel(knl, what_operator): @@ -16,8 +15,8 @@ def process_kernel(knl, what_operator): from sumpy.kernel import TargetDerivative knl = TargetDerivative(1, knl) elif what_operator == "D": - from sumpy.kernel import SourceDerivative - knl = SourceDerivative(knl) + from sumpy.kernel import DirectionalSourceDerivative + knl = DirectionalSourceDerivative(knl) # DirectionalTargetDerivative (temporarily?) removed # elif what_operator == "S'": # from sumpy.kernel import DirectionalTargetDerivative @@ -49,7 +48,7 @@ def draw_pot_figure(aspect_ratio, center = np.asarray([0, 0], dtype=np.float64) from sumpy.visualization import FieldPlotter - fp = FieldPlotter(center, points=1000, extent=3) + fp = FieldPlotter(center, npoints=1000, extent=3) # }}} @@ -57,7 +56,7 @@ def draw_pot_figure(aspect_ratio, from sumpy.p2p import P2P from sumpy.kernel import LaplaceKernel, HelmholtzKernel - from sumpy.local_expansion import H2DLocalExpansion, LineTaylorLocalExpansion + from sumpy.expansion.local import H2DLocalExpansion, LineTaylorLocalExpansion if helmholtz_k: knl = HelmholtzKernel(2) expn_class = H2DLocalExpansion @@ -73,14 +72,11 @@ def draw_pot_figure(aspect_ratio, lpot_knl = process_kernel(knl, what_operator_lpot) - from sumpy.layerpot import LayerPotential, JumpTermApplier + from sumpy.qbx import LayerPotential lpot = LayerPotential(ctx, [ expn_class(lpot_knl, order=order)], value_dtypes=np.complex128) - jt = JumpTermApplier(ctx, [lpot_knl], - value_dtypes=np.complex128) - # }}} # {{{ set up geometry @@ -155,16 +151,7 @@ def draw_pot_figure(aspect_ratio, evt, (y,) = lpot(queue, native_curve.pos, ovsmp_curve.pos, centers, [xovsmp], ovsmp_curve.speed, ovsmp_weights, **lpot_kwargs) - if 0: - class JumpTermArgs(Record): - pass - evt, (y,) = jt(queue, [y], JumpTermArgs( - src_derivative_dir=native_curve.normal, - tgt_derivative_dir=native_curve.normal, - density_0=x, - side=center_side, - normal=native_curve.normal, - )) + return y op = LinearOperator((nsrc, nsrc), apply_lpot) @@ -189,21 +176,6 @@ def draw_pot_figure(aspect_ratio, # }}} - if 0: - # {{{ apply jump terms - - class JumpTermArgs(Record): - pass - evt, (curve_pot,) = jt(queue, [curve_pot], JumpTermArgs( - src_derivative_dir=native_curve.normal, - tgt_derivative_dir=native_curve.normal, - density_0=density, - side=center_side, - normal=native_curve.normal, - )) - - # }}} - if 0: # {{{ plot on-surface potential in 2D @@ -273,6 +245,7 @@ def draw_pot_figure(aspect_ratio, if __name__ == "__main__": + 1/0 # FIXME update to new conventions draw_pot_figure(aspect_ratio=1, nsrc=100, novsmp=100, helmholtz_k=0, what_operator="D", what_operator_lpot="D", force_center_side=1) pt.savefig("eigvals-ext-nsrc100-novsmp100.pdf") diff --git a/sumpy/__init__.py b/sumpy/__init__.py index 279f6683ae96480ca398bdc64c1464315a056950..372d58c88e3978052bcc9c781f7c39ae4b4412a0 100644 --- a/sumpy/__init__.py +++ b/sumpy/__init__.py @@ -22,9 +22,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -from sumpy.p2p import P2P -from sumpy.p2e import P2E -from sumpy.e2p import E2P -from sumpy.e2e import E2E +from sumpy.p2p import P2P, P2PFromCSR +from sumpy.p2e import P2EFromLocal, P2EFromCSR +from sumpy.e2p import E2PFromLocal, E2PFromCSR +from sumpy.e2e import E2EFromCSR, E2EFromChildren, E2EFromParent -__all__ = ["P2P", "P2E", "E2P", "E2E"] +__all__ = [ + "P2P", "P2PFromCSR", + "P2EFromLocal", "P2EFromCSR", + "E2PFromLocal", "E2PFromCSR", + "E2EFromCSR", "E2EFromChildren", "E2EFromParent"] diff --git a/sumpy/e2e.py b/sumpy/e2e.py index bc6f96f97805c5a48c040641b5c321b09078ea81..3168c87a7f135346bd7cc36804916e754998dc20 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -28,15 +28,17 @@ import sympy as sp from pytools import memoize_method -class E2E(object): +# {{{ translation base class + +class E2EBase(object): def __init__(self, ctx, src_expansion, tgt_expansion, options=[], name="e2e", device=None): """ :arg expansion: a subclass of :class:`sympy.expansion.ExpansionBase` :arg strength_usage: A list of integers indicating which expression - uses which source strength indicator. This implicitly specifies the - number of strength arrays that need to be passed. - Default: all kernels use the same strength. + uses which source strength indicator. This implicitly specifies the + number of strength arrays that need to be passed. + Default: all kernels use the same strength. """ if device is None: @@ -55,16 +57,12 @@ class E2E(object): self.dim = src_expansion.dim - @memoize_method - def get_kernel(self): + def get_translation_loopy_insns(self): from sumpy.symbolic import make_sym_vector dvec = make_sym_vector("d", self.dim) - ncoeff_src = len(self.src_expansion.get_coefficient_identifiers()) - ncoeff_tgt = len(self.tgt_expansion.get_coefficient_identifiers()) - src_coeff_exprs = [sp.Symbol("src_coeff%d" % i) - for i in xrange(ncoeff_src)] + for i in xrange(len(self.src_expansion))] from sumpy.assignment_collection import SymbolicAssignmentCollection sac = SymbolicAssignmentCollection() @@ -83,34 +81,49 @@ class E2E(object): retain_names=tgt_coeff_names) from sumpy.codegen import to_loopy_insns - loopy_insns = to_loopy_insns(assignments, + return to_loopy_insns( + assignments, vector_names=set(["d"]), pymbolic_expr_maps=[self.tgt_expansion.transform_to_code], complex_dtype=np.complex128 # FIXME ) - from sumpy.tools import gather_arguments - arguments = ( - [ - lp.GlobalArg("centers", None, shape="dim, nboxes"), - lp.GlobalArg("src_expansions", None, - shape=("nboxes", ncoeff_src)), - lp.GlobalArg("tgt_expansions", None, - shape=("nboxes", ncoeff_tgt)), - lp.GlobalArg("src_box_starts, src_box_lists", - None, shape=None, strides=(1,)), - lp.ValueArg("nboxes", np.int32), - "..." - ] + gather_arguments([self.src_expansion, self.tgt_expansion]) - ) + @memoize_method + def get_optimized_kernel(self): + # FIXME + knl = self.get_kernel() + knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0") + return knl + +# }}} + + +# {{{ translation from "compressed sparse row"-like source box lists + +class E2EFromCSR(E2EBase): + """Implements translation from a "compressed sparse row"-like source box + list. + """ + + def get_kernel(self): + ncoeff_src = len(self.src_expansion) + ncoeff_tgt = len(self.tgt_expansion) + + # To clarify terminology: + # + # isrc_box -> The index in a list of (in this case, source) boxes + # src_ibox -> The (global) box number for the (in this case, source) box + # + # (same for itgt_box, tgt_ibox) + from sumpy.tools import gather_arguments loopy_knl = lp.make_kernel(self.device, [ "{[itgt_box]: 0<=itgt_box<ntgt_boxes}", "{[isrc_box]: isrc_start<=isrc_box<isrc_stop}", "{[idim]: 0<=idim<dim}", ], - loopy_insns + self.get_translation_loopy_insns() + [""" <> tgt_ibox = target_boxes[itgt_box] <> tgt_center[idim] = centers[idim, tgt_ibox] \ @@ -123,13 +136,22 @@ class E2E(object): <> src_center[idim] = centers[idim, src_ibox] \ {id=fetch_src_center} <> d[idim] = tgt_center[idim] - src_center[idim] - <> src_coeff${SRC_COEFFIDX} = \ src_expansions[src_ibox, ${SRC_COEFFIDX}] tgt_expansions[tgt_ibox, ${TGT_COEFFIDX}] = \ - coeff${TGT_COEFFIDX} {id_prefix=write_expn} + coeff${TGT_COEFFIDX} {id_prefix=write_expn} """], - arguments, + [ + lp.GlobalArg("centers", None, shape="dim, nboxes"), + lp.GlobalArg("src_box_starts, src_box_lists", + None, shape=None, strides=(1,)), + lp.ValueArg("nboxes", np.int32), + lp.GlobalArg("src_expansions", None, + shape=("nboxes", ncoeff_src)), + lp.GlobalArg("tgt_expansions", None, + shape=("nboxes", ncoeff_tgt)), + "..." + ] + gather_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, assumptions="ntgt_boxes>=1", defines=dict( dim=self.dim, @@ -147,12 +169,174 @@ class E2E(object): return loopy_knl - @memoize_method - def get_optimized_kernel(self): - # FIXME - knl = self.get_kernel() - knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0") - return knl + def __call__(self, queue, **kwargs): + """ + :arg src_expansions: + :arg src_box_starts: + :arg src_box_lists: + :arg centers: + """ + knl = self.get_optimized_kernel() + + return knl(queue, **kwargs) + +# }}} + + +# {{{ translation from a box's children + +class E2EFromChildren(E2EBase): + def get_kernel(self): + if self.src_expansion is not self.tgt_expansion: + raise RuntimeError("%s requires that the source " + "and target expansion are the same object") + + ncoeffs = len(self.src_expansion) + + # To clarify terminology: + # + # isrc_box -> The index in a list of (in this case, source) boxes + # src_ibox -> The (global) box number for the (in this case, source) box + # + # (same for itgt_box, tgt_ibox) + + loopy_insns = [ + insn.copy(predicates=insn.predicates + | frozenset(["is_src_box_valid"])) + for insn in self.get_translation_loopy_insns()] + + from sumpy.tools import gather_arguments + loopy_knl = lp.make_kernel(self.device, + [ + "{[itgt_box]: 0<=itgt_box<ntgt_boxes}", + "{[isrc_box]: 0<=isrc_box<nchildren}", + "{[idim]: 0<=idim<dim}", + ], + loopy_insns + + [""" + <> tgt_ibox = target_boxes[itgt_box] + <> tgt_center[idim] = centers[idim, tgt_ibox] \ + {id=fetch_tgt_center} + + <> src_ibox = box_child_ids[isrc_box,itgt_box] + <> is_src_box_valid = src_ibox != 0 + + <> src_center[idim] = centers[idim, src_ibox] \ + {id=fetch_src_center,if=is_src_box_valid} + <> d[idim] = tgt_center[idim] - src_center[idim] \ + {if=is_src_box_valid} + + <> src_coeff${COEFFIDX} = \ + expansions[src_ibox, ${COEFFIDX}] \ + {if=is_src_box_valid} + expansions[tgt_ibox, ${COEFFIDX}] = \ + expansions[tgt_ibox, ${COEFFIDX}] + coeff${COEFFIDX} \ + {id_prefix=write_expn,if=is_src_box_valid,dep=} + """], + [ + lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), + lp.GlobalArg("box_child_ids", None, + shape="nchildren, aligned_nboxes"), + lp.GlobalArg("expansions", None, + shape=("nboxes", ncoeffs)), + lp.ValueArg("nboxes", np.int32), + lp.ValueArg("aligned_nboxes", np.int32), + "..." + ] + gather_arguments([self.src_expansion, self.tgt_expansion]), + name=self.name, assumptions="ntgt_boxes>=1", + defines=dict( + dim=self.dim, + nchildren=2**self.dim, + COEFFIDX=[str(i) for i in xrange(ncoeffs)], + ), + silenced_warnings="write_race(write_expn*)") + + for expn in [self.src_expansion, self.tgt_expansion]: + loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + + loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "fetch_tgt_center", + tags={"idim": "unr"}) + loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + + return loopy_knl + + def __call__(self, queue, **kwargs): + """ + :arg src_expansions: + :arg src_box_starts: + :arg src_box_lists: + :arg centers: + """ + knl = self.get_optimized_kernel() + + return knl(queue, **kwargs) + +# }}} + + +# {{{ translation from a box's parent + +class E2EFromParent(E2EBase): + def get_kernel(self): + if self.src_expansion is not self.tgt_expansion: + raise RuntimeError("%s requires that the source " + "and target expansion are the same object") + + ncoeffs = len(self.src_expansion) + + # To clarify terminology: + # + # isrc_box -> The index in a list of (in this case, source) boxes + # src_ibox -> The (global) box number for the (in this case, source) box + # + # (same for itgt_box, tgt_ibox) + + from sumpy.tools import gather_arguments + loopy_knl = lp.make_kernel(self.device, + [ + "{[itgt_box]: 0<=itgt_box<ntgt_boxes}", + "{[idim]: 0<=idim<dim}", + ], + self.get_translation_loopy_insns() + + [""" + <> tgt_ibox = target_boxes[itgt_box] + <> tgt_center[idim] = centers[idim, tgt_ibox] \ + {id=fetch_tgt_center} + + <> src_ibox = box_parent_ids[tgt_ibox] + <> src_center[idim] = centers[idim, src_ibox] \ + {id=fetch_src_center} + <> d[idim] = tgt_center[idim] - src_center[idim] + + <> src_coeff${COEFFIDX} = \ + expansions[src_ibox, ${COEFFIDX}] + expansions[tgt_ibox, ${COEFFIDX}] = \ + expansions[tgt_ibox, ${COEFFIDX}] + coeff${COEFFIDX} \ + {id_prefix=write_expn,if=is_src_box_valid} + """], + [ + lp.GlobalArg("centers", None, shape="dim, nboxes"), + lp.ValueArg("nboxes", np.int32), + lp.GlobalArg("expansions", None, + shape=("nboxes", ncoeffs)), + "..." + ] + gather_arguments([self.src_expansion, self.tgt_expansion]), + name=self.name, assumptions="ntgt_boxes>=1", + defines=dict( + dim=self.dim, + nchildren=2**self.dim, + COEFFIDX=[str(i) for i in xrange(ncoeffs)], + ), + silenced_warnings="write_race(write_expn*)") + + for expn in [self.src_expansion, self.tgt_expansion]: + loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + + loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "fetch_tgt_center", + tags={"idim": "unr"}) + loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + + return loopy_knl def __call__(self, queue, **kwargs): """ @@ -164,3 +348,7 @@ class E2E(object): knl = self.get_optimized_kernel() return knl(queue, **kwargs) + +# }}} + +# vim: foldmethod=marker diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 00c760781c93c7b11d6d965985392acd6ed34ed0..18cc8f8ea764afc6ea2bc665a0519aed3b5bab3d 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -28,7 +28,9 @@ import sympy as sp from pytools import memoize_method -class E2P(object): +# {{{ E2P base class + +class E2PBase(object): def __init__(self, ctx, expansion, kernels, options=[], name="e2p", device=None): """ @@ -56,8 +58,7 @@ class E2P(object): for knl in kernels: assert tdr(knl) == expansion.kernel - @memoize_method - def get_kernel(self): + def get_loopy_insns_and_result_names(self): from sumpy.symbolic import make_sym_vector bvec = make_sym_vector("b", self.dim) @@ -96,6 +97,20 @@ class E2P(object): expression=sympy_conv(self.expansion.kernel.get_scaling()), temp_var_type=lp.auto)) + return loopy_insns, result_names + +# }}} + + +# {{{ box-local E2P (L2P, likely) + +class E2PFromLocal(E2PBase): + @memoize_method + def get_kernel(self): + ncoeffs = len(self.expansion) + + loopy_insns, result_names = self.get_loopy_insns_and_result_names() + arguments = ( [ lp.GlobalArg("targets", None, shape=(self.dim, "ntargets")), @@ -103,7 +118,7 @@ class E2P(object): None, shape=None), lp.GlobalArg("centers", None, shape="dim, nboxes"), lp.GlobalArg("expansions", None, - shape=("nboxes", len(coeff_exprs))), + shape=("nboxes", ncoeffs)), lp.ValueArg("nboxes", np.int32), lp.ValueArg("ntargets", np.int32), "..." @@ -133,7 +148,7 @@ class E2P(object): name=self.name, assumptions="ntgt_boxes>=1", defines=dict( dim=self.dim, - COEFFIDX=[str(i) for i in xrange(len(coeff_exprs))], + COEFFIDX=[str(i) for i in xrange(ncoeffs)], RESULTIDX=[str(i) for i in xrange(len(result_names))], ) ) @@ -163,3 +178,89 @@ class E2P(object): knl = self.get_optimized_kernel() return knl(queue, **kwargs) + +# }}} + + +# {{{ E2P from CSR-like interaction list + +class E2PFromCSR(E2PBase): + @memoize_method + def get_kernel(self): + ncoeffs = len(self.expansion) + + loopy_insns, result_names = self.get_loopy_insns_and_result_names() + + arguments = ( + [ + lp.GlobalArg("targets", None, shape=(self.dim, "ntargets")), + lp.GlobalArg("box_target_starts,box_target_counts_nonchild", + None, shape=None), + lp.GlobalArg("centers", None, shape="dim, nboxes"), + lp.GlobalArg("expansions", None, + shape=("nboxes", ncoeffs)), + lp.ValueArg("nboxes", np.int32), + lp.ValueArg("ntargets", np.int32), + "..." + ] + [ + lp.GlobalArg("result_%d" % i, None, shape="ntargets") + for i in range(len(result_names)) + ] + self.expansion.get_args() + ) + + loopy_knl = lp.make_kernel(self.device, + [ + "{[itgt_box]: 0<=itgt_box<ntgt_boxes}", + "{[itgt]: itgt_start<=itgt<itgt_end}", + "{[isrc_box,idim]: isrc_box_start<=isrc_box<=isrc_box_end \ + and 0<=idim<dim}", + ], + loopy_insns + + [""" + <> tgt_ibox = target_boxes[itgt_box] + <> itgt_start = box_target_starts[itgt_box] + <> itgt_end = itgt_start+box_target_counts_nonchild[itgt_box] + <> tgt[idim] = targets[idim] {id=fetch_tgt} + + <> isrc_box_start = source_box_starts[isrc_box] + <> isrc_box_end = source_box_starts[isrc_box+1] + + <> src_ibox = source_box_lists[isrc_box] + <> coeff${COEFFIDX} = expansions[src_ibox, ${COEFFIDX}] + <> center[idim] = centers[idim, src_ibox] {id=fetch_center} + + <> b[idim] = tgt[idim] - center[idim] + result[${RESULTIDX, itgt] = \ + kernel_scaling * sum(isrc_box, result_${RESULTIDX}_p) + """], + arguments, + name=self.name, assumptions="ntgt_boxes>=1", + defines=dict( + dim=self.dim, + COEFFIDX=[str(i) for i in xrange(ncoeffs)], + RESULTIDX=[str(i) for i in xrange(len(result_names))], + ) + ) + + loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "fetch_tgt", + tags={"idim": "unr"}) + loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "fetch_center", + tags={"idim": "unr"}) + loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) + + return loopy_knl + + @memoize_method + def get_optimized_kernel(self): + # FIXME + knl = self.get_kernel() + #knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0") + return knl + + def __call__(self, queue, **kwargs): + knl = self.get_optimized_kernel() + return knl(queue, **kwargs) + +# }}} + +# vim: foldmethod=marker diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index 6477b9ab4d9b7de6ba5b3a096ee57de42e29b51d..6d1d540c643be004d48c6809b62bf95243302229 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -63,6 +63,9 @@ class ExpansionBase(object): # }}} + def __len__(self): + return len(self.get_coefficient_identifiers()) + def coefficients_from_source(self, expr, avec, bvec): """ :arg bvec: vector from center to target. Not usually necessary, diff --git a/sumpy/fmm.py b/sumpy/fmm.py new file mode 100644 index 0000000000000000000000000000000000000000..10de0de3745a65013255010e73de668ba543e8e3 --- /dev/null +++ b/sumpy/fmm.py @@ -0,0 +1,280 @@ +from __future__ import division + +__copyright__ = "Copyright (C) 2013 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +__doc__ = """Integrates :mod:`boxtree` with :mod:`sumpy`. +""" + + +import pyopencl as cl +import pyopencl.array # noqa + + +class SumpyExpansionWranglerCodeContainer(object): + """Objects of this type serve as a place to keep the code needed + for :class:`SumpyExpansionWrangler`. Since :class:`SumpyExpansionWrangler` + necessarily must have a :class:`pyopencl.CommandQueue`, but this queue + is allowed to be more ephemeral than the code, the code's lifetime + is decoupled by storing it in this object. + """ + + def __init__(self, cl_context, tree, + multipole_expansion, local_expansion, out_kernels): + self.tree = tree + m_expn = self.multipole_expansion = multipole_expansion + l_expn = self.local_expansion = local_expansion + + self.cl_context = cl_context + + from sumpy import ( + P2EFromLocal, P2EFromCSR, + E2PFromLocal, E2PFromCSR, + P2PFromCSR, + E2EFromCSR, E2EFromChildren, E2EFromParent) + self.p2m = P2EFromLocal(cl_context, m_expn) + self.p2l = P2EFromCSR(cl_context, l_expn) + self.m2m = E2EFromChildren(cl_context, m_expn, m_expn) + self.m2l = E2EFromCSR(cl_context, m_expn, l_expn) + self.l2l = E2EFromParent(cl_context, l_expn, l_expn) + self.m2p = E2PFromCSR(cl_context, m_expn, out_kernels) + self.l2p = E2PFromLocal(cl_context, l_expn, out_kernels) + self.p2p = P2PFromCSR(cl_context, out_kernels, exclude_self=False) + + def get_wrangler(self, queue, dtype, extra_kwargs={}): + return SumpyExpansionWrangler(self, queue, dtype, extra_kwargs) + + +class SumpyExpansionWrangler(object): + """Implements the :class:`boxtree.fmm.ExpansionWranglerInterface` + by using :mod:`sumpy` expansions/translations. + """ + + def __init__(self, code_container, queue, dtype, extra_kwargs): + self.code = code_container + self.queue = queue + self.dtype = dtype + self.extra_kwargs = extra_kwargs + + @property + def tree(self): + return self.code.tree + + @property + def multipole_expansion(self): + return self.code.multipole_expansion + + @property + def local_expansion(self): + return self.code.local_expansion + + def multipole_expansion_zeros(self): + return cl.array.zeros( + self.queue, + (self.tree.nboxes, len(self.multipole_expansion)), + dtype=self.dtype) + + def local_expansion_zeros(self): + return cl.array.zeros( + self.queue, + (self.tree.nboxes, len(self.local_expansion)), + dtype=self.dtype) + + def potential_zeros(self): + return tuple( + cl.array.zeros( + self.queue, + self.tree.ntargets, + dtype=self.dtype) + for k in self.out_kernels) + + def reorder_src_weights(self, src_weights): + return src_weights.with_queue(self.queue)[self.tree.user_source_ids] + + def reorder_potentials(self, potentials): + return potentials.with_queue(self.queue)[self.tree.sorted_target_ids] + + def form_multipoles(self, source_boxes, src_weights): + mpoles = self.multipole_expansion_zeros() + + evt, (mpoles_res,) = self.code.p2m(self.queue, + source_boxes=source_boxes, + box_source_starts=self.tree.box_source_starts, + box_source_counts_nonchild=self.tree.box_source_counts_nonchild, + centers=self.tree.box_centers, + sources=self.tree.sources, + strengths=src_weights, + expansions=mpoles, + + **self.extra_kwargs) + + assert mpoles_res is mpoles + + return mpoles + + def coarsen_multipoles(self, parent_boxes, mpoles): + evt, (mpoles_res,) = self.code.m2m(self.queue, + expansions=mpoles, + target_boxes=parent_boxes, + box_child_ids=self.tree.box_child_ids, + centers=self.tree.box_centers, + **self.extra_kwargs) + + assert mpoles_res is mpoles + return mpoles + + def eval_direct(self, target_boxes, source_box_starts, + source_box_lists, src_weights): + pot = self.potential_zeros() + + evt, pot_res = self.code.p2p(self.queue, + target_boxes=target_boxes, + source_box_starts=source_box_starts, + source_box_lists=source_box_lists, + strengths=(src_weights,), + box_target_starts=self.tree.box_target_starts, + box_target_counts_nonchild=self.tree.box_target_counts_nonchild, + sources=self.tree.sources, + targets=self.tree.targets, + result=pot, + + **self.extra_kwargs) + + for pot_i, pot_res_i in zip(pot, pot_res): + assert pot_i is pot_res_i + + return pot_res + + def multipole_to_local(self, target_or_target_parent_boxes, + starts, lists, mpole_exps): + local_exps = self.local_expansion_zeros() + + evt, (local_exps_res,) = self.m2l(self.queue, + src_expansions=mpole_exps, + target_boxes=target_or_target_parent_boxes, + src_box_starts=starts, + src_box_lists=lists, + centers=self.tree.centers, + tgt_expansions=local_exps, + + **self.extra_kwargs) + + assert local_exps_res is local_exps + + return local_exps + + def eval_multipoles(self, target_boxes, sep_smaller_nonsiblings_starts, + sep_smaller_nonsiblings_lists, mpole_exps): + pot = self.potential_zeros() + + evt, pot_res = self.m2p(self.queue, + expansions=mpole_exps, + target_boxes=target_boxes, + box_target_starts=self.tree.box_target_starts, + box_target_counts_nonchild=self.tree.box_target_counts_nonchild, + source_box_starts=sep_smaller_nonsiblings_starts, + source_box_lists=sep_smaller_nonsiblings_lists, + centers=self.tree.centers, + targets=self.tree.targets, + + **self.extra_kwargs) + + for pot_i, pot_res_i in zip(pot, pot_res): + assert pot_i is pot_res_i + + return pot + + def form_locals(self, target_or_target_parent_boxes, starts, lists, src_weights): + local_exps = self.expansion_zeros() + + evt, (local_exps,) = self.p2l(self.queue, + source_boxes=source_boxes, + box_source_starts=box_source_starts, + box_source_counts_nonchild=box_source_counts_nonchild, + centers=self.tree.centers, + sources=self.tree.sources, + strengths=src_weights, + + **self.extra_kwargs) + from pyfmmlib import h2dformta + + for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes): + start, end = starts[itgt_box:itgt_box+2] + + contrib = 0 + + for src_ibox in lists[start:end]: + src_pslice = self._get_source_slice(src_ibox) + tgt_center = self.tree.box_centers[:, tgt_ibox] + + if src_pslice.stop - src_pslice.start == 0: + continue + + ier, mpole = h2dformta( + self.helmholtz_k, rscale, + self._get_sources(src_pslice), src_weights[src_pslice], + tgt_center, self.nterms) + if ier: + raise RuntimeError("h2dformta failed") + + contrib = contrib + mpole + + local_exps[tgt_ibox] = contrib + + return local_exps + + def refine_locals(self, child_boxes, local_exps): + from pyfmmlib import h2dlocloc_vec + + for tgt_ibox in child_boxes: + tgt_center = self.tree.box_centers[:, tgt_ibox] + src_ibox = self.tree.box_parent_ids[tgt_ibox] + src_center = self.tree.box_centers[:, src_ibox] + + tmp_loc_exp = h2dlocloc_vec( + self.helmholtz_k, + rscale, src_center, local_exps[src_ibox], + rscale, tgt_center, self.nterms)[:, 0] + + local_exps[tgt_ibox] += tmp_loc_exp + + return local_exps + + def eval_locals(self, target_boxes, local_exps): + pot = self.potential_zeros() + rscale = 1 # FIXME + + from pyfmmlib import h2dtaeval_vec + + for tgt_ibox in target_boxes: + tgt_pslice = self._get_target_slice(tgt_ibox) + + if tgt_pslice.stop - tgt_pslice.start == 0: + continue + + tmp_pot, _, _ = h2dtaeval_vec(self.helmholtz_k, rscale, + self.tree.box_centers[:, tgt_ibox], local_exps[tgt_ibox], + self._get_targets(tgt_pslice), ifgrad=False, ifhess=False) + + pot[tgt_pslice] += tmp_pot + + return pot diff --git a/sumpy/kernel.py b/sumpy/kernel.py index 0f9a9a474251584b04885d76e2ac34e56550656a..9ec5acf13681e8bfa0e8e15f12b32e66520ebb00 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -389,7 +389,7 @@ class DirectionalDerivative(DerivativeBase): def get_source_args(self): return [ - lp.GlobalArg(self.dir_vec_name, None, shape=(self.dim, "nsrc"), + lp.GlobalArg(self.dir_vec_name, None, shape=(self.dim, "nsources"), dim_tags="sep,C")] + self.inner_kernel.get_source_args() diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 6f9287619c35e18b5ddd28d6a1f1d88f358b2f9f..9bd731d6a7e3bdab6beb8e4563d47d0abb277f94 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -27,7 +27,9 @@ import loopy as lp from pytools import memoize_method -class P2E(object): +# {{{ P2E base class + +class P2EBase(object): def __init__(self, ctx, expansion, options=[], name="p2e", device=None): """ @@ -49,8 +51,7 @@ class P2E(object): self.dim = expansion.dim - @memoize_method - def get_kernel(self): + def get_looy_instructions(self): from sumpy.symbolic import make_sym_vector avec = make_sym_vector("a", self.dim) @@ -71,55 +72,147 @@ class P2E(object): retain_names=coeff_names) from sumpy.codegen import to_loopy_insns - loopy_insns = to_loopy_insns(assignments, + return to_loopy_insns( + assignments, vector_names=set(["a"]), pymbolic_expr_maps=[self.expansion.transform_to_code], complex_dtype=np.complex128 # FIXME ) +# }}} + + +# {{{ P2E from local boxes + +class P2EFromLocal(P2EBase): + @memoize_method + def get_kernel(self): + ncoeffs = len(self.expansion) + + from sumpy.tools import gather_source_arguments + loopy_knl = lp.make_kernel(self.device, + [ + "{[isrc_box]: 0<=isrc_box<nsrc_boxes}", + "{[isrc,idim]: isrc_start<=isrc<isrc_end and 0<=idim<dim}", + ], + self.get_looy_instructions() + + [""" + <> src_ibox = source_boxes[isrc_box] + <> isrc_start = box_source_starts[isrc_box] + <> isrc_end = isrc_start+box_source_counts_nonchild[isrc_box] + <> center[idim] = centers[idim, src_ibox] {id=fetch_center} + <> a[idim] = center[idim] - sources[idim, isrc] \ + {id=compute_a} + <> strength = strengths[isrc] + expansions[src_ibox, ${COEFFIDX}] = \ + sum(isrc, strength*coeff${COEFFIDX}) \ + {id_prefix=write_expn} + """], + [ + lp.GlobalArg("sources", None, shape=(self.dim, "nsources")), + lp.GlobalArg("strengths", None, shape="nsources"), + lp.GlobalArg("box_source_starts,box_source_counts_nonchild", + None, shape=None), + lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), + lp.GlobalArg("expansions", None, + shape=("nboxes", ncoeffs)), + lp.ValueArg("nboxes,aligned_nboxes", np.int32), + lp.ValueArg("nsources", np.int32), + "..." + ] + gather_source_arguments([self.expansion]), + name=self.name, assumptions="nsrc_boxes>=1", + defines=dict( + dim=self.dim, + COEFFIDX=[str(i) for i in xrange(ncoeffs)] + ), + silenced_warnings="write_race(write_expn*)") + + loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) + loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "fetch_center", + tags={"idim": "unr"}) + loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + loopy_knl = lp.tag_data_axes(loopy_knl, "sources", "sep,c") + + return loopy_knl + + @memoize_method + def get_optimized_kernel(self): + # FIXME + knl = self.get_kernel() + knl = lp.split_iname(knl, "isrc_box", 16, outer_tag="g.0") + return knl + + def __call__(self, queue, **kwargs): + """ + :arg expansions: + :arg source_boxes: + :arg box_source_starts: + :arg box_source_counts_nonchild: + :arg centers: + :arg sources: + :arg strengths: + """ + knl = self.get_optimized_kernel() + + return knl(queue, **kwargs) + +# }}} + + +# {{{ P2E from CSR-like interaction list + +class P2EFromCSR(P2EBase): + @memoize_method + def get_kernel(self): + ncoeffs = len(self.expansion) + + # FIXXME from sumpy.tools import gather_source_arguments arguments = ( [ - lp.GlobalArg("sources", None, shape=(self.dim, "nsrc")), - lp.GlobalArg("strengths", None, shape="nsrc"), + lp.GlobalArg("sources", None, shape=(self.dim, "nsources")), + lp.GlobalArg("strengths", None, shape="nsources"), lp.GlobalArg("box_source_starts,box_source_counts_nonchild", None, shape=None), lp.GlobalArg("centers", None, shape="dim, nboxes"), lp.GlobalArg("expansions", None, - shape=("nboxes", len(coeff_names))), + shape=("nboxes", ncoeffs)), lp.ValueArg("nboxes", np.int32), - lp.ValueArg("nsrc", np.int32), + lp.ValueArg("nsources", np.int32), "..." ] + gather_source_arguments([self.expansion])) loopy_knl = lp.make_kernel(self.device, [ + "{[itgt_box]: 0<=itgt_box<ntgt_boxes}", "{[isrc_box]: 0<=isrc_box<nsrc_boxes}", "{[isrc,idim]: isrc_start<=isrc<isrc_end and 0<=idim<dim}", ], - loopy_insns - + [ - "<> src_ibox = source_boxes[isrc_box]", - "<> isrc_start = box_source_starts[isrc_box]", - "<> isrc_end = isrc_start+box_source_counts_nonchild[isrc_box]", - "<> center[idim] = centers[idim, src_ibox] {id=fetch_center}", - "<> a[idim] = center[idim] - sources[idim, isrc]", - "<> strength = strengths[isrc]", - "expansions[src_ibox, ${COEFFIDX}] = " - "sum(isrc, strength*coeff${COEFFIDX})" - "{id_prefix=write_expn}" - ], + self.get_looy_instructions() + + [""" + <> src_ibox = source_boxes[isrc_box] + <> isrc_start = box_source_starts[isrc_box] + <> isrc_end = isrc_start+box_source_counts_nonchild[isrc_box] + <> center[idim] = centers[idim, src_ibox] {id=fetch_center} + <> a[idim] = center[idim] - sources[idim, isrc] {id=compute_a} + <> strength = strengths[isrc] + expansions[src_ibox, ${COEFFIDX}] = \ + sum(isrc, strength*coeff${COEFFIDX}) \ + {id_prefix=write_expn} + """], arguments, name=self.name, assumptions="nsrc_boxes>=1", defines=dict( dim=self.dim, - COEFFIDX=[str(i) for i in xrange(len(coeff_names))] + COEFFIDX=[str(i) for i in xrange(ncoeffs)] ), silenced_warnings="write_race(write_expn*)") loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "fetch_center", tags={"idim": "unr"}) + loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + loopy_knl = lp.tag_data_axes(loopy_knl, "sources", "sep,c") return loopy_knl @@ -143,3 +236,7 @@ class P2E(object): knl = self.get_optimized_kernel() return knl(queue, **kwargs) + +# }}} + +# vim: foldmethod=marker diff --git a/sumpy/p2p.py b/sumpy/p2p.py index e7129684e3336fdbe268b4e52fd98acacf082d07..20c133650388d5da639891c7d3e22ddab772bd94 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -32,9 +32,11 @@ from sumpy.tools import KernelComputation # LATER: # - Optimization for source == target (postpone) -class P2P(KernelComputation): +# {{{ p2p base class + +class P2PBase(KernelComputation): def __init__(self, ctx, kernels, exclude_self, strength_usage=None, - value_dtypes=None, strength_dtypes=None, + value_dtypes=None, options=[], name="p2p", device=None): """ :arg kernels: list of :class:`sumpy.kernel.Kernel` instances @@ -44,7 +46,7 @@ class P2P(KernelComputation): Default: all kernels use the same strength. """ KernelComputation.__init__(self, ctx, kernels, strength_usage, - value_dtypes, strength_dtypes, + value_dtypes, name, options, device) self.exclude_self = exclude_self @@ -52,8 +54,7 @@ class P2P(KernelComputation): from pytools import single_valued self.dim = single_valued(knl.dim for knl in self.kernels) - @memoize_method - def get_kernel(self): + def get_loopy_insns_and_result_names(self): from sumpy.symbolic import make_sym_vector dvec = make_sym_vector("d", self.dim) @@ -77,6 +78,18 @@ class P2P(KernelComputation): complex_dtype=np.complex128 # FIXME ) + return loopy_insns, result_names + +# }}} + + +# {{{ P2P with list of sources and list of targets + +class P2P(P2PBase): + @memoize_method + def get_kernel(self): + loopy_insns, result_names = self.get_loopy_insns_and_result_names() + from pymbolic import var exprs = [ var(name) @@ -87,16 +100,16 @@ class P2P(KernelComputation): arguments = ( [ lp.GlobalArg("src", None, - shape=(self.dim, "nsrc"), order="C"), + shape=(self.dim, "nsources"), order="C"), lp.GlobalArg("tgt", None, - shape=(self.dim, "ntgt"), order="C"), - lp.ValueArg("nsrc", None), - lp.ValueArg("ntgt", np.int32), + shape=(self.dim, "ntargets"), order="C"), + lp.ValueArg("nsources", None), + lp.ValueArg("ntargets", np.int32), ]+[ - lp.GlobalArg("strength_%d" % i, None, shape="nsrc", order="C") - for i, dtype in enumerate(self.strength_dtypes) + lp.GlobalArg("strength_%d" % i, None, shape="nsources", order="C") + for i in xrange(self.strength_count) ]+[ - lp.GlobalArg("result_%d" % i, dtype, shape="ntgt", order="C") + lp.GlobalArg("result_%d" % i, dtype, shape="ntargets", order="C") for i, dtype in enumerate(self.value_dtypes) ] + gather_source_arguments(self.kernels)) @@ -109,8 +122,7 @@ class P2P(KernelComputation): for expr in exprs] loopy_knl = lp.make_kernel(self.device, - "{[isrc,itgt,idim]: 0<=itgt<ntgt and 0<=isrc<nsrc " - "and 0<=idim<%d}" % self.dim, + "{[isrc,itgt,idim]: 0<=itgt<ntargets and 0<=isrc<nsources and 0<=idim<dim}", self.get_kernel_scaling_assignments() + loopy_insns + [ @@ -119,14 +131,17 @@ class P2P(KernelComputation): lp.ExpressionInstruction(id=None, assignee="pair_result_%d" % i, expression=expr, temp_var_type=lp.auto) - for i, (expr, dtype) in enumerate(zip(exprs, self.value_dtypes)) + for i, expr in enumerate(exprs) ]+[ "result_${KNLIDX}[itgt] = knl_${KNLIDX}_scaling \ * sum(isrc, pair_result_${KNLIDX})" ], arguments, - name=self.name, assumptions="nsrc>=1 and ntgt>=1", - defines=dict(KNLIDX=range(len(exprs)))) + name=self.name, assumptions="nsources>=1 and ntargets>=1", + defines=dict( + KNLIDX=range(len(exprs)), + dim=self.dim, + )) for where in ["compute_a", "compute_b"]: loopy_knl = lp.duplicate_inames(loopy_knl, "idim", where) @@ -149,3 +164,105 @@ class P2P(KernelComputation): kwargs["strength_%d" % i] = sstr return knl(queue, src=sources, tgt=targets, **kwargs) + +# }}} + + +# {{{ P2P from CSR-like interaction list + +class P2PFromCSR(P2PBase): + # FIXME: exclude_self ...? + + @memoize_method + def get_kernel(self): + loopy_insns, result_names = self.get_loopy_insns_and_result_names() + + from pymbolic import var + exprs = [ + var(name) + * var("strength_%d" % self.strength_usage[i])[var("isrc")] + for i, name in enumerate(result_names)] + + from sumpy.tools import gather_source_arguments + arguments = ( + [ + lp.GlobalArg("box_target_starts,box_target_counts_nonchild," + "box_source_starts,box_source_counts_nonchild", + None, shape=None) + ]+[ + lp.GlobalArg("strength_%d" % i, None, shape="nsources") + for i in xrange(self.strength_count) + ]+[ + lp.GlobalArg("result_%d" % i, dtype, shape="ntargets") + for i, dtype in enumerate(self.value_dtypes) + ] + gather_source_arguments(self.kernels)) + + loopy_knl = lp.make_kernel(self.device, + [ + "{[itgt_box]: 0<=itgt_box<ntgt_boxes}", + "{[isrc_box]: isrc_box_start<=isrc_box<=isrc_box_end}", + "{[itgt,isrc,idim]: \ + itgt_start<=itgt<itgt_end and \ + isrc_start<=isrc<isrc_end and \ + 0<=idim<dim }", + ], + self.get_kernel_scaling_assignments() + + loopy_insns + [ + """ + <> tgt_ibox = target_boxes[itgt_box] + <> itgt_start = box_target_starts[itgt_box] + <> itgt_end = itgt_start+box_target_counts_nonchild[itgt_box] + + <> isrc_box_start = source_box_starts[itgt_box] + <> isrc_box_end = source_box_starts[itgt_box+1] + + <> src_ibox = source_box_lists[isrc_box] + <> isrc_start = box_source_starts[isrc_box] + <> isrc_end = isrc_start+box_source_counts_nonchild[isrc_box] + + <> d[idim] = targets[idim,itgt] - sources[idim,isrc] \ + {id=compute_d} + + result[${KNLIDX}, itgt] = result[${KNLIDX}, itgt] + \ + knl_${KNLIDX}_scaling \ + * sum(isrc, pair_result_${KNLIDX}) + """ + ]+[ + lp.ExpressionInstruction(id=None, + assignee="pair_result_%d" % i, expression=expr, + temp_var_type=lp.auto) + for i, expr in enumerate(exprs) + ]+[ + ], + arguments, + name=self.name, assumptions="nsources>=1 and ntargets>=1", + defines=dict( + KNLIDX=range(len(exprs)), + dim=self.dim, + )) + + for where in ["compute_a", "compute_b"]: + loopy_knl = lp.duplicate_inames(loopy_knl, "idim", where) + + for knl in self.kernels: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) + + return loopy_knl + + def get_optimized_kernel(self): + # FIXME + knl = self.get_kernel() + knl = lp.split_iname(knl, "itgt", 1024, outer_tag="g.0") + return knl + + def __call__(self, queue, targets, sources, src_strengths, **kwargs): + knl = self.get_optimized_kernel() + + for i, sstr in enumerate(src_strengths): + kwargs["strength_%d" % i] = sstr + + return knl(queue, src=sources, tgt=targets, **kwargs) + +# }}} + +# vim: foldmethod=marker diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 8ed6382711fda7e1db472c27daac86f247c61edd..99ad35e31571e8d7e7bc49a896af3b8cffa3866c 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -66,10 +66,10 @@ def expand(expansion_nr, sac, expansion, avec, bvec): class LayerPotentialBase(KernelComputation): def __init__(self, ctx, expansions, strength_usage=None, - value_dtypes=None, strength_dtypes=None, + value_dtypes=None, options=[], name="layerpot", device=None): KernelComputation.__init__(self, ctx, expansions, strength_usage, - value_dtypes, strength_dtypes, + value_dtypes, name, options, device) from pytools import single_valued @@ -122,18 +122,18 @@ class LayerPotentialBase(KernelComputation): arguments = ( [ lp.GlobalArg("src", None, - shape=(self.dim, "nsrc"), order="C"), + shape=(self.dim, "nsources"), order="C"), lp.GlobalArg("tgt", None, - shape=(self.dim, "ntgt"), order="C"), + shape=(self.dim, "ntargets"), order="C"), lp.GlobalArg("center", None, - shape=(self.dim, "ntgt"), order="C"), - lp.ValueArg("nsrc", None), - lp.ValueArg("ntgt", None), + shape=(self.dim, "ntargets"), order="C"), + lp.ValueArg("nsources", None), + lp.ValueArg("ntargets", None), ] + self.get_input_and_output_arguments() + gather_source_arguments(self.kernels)) loopy_knl = lp.make_kernel(self.device, - "{[isrc,itgt,idim]: 0<=itgt<ntgt and 0<=isrc<nsrc " + "{[isrc,itgt,idim]: 0<=itgt<ntargets and 0<=isrc<nsources " "and 0<=idim<%d}" % self.dim, [ "<> a[idim] = center[idim,itgt] - src[idim,isrc] {id=compute_a}", @@ -146,7 +146,7 @@ class LayerPotentialBase(KernelComputation): ]+self.get_result_store_instructions(), arguments, defines=dict(KNLIDX=range(len(exprs))), - name=self.name, assumptions="nsrc>=1 and ntgt>=1", + name=self.name, assumptions="nsources>=1 and ntargets>=1", ) for where in ["compute_a", "compute_b"]: @@ -193,11 +193,11 @@ class LayerPotential(LayerPotentialBase): def get_input_and_output_arguments(self): return [ - lp.GlobalArg("strength_%d" % i, None, shape="nsrc", order="C") - for i, dtype in enumerate(self.strength_dtypes) + lp.GlobalArg("strength_%d" % i, None, shape="nsources", order="C") + for i in xrange(self.strength_count) ]+[ - lp.GlobalArg("result_%d" % i, None, shape="ntgt", order="C") - for i, dtype in enumerate(self.value_dtypes) + lp.GlobalArg("result_%d" % i, None, shape="ntargets", order="C") + for i in xrange(len(self.kernels)) ] def get_result_store_instructions(self): @@ -232,7 +232,7 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase): def get_input_and_output_arguments(self): return [ - lp.GlobalArg("result_%d" % i, dtype, shape="ntgt,nsrc", order="C") + lp.GlobalArg("result_%d" % i, dtype, shape="ntargets,nsources") for i, dtype in enumerate(self.value_dtypes) ] @@ -361,7 +361,7 @@ class _JumpTermSymbolicArgumentProvider(object): def density(self): self.arguments[self.density_var_name] = \ lp.GlobalArg(self.density_var_name, self.density_dtype, - shape="ntgt", order="C") + shape="ntargets", order="C") return parse("%s[itgt]" % self.density_var_name) @property @@ -370,14 +370,14 @@ class _JumpTermSymbolicArgumentProvider(object): prime_var_name = self.density_var_name+"_prime" self.arguments[prime_var_name] = \ lp.GlobalArg(prime_var_name, self.density_dtype, - shape="ntgt", order="C") + shape="ntargets", order="C") return parse("%s[itgt]" % prime_var_name) @property @memoize_method def side(self): self.arguments["side"] = \ - lp.GlobalArg("side", self.geometry_dtype, shape="ntgt", order="C") + lp.GlobalArg("side", self.geometry_dtype, shape="ntargets") return parse("side[itgt]") @property @@ -385,7 +385,7 @@ class _JumpTermSymbolicArgumentProvider(object): def normal(self): self.arguments["normal"] = \ lp.GlobalArg("normal", self.geometry_dtype, - shape=("ntgt", self.dim), order="C") + shape=("ntargets", self.dim), order="C") from pytools.obj_array import make_obj_array return make_obj_array([ parse("normal[itgt, %d]" % i) @@ -396,7 +396,7 @@ class _JumpTermSymbolicArgumentProvider(object): def tangent(self): self.arguments["tangent"] = \ lp.GlobalArg("tangent", self.geometry_dtype, - shape=("ntgt", self.dim), order="C") + shape=("ntargets", self.dim), order="C") from pytools.obj_array import make_obj_array return make_obj_array([ parse("tangent[itgt, %d]" % i) @@ -407,7 +407,7 @@ class _JumpTermSymbolicArgumentProvider(object): def mean_curvature(self): self.arguments["mean_curvature"] = \ lp.GlobalArg("mean_curvature", - self.geometry_dtype, shape="ntgt", + self.geometry_dtype, shape="ntargets", order="C") return parse("mean_curvature[itgt]") @@ -416,7 +416,7 @@ class _JumpTermSymbolicArgumentProvider(object): def src_derivative_dir(self): self.arguments["src_derivative_dir"] = \ lp.GlobalArg("src_derivative_dir", - self.geometry_dtype, shape=("ntgt", self.dim), + self.geometry_dtype, shape=("ntargets", self.dim), order="C") from pytools.obj_array import make_obj_array return make_obj_array([ @@ -428,7 +428,7 @@ class _JumpTermSymbolicArgumentProvider(object): def tgt_derivative_dir(self): self.arguments["tgt_derivative_dir"] = \ lp.GlobalArg("tgt_derivative_dir", - self.geometry_dtype, shape=("ntgt", self.dim), + self.geometry_dtype, shape=("ntargets", self.dim), order="C") from pytools.obj_array import make_obj_array return make_obj_array([ diff --git a/sumpy/tools.py b/sumpy/tools.py index f45a443ef5590b71e77f947a08bbb6a34c3f3bbc..df6dbf426ae1f597aa763f74030cf86ebd125d32 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -110,16 +110,15 @@ class KernelComputation(object): """Common input processing for kernel computations.""" def __init__(self, ctx, kernels, strength_usage, - value_dtypes, strength_dtypes, - name="kernel", options=[], device=None): + value_dtypes, name="kernel", options=[], device=None): """ :arg kernels: list of :class:`sumpy.kernel.Kernel` instances :class:`sumpy.kernel.TargetDerivative` wrappers should be the outermost kernel wrappers, if present. :arg strength_usage: A list of integers indicating which expression - uses which density. This implicitly specifies the - number of density arrays that need to be passed. - Default: all kernels use the same density. + uses which density. This implicitly specifies the + number of density arrays that need to be passed. + Default: all kernels use the same density. """ # {{{ process value_dtypes @@ -149,21 +148,6 @@ class KernelComputation(object): # }}} - # {{{ process strength_dtypes - - if strength_dtypes is None: - strength_dtypes = value_dtypes[0] - - if not isinstance(strength_dtypes, (list, tuple)): - strength_dtypes = [np.dtype(strength_dtypes)] * strength_count - - if len(strength_dtypes) != strength_count: - raise ValueError("exprs and strength_usage must have the same length") - - strength_dtypes = [np.dtype(dtype) for dtype in strength_dtypes] - - # }}} - if device is None: device = ctx.devices[0] @@ -173,7 +157,6 @@ class KernelComputation(object): self.kernels = kernels self.value_dtypes = value_dtypes self.strength_usage = strength_usage - self.strength_dtypes = strength_dtypes self.strength_count = strength_count self.name = name diff --git a/test/test_fmm.py b/test/test_fmm.py new file mode 100644 index 0000000000000000000000000000000000000000..800682af99f38b21558f7a3e7747617a72c78ae6 --- /dev/null +++ b/test/test_fmm.py @@ -0,0 +1,115 @@ +from __future__ import division + +__copyright__ = "Copyright (C) 2013 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import sys +import numpy as np +import numpy.linalg as la +import pyopencl as cl +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + +import logging +logger = logging.getLogger(__name__) + + +def no_test_sumpy_fmm(ctx_getter): + logging.basicConfig(level=logging.INFO) + + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + nsources = 3000 + ntargets = 1000 + dim = 2 + dtype = np.float64 + + from boxtree.tools import ( + make_normal_particle_array as p_normal) + + sources = p_normal(queue, nsources, dim, dtype, seed=15) + targets = ( + p_normal(queue, ntargets, dim, dtype, seed=18) + + np.array([2, 0])) + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + tree, _ = tb(queue, sources, targets=targets, + max_particles_in_box=30, debug=True) + + from boxtree.traversal import FMMTraversalBuilder + tbuild = FMMTraversalBuilder(ctx) + trav, _ = tbuild(queue, tree, debug=True) + + trav = trav.get() + + from pyopencl.clrandom import RanluxGenerator + rng = RanluxGenerator(queue, seed=20) + + weights = rng.uniform(queue, nsources, dtype=np.float64) + #weights = np.ones(nsources) + + logger.info("computing direct (reference) result") + + from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansion + from sumpy.expansion.local import VolumeTaylorLocalExpansion + from sumpy.kernel import LaplaceKernel + + order = 3 + knl = LaplaceKernel(dim) + mpole_expn = VolumeTaylorMultipoleExpansion(knl, order) + local_expn = VolumeTaylorLocalExpansion(knl, order) + out_kernels = [knl] + + from sumpy.fmm import SumpyExpansionWranglerCodeContainer + wcc = SumpyExpansionWranglerCodeContainer( + ctx, tree, mpole_expn, local_expn, out_kernels) + wrangler = wcc.get_wrangler(queue, np.float64) + + from boxtree.fmm import drive_fmm + pot = drive_fmm(trav, wrangler, weights) + + from sumpy import P2P + p2p = P2P(ctx, out_kernels, exclude_self=False) + ref_pot = p2p(queue, sources, targets) + + pot = pot.get() + ref_pot = ref_pot.get() + + rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot) + logger.info("relative l2 error: %g" % rel_err) + assert rel_err < 1e-5 + + +# You can test individual routines by typing +# $ python test_fmm.py 'test_sumpy_fmm(cl.create_some_context)' + +if __name__ == "__main__": + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from py.test.cmdline import main + main([__file__]) + +# vim: fdm=marker diff --git a/test/test_kernels.py b/test/test_kernels.py index d40911446bd570cc3bb310748dbfa6c126b24190..6ab925436ba0374e77c1b9cc95aff3c8d34281e7 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -25,7 +25,6 @@ THE SOFTWARE. import numpy as np import numpy.linalg as la import sys -import pytools.test import pytest import pyopencl as cl @@ -48,7 +47,6 @@ else: faulthandler.enable() -@pytest.mark.opencl def test_p2p(ctx_getter): ctx = ctx_getter() queue = cl.CommandQueue(ctx) @@ -88,7 +86,6 @@ def test_p2p(ctx_getter): assert rel_err < 1e-3 -@pytools.test.mark_test.opencl @pytest.mark.parametrize("order", [2, 3, 4, 5]) @pytest.mark.parametrize(("knl", "expn_class"), [ (LaplaceKernel(2), VolumeTaylorLocalExpansion), @@ -129,9 +126,9 @@ def test_p2e2p(ctx_getter, knl, expn_class, order, with_source_derivative): ] expn = expn_class(knl, order=order) - from sumpy import P2E, E2P, P2P - p2e = P2E(ctx, expn, out_kernels) - e2p = E2P(ctx, expn, out_kernels) + from sumpy import P2EFromLocal, E2PFromLocal, P2P + p2e = P2EFromLocal(ctx, expn, out_kernels) + e2p = E2PFromLocal(ctx, expn, out_kernels) p2p = P2P(ctx, out_kernels, exclude_self=False) from pytools.convergence import EOCRecorder @@ -185,6 +182,8 @@ def test_p2e2p(ctx_getter, knl, expn_class, order, with_source_derivative): centers=centers, sources=sources, strengths=strengths, + nboxes=1, + #flags="print_hl_cl", out_host=True, **extra_source_kwargs) @@ -325,7 +324,6 @@ def test_p_convergence_verifier(): pconv_verifier() -@pytools.test.mark_test.opencl @pytest.mark.parametrize("knl", [ LaplaceKernel(2), HelmholtzKernel(2) @@ -385,17 +383,18 @@ def test_translations(ctx_getter, knl): orders = [2, 3] else: orders = [2, 3, 4, 5] + nboxes = centers.shape[-1] for order in orders: m_expn = VolumeTaylorMultipoleExpansion(knl, order=order) l_expn = VolumeTaylorLocalExpansion(knl, order=order) - from sumpy import P2E, E2P, P2P, E2E - p2m = P2E(ctx, m_expn) - m2m = E2E(ctx, m_expn, m_expn) - m2l = E2E(ctx, m_expn, l_expn) - l2l = E2E(ctx, l_expn, l_expn) - l2p = E2P(ctx, l_expn, out_kernels) + from sumpy import P2EFromLocal, E2PFromLocal, P2P, E2EFromCSR + p2m = P2EFromLocal(ctx, m_expn) + m2m = E2EFromCSR(ctx, m_expn, m_expn) + m2l = E2EFromCSR(ctx, m_expn, l_expn) + l2l = E2EFromCSR(ctx, l_expn, l_expn) + l2p = E2PFromLocal(ctx, l_expn, out_kernels) p2p = P2P(ctx, out_kernels, exclude_self=False) fp = FieldPlotter(centers[:, -1], extent=0.3, npoints=res) @@ -414,6 +413,8 @@ def test_translations(ctx_getter, knl): centers=centers, sources=sources, strengths=strengths, + nboxes=nboxes, + #flags="print_hl_wrapper", out_host=True, **extra_kwargs)