diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4691a6cc652d936c9fc126ac947c737e1866e33a..e99a76b0c69f98d9cb560cb4744a8bcd8b4e33db 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,8 +88,8 @@ jobs: env: DOWNSTREAM_PROJECT: ${{ matrix.downstream_project }} run: | - if [[ "$DOWNSTREAM_PROJECT" = "pytential" ]] && [[ "$GITHUB_HEAD_REF" = "derivtaker" ]]; then - git clone "https://github.com/isuruf/$DOWNSTREAM_PROJECT.git" -b "$GITHUB_HEAD_REF" + if [[ "$DOWNSTREAM_PROJECT" = "pytential" ]] && [[ "$GITHUB_HEAD_REF" = "multiplier" ]]; then + git clone "https://github.com/isuruf/$DOWNSTREAM_PROJECT.git" -b "remover" else git clone "https://github.com/inducer/$DOWNSTREAM_PROJECT.git" fi diff --git a/sumpy/e2e.py b/sumpy/e2e.py index c3d7aca6301de8882699cbd56935e7a3916a9b5c..dabb35b292bffb94500cbec052d12c36591ba8cd 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -62,20 +62,22 @@ class E2EBase(KernelCacheWrapper): device = ctx.devices[0] if src_expansion is tgt_expansion: - from sumpy.kernel import TargetDerivativeRemover, SourceDerivativeRemover + from sumpy.kernel import (TargetTransformationRemover, + SourceTransformationRemover) tgt_expansion = src_expansion = src_expansion.with_kernel( - SourceDerivativeRemover()( - TargetDerivativeRemover()(src_expansion.kernel))) + SourceTransformationRemover()( + TargetTransformationRemover()(src_expansion.kernel))) else: - from sumpy.kernel import TargetDerivativeRemover, SourceDerivativeRemover + from sumpy.kernel import (TargetTransformationRemover, + SourceTransformationRemover) src_expansion = src_expansion.with_kernel( - SourceDerivativeRemover()( - TargetDerivativeRemover()(src_expansion.kernel))) + SourceTransformationRemover()( + TargetTransformationRemover()(src_expansion.kernel))) tgt_expansion = tgt_expansion.with_kernel( - SourceDerivativeRemover()( - TargetDerivativeRemover()(tgt_expansion.kernel))) + SourceTransformationRemover()( + TargetTransformationRemover()(tgt_expansion.kernel))) self.ctx = ctx self.src_expansion = src_expansion diff --git a/sumpy/e2p.py b/sumpy/e2p.py index a1bbda8774d416150fc1278d453791e206a1736c..2a4ff531c0522514bb43cbb8267bc411ea9e05fa 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -56,15 +56,16 @@ class E2PBase(KernelCacheWrapper): if device is None: device = ctx.devices[0] - from sumpy.kernel import SourceDerivativeRemover, TargetDerivativeRemover - sdr = SourceDerivativeRemover() - tdr = TargetDerivativeRemover() + from sumpy.kernel import (SourceTransformationRemover, + TargetTransformationRemover) + sxr = SourceTransformationRemover() + txr = TargetTransformationRemover() expansion = expansion.with_kernel( - sdr(expansion.kernel)) + sxr(expansion.kernel)) - kernels = [sdr(knl) for knl in kernels] + kernels = [sxr(knl) for knl in kernels] for knl in kernels: - assert tdr(knl) == expansion.kernel + assert txr(knl) == expansion.kernel self.ctx = ctx self.expansion = expansion @@ -99,7 +100,8 @@ class E2PBase(KernelCacheWrapper): loopy_insns = to_loopy_insns( sac.assignments.items(), vector_names={"b"}, - pymbolic_expr_maps=[self.expansion.get_code_transformer()], + pymbolic_expr_maps=[ + knl.get_code_transformer() for knl in self.kernels], retain_names=result_names, complex_dtype=np.complex128 # FIXME ) diff --git a/sumpy/kernel.py b/sumpy/kernel.py index b7559c2b9db8bedaeffaa28c28346c6dd125010d..af771ceb560cadf7a585d42ed18cd0e45c5506ef 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -1203,6 +1203,70 @@ class DirectionalSourceDerivative(DirectionalDerivative): mapper_method = "map_directional_source_derivative" + +class TargetPointMultiplier(KernelWrapper): + """Wraps a kernel :math:`G(x, y)` and outputs :math:`x_j G(x, y)` + where :math:`x, y` are targets and sources respectively. + """ + + init_arg_names = ("axis", "inner_kernel") + target_array_name = "targets" + + def __init__(self, axis, inner_kernel): + KernelWrapper.__init__(self, inner_kernel) + self.axis = axis + + def __getinitargs__(self): + return (self.axis, self.inner_kernel) + + def __str__(self): + return "x%d %s" % (self.axis, self.inner_kernel) + + def __repr__(self): + return "TargetPointMultiplier(%d, %r)" % (self.axis, self.inner_kernel) + + def replace_base_kernel(self, new_base_kernel): + return type(self)(self.axis, + self.inner_kernel.replace_base_kernel(new_base_kernel)) + + def replace_inner_kernel(self, new_inner_kernel): + return type(self)(self.axis, new_inner_kernel) + + def postprocess_at_target(self, expr, avec): + from sumpy.symbolic import make_sym_vector as make_sympy_vector + from sumpy.tools import (ExprDerivativeTaker, + DifferentiatedExprDerivativeTaker) + + expr = self.inner_kernel.postprocess_at_target(expr, avec) + target_vec = make_sympy_vector(self.target_array_name, self.dim) + + if isinstance(expr, + (ExprDerivativeTaker, DifferentiatedExprDerivativeTaker)): + class DerivativeTakerWrapper: + def __init__(self, taker, vec, axis): + self.taker = taker + self.vec = vec + self.axis = axis + + def diff(self, *args, **kwargs): + return self.vec[self.axis] * self.taker.diff(*args, **kwargs) + + return DerivativeTakerWrapper(expr, target_vec, self.axis) + return target_vec[self.axis] * expr + + def get_code_transformer(self): + from sumpy.codegen import VectorComponentRewriter + vcr = VectorComponentRewriter([self.target_array_name]) + from pymbolic.primitives import Variable + via = _VectorIndexAdder(self.target_array_name, (Variable("itgt"),)) + + def transform(expr): + return via(vcr(expr)) + + return transform + + mapper_method = "map_target_point_multiplier" + # }}} @@ -1233,6 +1297,7 @@ class KernelCombineMapper(KernelMapper): map_directional_target_derivative = map_axis_target_derivative map_directional_source_derivative = map_axis_target_derivative map_axis_source_derivative = map_axis_target_derivative + map_target_point_multiplier = map_axis_target_derivative class KernelIdentityMapper(KernelMapper): @@ -1251,6 +1316,7 @@ class KernelIdentityMapper(KernelMapper): return type(kernel)(kernel.axis, self.rec(kernel.inner_kernel)) map_axis_source_derivative = map_axis_target_derivative + map_target_point_multiplier = map_axis_target_derivative def map_directional_target_derivative(self, kernel): return type(kernel)( @@ -1280,6 +1346,14 @@ class SourceDerivativeRemover(AxisSourceDerivativeRemover): return self.rec(kernel.inner_kernel) +class TargetTransformationRemover(TargetDerivativeRemover): + def map_target_point_multiplier(self, kernel): + return self.rec(kernel.inner_kernel) + + +SourceTransformationRemover = SourceDerivativeRemover + + class DerivativeCounter(KernelCombineMapper): def combine(self, values): return max(values) diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 6b149c7f2b5e398e4bf3135255a7f26dccd55e4e..b1214603d3a84f3c67ca274d798cd943b28a539b 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -56,20 +56,21 @@ class P2EBase(KernelComputation, KernelCacheWrapper): number of strength arrays that need to be passed. Default: all kernels use the same strength. """ - from sumpy.kernel import TargetDerivativeRemover, SourceDerivativeRemover - tdr = TargetDerivativeRemover() - sdr = SourceDerivativeRemover() + from sumpy.kernel import (TargetTransformationRemover, + SourceTransformationRemover) + txr = TargetTransformationRemover() + sxr = SourceTransformationRemover() if kernels is None: - kernels = [tdr(expansion.kernel)] + kernels = [txr(expansion.kernel)] else: kernels = kernels - expansion = expansion.with_kernel(sdr(tdr(expansion.kernel))) + expansion = expansion.with_kernel(sxr(txr(expansion.kernel))) for knl in kernels: - assert tdr(knl) == knl - assert sdr(knl) == expansion.kernel + assert txr(knl) == knl + assert sxr(knl) == expansion.kernel KernelComputation.__init__(self, ctx=ctx, target_kernels=[], source_kernels=kernels, diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 3fef8c1edbb50edaf052e80c6ba7a8f93cc77433..2ccfff3695def7bcc7337271e32a0f4fd0b0fcb5 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -64,22 +64,22 @@ class P2PBase(KernelComputation, KernelCacheWrapper): Default: all kernels use the same strength. """ from pytools import single_valued - from sumpy.kernel import (TargetDerivativeRemover, - SourceDerivativeRemover) - tdr = TargetDerivativeRemover() - sdr = SourceDerivativeRemover() + from sumpy.kernel import (TargetTransformationRemover, + SourceTransformationRemover) + txr = TargetTransformationRemover() + sxr = SourceTransformationRemover() if source_kernels is None: - source_kernels = [single_valued(tdr(knl) for knl in target_kernels)] - target_kernels = [sdr(knl) for knl in target_kernels] + source_kernels = [single_valued(txr(knl) for knl in target_kernels)] + target_kernels = [sxr(knl) for knl in target_kernels] else: for knl in source_kernels: - assert(tdr(knl) == knl) + assert(txr(knl) == knl) for knl in target_kernels: - assert(sdr(knl) == knl) + assert(sxr(knl) == knl) - base_source_kernel = single_valued(sdr(knl) for knl in source_kernels) - base_target_kernel = single_valued(tdr(knl) for knl in target_kernels) + base_source_kernel = single_valued(sxr(knl) for knl in source_kernels) + base_target_kernel = single_valued(txr(knl) for knl in target_kernels) assert base_source_kernel == base_target_kernel KernelComputation.__init__(self, ctx=ctx, target_kernels=target_kernels, diff --git a/sumpy/qbx.py b/sumpy/qbx.py index f530cd2b9f484efc2e9b0f434e7b41c0cd95aa6c..c6fc4383986bea787010550ca2ab255247bd660d 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -167,9 +167,9 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper): def get_default_src_tgt_arguments(self): from sumpy.tools import gather_loopy_source_arguments return ([ - lp.GlobalArg("src", None, + lp.GlobalArg("sources", None, shape=(self.dim, "nsources"), order="C"), - lp.GlobalArg("tgt", None, + lp.GlobalArg("targets", None, shape=(self.dim, "ntargets"), order="C"), lp.GlobalArg("center", None, shape=(self.dim, "ntargets"), order="C"), @@ -183,29 +183,33 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper): raise NotImplementedError def get_optimized_kernel(self, - targets_is_obj_array, sources_is_obj_array, centers_is_obj_array): + targets_is_obj_array, sources_is_obj_array, centers_is_obj_array, + # Used by pytential to override the name of the loop to be + # parallelized. In the case of QBX, that's the loop over QBX + # targets (not global targets). + itgt_name="itgt"): # FIXME specialize/tune for GPU/CPU loopy_knl = self.get_kernel() if targets_is_obj_array: - loopy_knl = lp.tag_array_axes(loopy_knl, "tgt", "sep,C") + loopy_knl = lp.tag_array_axes(loopy_knl, "targets", "sep,C") if sources_is_obj_array: - loopy_knl = lp.tag_array_axes(loopy_knl, "src", "sep,C") + loopy_knl = lp.tag_array_axes(loopy_knl, "sources", "sep,C") if centers_is_obj_array: loopy_knl = lp.tag_array_axes(loopy_knl, "center", "sep,C") import pyopencl as cl dev = self.context.devices[0] if dev.type & cl.device_type.CPU: - loopy_knl = lp.split_iname(loopy_knl, "itgt", 16, outer_tag="g.0", + loopy_knl = lp.split_iname(loopy_knl, itgt_name, 16, outer_tag="g.0", inner_tag="l.0") loopy_knl = lp.split_iname(loopy_knl, "isrc", 256) loopy_knl = lp.prioritize_loops(loopy_knl, - ["isrc_outer", "itgt_inner"]) + ["isrc_outer", f"{itgt_name}_inner"]) else: from warnings import warn warn("don't know how to tune layer potential computation for '%s'" % dev) - loopy_knl = lp.split_iname(loopy_knl, "itgt", 128, outer_tag="g.0") + loopy_knl = lp.split_iname(loopy_knl, itgt_name, 128, outer_tag="g.0") loopy_knl = self._allow_redundant_execution_of_knl_scaling(loopy_knl) return loopy_knl @@ -244,8 +248,8 @@ class LayerPotential(LayerPotentialBase): """], self.get_kernel_scaling_assignments() + ["for itgt, isrc"] - + ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"] - + ["<> b[idim] = tgt[idim, itgt] - center[idim, itgt] {dup=idim}"] + + ["<> a[idim] = center[idim, itgt] - sources[idim, isrc] {dup=idim}"] + + ["<> b[idim] = targets[idim, itgt] - center[idim, itgt] {dup=idim}"] + ["<> rscale = expansion_radii[itgt]"] + [f"<> strength_{i}_isrc = strength_{i}[isrc]" for i in range(self.strength_count)] @@ -286,7 +290,7 @@ class LayerPotential(LayerPotentialBase): for i, dens in enumerate(strengths): kwargs["strength_%d" % i] = dens - return knl(queue, src=sources, tgt=targets, center=centers, + return knl(queue, sources=sources, targets=targets, center=centers, expansion_radii=expansion_radii, **kwargs) # }}} @@ -320,8 +324,8 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase): """], self.get_kernel_scaling_assignments() + ["for itgt, isrc"] - + ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"] - + ["<> b[idim] = tgt[idim, itgt] - center[idim, itgt] {dup=idim}"] + + ["<> a[idim] = center[idim, itgt] - sources[idim, isrc] {dup=idim}"] + + ["<> b[idim] = targets[idim, itgt] - center[idim, itgt] {dup=idim}"] + ["<> rscale = expansion_radii[itgt]"] + loopy_insns + kernel_exprs + [""" @@ -350,7 +354,7 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase): sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) - return knl(queue, src=sources, tgt=targets, center=centers, + return knl(queue, sources=sources, targets=targets, center=centers, expansion_radii=expansion_radii, **kwargs) # }}} @@ -395,8 +399,8 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): <> itgt = tgtindices[imat] <> isrc = srcindices[imat] - <> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim} - <> b[idim] = tgt[idim, itgt] - center[idim, itgt] {dup=idim} + <> a[idim] = center[idim, itgt] - sources[idim, isrc] {dup=idim} + <> b[idim] = targets[idim, itgt] - center[idim, itgt] {dup=idim} <> rscale = expansion_radii[itgt] """] + loopy_insns + kernel_exprs @@ -428,9 +432,9 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): loopy_knl = self.get_kernel() if targets_is_obj_array: - loopy_knl = lp.tag_array_axes(loopy_knl, "tgt", "sep,C") + loopy_knl = lp.tag_array_axes(loopy_knl, "targets", "sep,C") if sources_is_obj_array: - loopy_knl = lp.tag_array_axes(loopy_knl, "src", "sep,C") + loopy_knl = lp.tag_array_axes(loopy_knl, "sources", "sep,C") if centers_is_obj_array: loopy_knl = lp.tag_array_axes(loopy_knl, "center", "sep,C") @@ -456,8 +460,8 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): centers_is_obj_array=is_obj_array_like(centers)) return knl(queue, - src=sources, - tgt=targets, + sources=sources, + targets=targets, center=centers, expansion_radii=expansion_radii, tgtindices=index_set.linear_row_indices, diff --git a/sumpy/toys.py b/sumpy/toys.py index 759a765a38e15ee4ff15ce93ae91d7419fcb2afb..50b4f2926042bec451c1abd92c37def0a2ffa46d 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -25,7 +25,7 @@ THE SOFTWARE. from pytools import memoize_method from numbers import Number -from sumpy.kernel import TargetDerivativeRemover +from sumpy.kernel import TargetTransformationRemover import numpy as np # noqa: F401 import loopy as lp # noqa: F401 @@ -94,7 +94,7 @@ class ToyContext: self.queue = cl.CommandQueue(self.cl_context) self.kernel = kernel - self.no_target_deriv_kernel = TargetDerivativeRemover()(kernel) + self.no_target_deriv_kernel = TargetTransformationRemover()(kernel) if expansion_factory is None: from sumpy.expansion import DefaultExpansionFactory diff --git a/test/test_fmm.py b/test/test_fmm.py index e411b71994faa997122735bd9adc221b89fea2e9..6b3bac39d106f53807ab8013f465449b2d249408 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -484,6 +484,73 @@ def test_sumpy_axis_source_derivative(ctx_factory): assert np.isclose(rel_err, 0, atol=1e-5) +def test_sumpy_target_point_multiplier(ctx_factory): + logging.basicConfig(level=logging.INFO) + + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + nsources = 500 + dtype = np.float64 + + from boxtree.tools import ( + make_normal_particle_array as p_normal) + + knl = LaplaceKernel(2) + local_expn_class = VolumeTaylorLocalExpansion + mpole_expn_class = VolumeTaylorMultipoleExpansion + order = 5 + + sources = p_normal(queue, nsources, knl.dim, dtype, seed=15) + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + tree, _ = tb(queue, sources, + max_particles_in_box=30, debug=True) + + from boxtree.traversal import FMMTraversalBuilder + tbuild = FMMTraversalBuilder(ctx) + trav, _ = tbuild(queue, tree, debug=True) + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(ctx, seed=12) + weights = rng.uniform(queue, nsources, dtype=np.float64) + + target_to_source = np.arange(tree.ntargets, dtype=np.int32) + self_extra_kwargs = {"target_to_source": target_to_source} + + from functools import partial + + from sumpy.fmm import SumpyExpansionWranglerCodeContainer + from sumpy.kernel import TargetPointMultiplier + + tgt_knls = [TargetPointMultiplier(0, knl), knl] + + wcc = SumpyExpansionWranglerCodeContainer( + ctx, + partial(mpole_expn_class, knl), + partial(local_expn_class, knl), + target_kernels=tgt_knls, + source_kernels=[knl], + exclude_self=True) + + wrangler = wcc.get_wrangler(queue, tree, dtype, + fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order, + self_extra_kwargs=self_extra_kwargs) + + from boxtree.fmm import drive_fmm + + pot0, pot1 = drive_fmm(trav, wrangler, (weights,)) + pot0, pot1 = pot0.get(), pot1.get() + pot1 = pot1 * sources[0].get() + + rel_err = la.norm(pot0 - pot1) / la.norm(pot1) + logger.info("order %d -> relative l2 error: %g" % (order, rel_err)) + + assert np.isclose(rel_err, 0, atol=1e-5) + + # You can test individual routines by typing # $ python test_fmm.py 'test_sumpy_fmm(cl.create_some_context)'