diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 4b91b85eb8dc663693868f53e5b53eb8c9447690..96ae097c8a41d02abd3ddeb81d68f9751520ec03 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -27,6 +27,7 @@ from six.moves import range import numpy as np import loopy as lp +from pymbolic import var from sumpy.tools import KernelComputation, KernelCacheWrapper @@ -48,7 +49,7 @@ Particle-to-particle # {{{ p2p base class -class P2PBase(KernelComputation, KernelCacheWrapper): +class P2PComputationBase(KernelComputation, KernelCacheWrapper): def __init__(self, ctx, kernels, exclude_self, strength_usage=None, value_dtypes=None, options=[], name=None, device=None): @@ -107,16 +108,24 @@ class P2PBase(KernelComputation, KernelCacheWrapper): # {{{ P2P with list of sources and list of targets -class P2P(P2PBase): - default_name = "p2p" +class P2PBase(P2PComputationBase): + def get_src_tgt_arguments(self): + return [ + lp.GlobalArg("sources", None, + shape=(self.dim, "nsources")), + lp.GlobalArg("targets", None, + shape=(self.dim, "ntargets")), + lp.ValueArg("nsources", None), + lp.ValueArg("ntargets", None) + ] def get_kernel(self): loopy_insns, result_names = self.get_loopy_insns_and_result_names() - from pymbolic import var + isrc_sym = var("isrc") exprs = [ var(name) - * var("strength").index((self.strength_usage[i], var("isrc"))) + * self.get_strength_or_not(isrc_sym, i) for i, name in enumerate(result_names)] if self.exclude_self: @@ -124,46 +133,31 @@ class P2P(P2PBase): exprs = [If(Variable("is_self"), 0, expr) for expr in exprs] from sumpy.tools import gather_loopy_source_arguments + arguments = ( + self.get_src_tgt_arguments() + + self.get_input_and_output_arguments() + + ([lp.GlobalArg("target_to_source", np.int32, shape=("ntargets",))] + if self.exclude_self else []) + + gather_loopy_source_arguments(self.kernels)) + loopy_knl = lp.make_kernel( "{[isrc,itgt,idim]: 0<=itgt d[idim] = targets[idim,itgt] - sources[idim,isrc] - """] + [""" - <> is_self = (isrc == target_to_source[itgt]) - """ if self.exclude_self else ""] + [ - lp.Assignment(id=None, - assignee="pair_result_%d" % i, expression=expr, - temp_var_type=lp.auto) - for i, expr in enumerate(exprs) - ] + [""" - end - """] + [""" - result[KNLIDX, itgt] = knl_KNLIDX_scaling \ - * simul_reduce(sum, isrc, pair_result_KNLIDX) - """.replace("KNLIDX", str(iknl)) - for iknl in range(len(exprs))] + [ - ] + [""" - end - """], - [ - lp.GlobalArg("sources", None, - shape=(self.dim, "nsources")), - lp.GlobalArg("targets", None, - shape=(self.dim, "ntargets")), - lp.ValueArg("nsources", None), - lp.ValueArg("ntargets", None), - lp.GlobalArg("strength", None, shape="nstrengths,nsources"), - lp.GlobalArg("result", None, - shape="nresults,ntargets", dim_tags="sep,C") - ] + ( - [lp.GlobalArg("target_to_source", np.int32, shape=("ntargets",))] - if self.exclude_self else [] - ) + gather_loopy_source_arguments(self.kernels), + + ["for itgt, isrc"] + + loopy_insns + + ["<> d[idim] = targets[idim,itgt] - sources[idim,isrc]"] + + ["<> is_self = (isrc == target_to_source[itgt])" + if self.exclude_self else ""] + + [ + lp.Assignment(id=None, + assignee="pair_result_%d" % i, expression=expr, + temp_var_type=lp.auto) + for i, expr in enumerate(exprs) + ] + + ["end"] + + self.get_result_store_instructions(), + arguments, name=self.name, assumptions="nsources>=1 and ntargets>=1", fixed_parameters=dict( @@ -176,8 +170,6 @@ class P2P(P2PBase): for knl in self.kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) - loopy_knl = lp.tag_array_axes(loopy_knl, "strength", "sep,C") - return loopy_knl def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array): @@ -192,9 +184,35 @@ class P2P(P2PBase): knl = lp.split_iname(knl, "itgt", 1024, outer_tag="g.0") return knl + +# }}} + +# {{{ + +class P2P(P2PBase): + default_name = "p2p_apply" + + def get_strength_or_not(self, isrc, kernel_idx): + return var("strength").index((self.strength_usage[kernel_idx], isrc)) + + def get_input_and_output_arguments(self): + return [ + lp.GlobalArg("strength", None, + shape="nstrengths,nsources", dim_tags="sep,C"), + lp.GlobalArg("result", None, + shape="nresults,ntargets", dim_tags="sep,C") + ] + + def get_result_store_instructions(self): + return [""" + result[{i}, itgt] = knl_{i}_scaling \ + * simul_reduce(sum, isrc, pair_result_{i}) + """.format(i=iknl) + for iknl in range(len(self.kernels))] + def __call__(self, queue, targets, sources, strength, **kwargs): from pytools.obj_array import is_obj_array - knl = self.get_cached_optimized_kernel( + knl = self.get_optimized_kernel( targets_is_obj_array=( is_obj_array(targets) or isinstance(targets, (tuple, list))), sources_is_obj_array=( @@ -205,10 +223,44 @@ class P2P(P2PBase): # }}} +# {{{ P2P Matrix Writer + +class P2PMatrixGenerator(P2PBase): + default_name = "p2p_matrix" + + def get_strength_or_not(self, isrc, kernel_idx): + return 1 + + def get_input_and_output_arguments(self): + return [ + lp.GlobalArg("result_%d" % i, dtype, shape="ntargets,nsources") + for i, dtype in enumerate(self.value_dtypes) + ] + + def get_result_store_instructions(self): + return [ + """ + result_{i}[itgt, isrc] = \ + knl_{i}_scaling * pair_result_{i} + """.format(i=iknl) + for iknl in range(len(self.kernels)) + ] + + def __call__(self, queue, targets, sources, **kwargs): + from pytools.obj_array import is_obj_array + knl = self.get_optimized_kernel( + targets_is_obj_array=( + is_obj_array(targets) or isinstance(targets, (tuple, list))), + sources_is_obj_array=( + is_obj_array(sources) or isinstance(sources, (tuple, list)))) + + return knl(queue, sources=sources, targets=targets, **kwargs) + +# }}} # {{{ P2P from CSR-like interaction list -class P2PFromCSR(P2PBase): +class P2PFromCSR(P2PComputationBase): default_name = "p2p_from_csr" def get_kernel(self): diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py new file mode 100644 index 0000000000000000000000000000000000000000..161ec6f14620ed2c386ae4650d785cf6c5e7b801 --- /dev/null +++ b/test/test_matrixgen.py @@ -0,0 +1,171 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2017 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import numpy as np +import sys + +import pytest +import pyopencl as cl +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + +import logging +logger = logging.getLogger(__name__) + +try: + import faulthandler +except ImportError: + pass +else: + faulthandler.enable() + + +def create_arguments(n, mode): + # parametrize circle + t = np.linspace(0.0, 2.0 * np.pi, n, endpoint=False) + unit_circle = np.exp(1j * t) + unit_circle = np.array([unit_circle.real, unit_circle.imag]) + + # create density + sigma = np.cos(mode * t) + + # create sources and targets + h = 2.0 * np.pi / n + targets = unit_circle + sources = unit_circle + + radius = 7.0 * h + centers = unit_circle * (1.0 - radius) + expansion_radii = radius * np.ones(n) + + return targets, sources, centers, sigma, expansion_radii + + +def test_qbx_direct(ctx_getter): + # This evaluates a single layer potential on a circle. + logging.basicConfig(level=logging.INFO) + + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + from sumpy.kernel import LaplaceKernel + lknl = LaplaceKernel(2) + + order = 12 + mode_nr = 25 + + from sumpy.qbx import LayerPotential + from sumpy.qbx import LayerPotentialMatrixGenerator + from sumpy.expansion.local import LineTaylorLocalExpansion + mat_gen = LayerPotentialMatrixGenerator(ctx, + [LineTaylorLocalExpansion(lknl, order)]) + lpot = LayerPotential(ctx, [LineTaylorLocalExpansion(lknl, order)]) + + from pytools.convergence import EOCRecorder + eocrec = EOCRecorder() + + for n in [200, 300, 400]: + targets, sources, centers, sigma, expansion_radii = \ + create_arguments(n, mode_nr) + + eigval = 1.0 / (2.0 * mode_nr) + result_ref = eigval * sigma + + h = 2 * np.pi / n + strengths = (sigma * h,) + + _, (mat,) = mat_gen(queue, targets, sources, centers, + expansion_radii=expansion_radii) + result_mat = mat @ strengths[0] + + _, (result_lpot,) = lpot(queue, targets, sources, centers, strengths, + expansion_radii=expansion_radii) + + assert np.max(np.abs(result_mat - result_lpot)) < 1.0e-14 + eocrec.add_data_point(h, np.max(np.abs(result_ref - result_lpot))) + + print(eocrec) + + slack = 1.5 + assert eocrec.order_estimate() > order - slack + + +@pytest.mark.parametrize("exclude_self", [True, False]) +def test_p2p_direct(ctx_getter, exclude_self): + # This evaluates a single layer potential on a circle. + logging.basicConfig(level=logging.INFO) + + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + from sumpy.kernel import LaplaceKernel + lknl = LaplaceKernel(2) + + mode_nr = 25 + + from sumpy.p2p import P2P + from sumpy.p2p import P2PMatrixGenerator + mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self) + lpot = P2P(ctx, [lknl], exclude_self=exclude_self) + + from pytools.convergence import EOCRecorder + eocrec = EOCRecorder() + + for n in [200, 300, 400]: + targets, sources, _, sigma, _ = create_arguments(n, mode_nr) + + eigval = 1.0 / (2.0 * mode_nr) + result_ref = eigval * sigma + + h = 2 * np.pi / n + strengths = (sigma * h,) + + extra_kwargs = {} + if exclude_self: + extra_kwargs["target_to_source"] = np.arange(n, dtype=np.int32) + + _, (mat,) = mat_gen(queue, targets, sources, **extra_kwargs) + result_mat = mat @ strengths[0] + + _, (result_lpot,) = lpot(queue, targets, sources, strengths, + **extra_kwargs) + + assert np.max(np.abs(result_mat - result_lpot)) < 1.0e-14 + eocrec.add_data_point(h, np.max(np.abs(result_ref - result_lpot))) + + print(eocrec) + + assert eocrec.order_estimate() > 0.8 + +# You can test individual routines by typing +# $ python test_kernels.py 'test_p2p(cl.create_some_context)' + +if __name__ == "__main__": + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from py.test.cmdline import main + main([__file__]) + +# vim: fdm=marker