diff --git a/sumpy/p2e.py b/sumpy/p2e.py index b1214603d3a84f3c67ca274d798cd943b28a539b..8105ac5388f7f6afb6b6ef9c92a4ba2afc1c1bc3 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -32,29 +32,35 @@ logger = logging.getLogger(__name__) __doc__ = """ -Particle-to-expansion +Particle-to-Expansion --------------------- .. autoclass:: P2EBase .. autoclass:: P2EFromSingleBox .. autoclass:: P2EFromCSR - """ # {{{ P2E base class class P2EBase(KernelComputation, KernelCacheWrapper): - """Common input processing for kernel computations.""" + """Common input processing for kernel computations. + + .. automethod:: __init__ + """ def __init__(self, ctx, expansion, kernels=None, name=None, device=None, strength_usage=None): """ - :arg expansion: a subclass of :class:`sympy.expansion.ExpansionBase` - :arg strength_usage: A list of integers indicating which expression - uses which source strength indicator. This implicitly specifies the - number of strength arrays that need to be passed. - Default: all kernels use the same strength. + :arg expansion: a subclass of :class:`sumpy.expansion.ExpansionBase` + :arg kernels: if not provided, the kernel of the *expansion* is used. + The base kernel (after source or target transformation + removal) of each kernel in the list should match the base kernel of + the expansion. + :arg strength_usage: a list of integers indicating which expression + uses which source strength indicator. This implicitly specifies the + number of strength arrays that need to be passed in. + By default all kernels use the same strength. """ from sumpy.kernel import (TargetTransformationRemover, SourceTransformationRemover) @@ -117,59 +123,88 @@ class P2EBase(KernelComputation, KernelCacheWrapper): return (type(self).__name__, self.name, self.expansion, tuple(self.source_kernels), tuple(self.strength_usage)) + def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array): + knl = self.get_kernel() + + if sources_is_obj_array: + knl = lp.tag_array_axes(knl, "sources", "sep,C") + if centers_is_obj_array: + knl = lp.tag_array_axes(knl, "centers", "sep,C") + + knl = self._allow_redundant_execution_of_knl_scaling(knl) + return knl + + def __call__(self, queue, **kwargs): + from sumpy.tools import is_obj_array_like + sources = kwargs.pop("sources") + centers = kwargs.pop("centers") + knl = self.get_cached_optimized_kernel( + sources_is_obj_array=is_obj_array_like(sources), + centers_is_obj_array=is_obj_array_like(centers)) + + # "1" may be passed for rscale, which won't have its type + # meaningfully inferred. Make the type of rscale explicit. + rscale = centers.dtype.type(kwargs.pop("rscale")) + + return knl(queue, sources=sources, centers=centers, rscale=rscale, **kwargs) # }}} + # {{{ P2E from single box (P2M, likely) class P2EFromSingleBox(P2EBase): + """ + .. automethod:: __call__ + """ + default_name = "p2e_from_single_box" def get_kernel(self): ncoeffs = len(self.expansion) from sumpy.tools import gather_loopy_source_arguments - loopy_knl = lp.make_kernel( - [ - "{[isrc_box]: 0<=isrc_box src_ibox = source_boxes[isrc_box] <> isrc_start = box_source_starts[src_ibox] - <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox] + <> isrc_end = isrc_start + box_source_counts_nonchild[src_ibox] <> center[idim] = centers[idim, src_ibox] {id=fetch_center} for isrc <> a[idim] = center[idim] - sources[idim, isrc] {dup=idim} - """] + self.get_loopy_instructions() + [""" + """] + [ + f"<> strength_{i} = strengths[{i}, isrc]" + for i in set(self.strength_usage) + ] + self.get_loopy_instructions() + [""" end - """] + [f"<> strength_{i} = strengths[{i}, isrc]" for - i in set(self.strength_usage)] + [f""" - tgt_expansions[src_ibox-tgt_base_ibox, {coeffidx}] = \ + """] + [f""" + tgt_expansions[src_ibox - tgt_base_ibox, {coeffidx}] = \ simul_reduce(sum, isrc, coeff{coeffidx}) \ {{id_prefix=write_expn}} """ for coeffidx in range(ncoeffs)] + [""" end """], [ - lp.GlobalArg("sources", None, shape=(self.dim, "nsources"), - dim_tags="sep,c"), + lp.GlobalArg("sources", None, + shape=(self.dim, "nsources"), order="C"), lp.GlobalArg("strengths", None, - shape="strength_count, nsources", dim_tags="sep,C"), - lp.GlobalArg("box_source_starts,box_source_counts_nonchild", + shape=("strength_count", "nsources"), dim_tags="sep,C"), + lp.GlobalArg("box_source_starts, box_source_counts_nonchild", None, shape=None), lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), lp.ValueArg("rscale", None), lp.GlobalArg("tgt_expansions", None, shape=("nboxes", ncoeffs), offset=lp.auto), - lp.ValueArg("nboxes,aligned_nboxes,tgt_base_ibox", np.int32), + lp.ValueArg("nboxes, aligned_nboxes, tgt_base_ibox", np.int32), lp.ValueArg("nsources", np.int32), - "..." - ] + gather_loopy_source_arguments(self.source_kernels - + (self.expansion,)), + ... + ] + gather_loopy_source_arguments( + self.source_kernels + (self.expansion,)), name=self.name, assumptions="nsrc_boxes>=1", silenced_warnings="write_race(write_expn*)", @@ -184,33 +219,36 @@ class P2EFromSingleBox(P2EBase): return loopy_knl - def get_optimized_kernel(self): + def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array): + knl = super().get_optimized_kernel( + sources_is_obj_array=sources_is_obj_array, + centers_is_obj_array=centers_is_obj_array) + # FIXME - knl = self.get_kernel() knl = lp.split_iname(knl, "isrc_box", 16, outer_tag="g.0") - knl = self._allow_redundant_execution_of_knl_scaling(knl) - return knl def __call__(self, queue, **kwargs): """ - :arg expansions: - :arg source_boxes: - :arg box_source_starts: - :arg box_source_counts_nonchild: - :arg centers: - :arg sources: - :arg strengths: - :arg rscale: + :arg source_boxes: an array of integer indices into *box_source_starts* + and *box_source_counts_nonchild*. + :arg box_source_starts: an array of integer indices into *sources*. + :arg box_source_counts_nonchild: an array of integer sizes of each box. + :arg centers: expansion centers. + :arg sources: source points. + :arg strengths: strengths at each source point. the strength count + is given by the *strength_usage* list passed in to + :meth:`P2EBase.__init__`. + :arg nboxes: number of boxes. + :arg tgt_base_ibox: integer for the base index of the target box. + :arg rscale: expansion scale. + :arg tgt_expansions: if given as an input, the array will be filled + in with the expansions at boxes indexed by + ``source_boxes[i] - tgt_base_ibox``. + + :returns: an array of *tgt_expansions*. """ - centers = kwargs.pop("centers") - # "1" may be passed for rscale, which won't have its type - # meaningfully inferred. Make the type of rscale explicit. - rscale = centers.dtype.type(kwargs.pop("rscale")) - - knl = self.get_cached_optimized_kernel() - - return knl(queue, centers=centers, rscale=rscale, **kwargs) + return super().__call__(queue, **kwargs) # }}} @@ -218,6 +256,10 @@ class P2EFromSingleBox(P2EBase): # {{{ P2E from CSR-like interaction list class P2EFromCSR(P2EBase): + """ + .. automethod:: __call__ + """ + default_name = "p2e_from_csr" def get_kernel(self): @@ -226,10 +268,10 @@ class P2EFromCSR(P2EBase): from sumpy.tools import gather_loopy_source_arguments arguments = ( [ - lp.GlobalArg("sources", None, shape=(self.dim, "nsources"), - dim_tags="sep,c"), - lp.GlobalArg("strengths", None, shape="strength_count, nsources", - dim_tags="sep,C"), + lp.GlobalArg("sources", None, + shape=(self.dim, "nsources"), order="C"), + lp.GlobalArg("strengths", None, + shape=("strength_count", "nsources"), dim_tags="sep,C"), lp.GlobalArg("source_box_starts,source_box_lists", None, shape=None, offset=lp.auto), lp.GlobalArg("box_source_starts,box_source_counts_nonchild", @@ -240,37 +282,39 @@ class P2EFromCSR(P2EBase): lp.ValueArg("naligned_boxes,ntgt_level_boxes,tgt_base_ibox", np.int32), lp.ValueArg("nsources", np.int32), - "..." - ] + gather_loopy_source_arguments(self.source_kernels - + (self.expansion,))) + ... + ] + gather_loopy_source_arguments( + self.source_kernels + (self.expansion,))) loopy_knl = lp.make_kernel( [ - "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] - <> center[idim] = centers[idim, tgt_ibox] {id=fetch_center} <> isrc_box_start = source_box_starts[itgt_box] - <> isrc_box_stop = source_box_starts[itgt_box+1] + <> isrc_box_stop = source_box_starts[itgt_box + 1] for isrc_box <> src_ibox = source_box_lists[isrc_box] <> isrc_start = box_source_starts[src_ibox] - <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox] + <> isrc_end = isrc_start \ + + box_source_counts_nonchild[src_ibox] for isrc <> a[idim] = center[idim] - sources[idim, isrc] \ {dup=idim} - """] + [f""" + """] + [ + f""" <> strength_{i} = strengths[{i}, isrc] - """ for i in set(self.strength_usage)] + self.get_loopy_instructions() + [""" + """ for i in set(self.strength_usage) + ] + self.get_loopy_instructions() + [""" end end"""] + [f""" tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \ @@ -294,33 +338,35 @@ class P2EFromCSR(P2EBase): return loopy_knl - def get_optimized_kernel(self): + def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array): + knl = super().get_optimized_kernel( + sources_is_obj_array=sources_is_obj_array, + centers_is_obj_array=centers_is_obj_array) + # FIXME - knl = self.get_kernel() knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0") - knl = self._allow_redundant_execution_of_knl_scaling(knl) - return knl def __call__(self, queue, **kwargs): """ - :arg expansions: - :arg source_boxes: - :arg box_source_starts: - :arg box_source_counts_nonchild: - :arg centers: - :arg sources: - :arg strengths: - :arg rscale: + :arg target_boxes: array of integer indices into *source_box_starts* + and *centers*. + :arg source_box_starts: array of integer indices into *source_box_lists*, + i.e. `source_box_starts[i]:source_box_starts[i + 1]` gives all the + source boxes for a given target box ``i = target_boxes[itgt_box]``. + :arg source_box_lists: an array of integer indices into *box_source_starts* + and *box_source_counts_nonchild*. + + :arg box_source_starts: see :meth:`P2EFromSingleBox.__call__`. + :arg box_source_counts_nonchild: see :meth:`P2EFromSingleBox.__call__`. + :arg centers: see :meth:`P2EFromSingleBox.__call__`. + :arg sources: see :meth:`P2EFromSingleBox.__call__`. + :arg strengths: see :meth:`P2EFromSingleBox.__call__`. + :arg rscale: see :meth:`P2EFromSingleBox.__call__`. + :arg tgt_base_ibox: see :meth:`P2EFromSingleBox.__call__`. + :arg tgt_expansion: see :meth:`P2EFromSingleBox.__call__`. """ - knl = self.get_cached_optimized_kernel() - - centers = kwargs.pop("centers") - # "1" may be passed for rscale, which won't have its type - # meaningfully inferred. Make the type of rscale explicit. - rscale = centers.dtype.type(kwargs.pop("rscale")) - - return knl(queue, centers=centers, rscale=rscale, **kwargs) + return super().__call__(queue, **kwargs) # }}} diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index 6fd929e5567ac31c00e2ec9eede330fbad53b67e..a6bc52d5aefbfca85ac6e6aeaa2d21683b192c6c 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -37,31 +37,27 @@ from pyopencl.tools import ( # noqa import logging logger = logging.getLogger(__name__) -try: - import faulthandler -except ImportError: - pass -else: - faulthandler.enable() +import faulthandler +faulthandler.enable() -def _build_geometry(queue, n, mode, target_radius=1.0): - # parametrize circle - t = np.linspace(0.0, 2.0 * np.pi, n, endpoint=False) - unit_circle = np.exp(1j * t) - unit_circle = np.array([unit_circle.real, unit_circle.imag]) +def _build_geometry(queue, ntargets, nsources, mode, target_radius=1.0): + # source points + t = np.linspace(0.0, 2.0 * np.pi, nsources, endpoint=False) + sources = np.array([np.cos(t), np.sin(t)]) - # create density + # density sigma = np.cos(mode * t) - # create sources and targets - h = 2.0 * np.pi / n - targets = target_radius * unit_circle - sources = unit_circle + # target points + t = np.linspace(0.0, 2.0 * np.pi, ntargets, endpoint=False) + targets = target_radius * np.array([np.cos(t), np.sin(t)]) + # target centers and expansion radii + h = 2.0 * np.pi * target_radius / ntargets radius = 7.0 * h - centers = unit_circle * (1.0 - radius) - expansion_radii = radius * np.ones(n) + centers = (1.0 - radius) * targets + expansion_radii = np.full(ntargets, radius) return (cl.array.to_device(queue, targets), cl.array.to_device(queue, sources), @@ -84,9 +80,10 @@ def _build_subset_indices(queue, ntargets, nsources, factor): rng.shuffle(tgtindices) rng.shuffle(srcindices) + tgtindices, srcindices = np.meshgrid(tgtindices, srcindices) return ( - cl.array.to_device(queue, tgtindices).with_queue(None), - cl.array.to_device(queue, srcindices).with_queue(None)) + cl.array.to_device(queue, tgtindices.ravel()).with_queue(None), + cl.array.to_device(queue, srcindices.ravel()).with_queue(None)) @pytest.mark.parametrize("factor", [1.0, 0.6]) @@ -128,7 +125,7 @@ def test_qbx_direct(ctx_factory, factor, lpot_id): for n in [200, 300, 400]: targets, sources, centers, expansion_radii, sigma = \ - _build_geometry(queue, n, mode_nr, target_radius=1.2) + _build_geometry(queue, n, n, mode_nr, target_radius=1.2) h = 2 * np.pi / n strengths = (sigma * h,) @@ -206,7 +203,7 @@ def test_p2p_direct(ctx_factory, exclude_self, factor, lpot_id): for n in [200, 300, 400]: targets, sources, _, _, sigma = \ - _build_geometry(queue, n, mode_nr, target_radius=1.2) + _build_geometry(queue, n, n, mode_nr, target_radius=1.2) h = 2 * np.pi / n strengths = (sigma * h,)