diff --git a/sumpy/p2p.py b/sumpy/p2p.py index e3b457dd52e885bf66462e475b3b58edb78bd098..c585d0eae7ec73de1231850b3b3be628ba9e3d4d 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -33,7 +33,8 @@ import loopy as lp from loopy.version import MOST_RECENT_LANGUAGE_VERSION from pymbolic import var -from sumpy.tools import KernelComputation, KernelCacheWrapper +from sumpy.tools import ( + KernelComputation, KernelCacheWrapper, is_obj_array_like) __doc__ = """ @@ -214,12 +215,9 @@ class P2P(P2PBase): return loopy_knl def __call__(self, queue, targets, sources, strength, **kwargs): - from pytools.obj_array import is_obj_array knl = self.get_cached_optimized_kernel( - targets_is_obj_array=( - is_obj_array(targets) or isinstance(targets, (tuple, list))), - sources_is_obj_array=( - is_obj_array(sources) or isinstance(sources, (tuple, list)))) + targets_is_obj_array=is_obj_array_like(targets), + sources_is_obj_array=is_obj_array_like(sources)) return knl(queue, sources=sources, targets=targets, strength=strength, **kwargs) @@ -278,12 +276,9 @@ class P2PMatrixGenerator(P2PBase): return loopy_knl def __call__(self, queue, targets, sources, **kwargs): - from pytools.obj_array import is_obj_array knl = self.get_cached_optimized_kernel( - targets_is_obj_array=( - is_obj_array(targets) or isinstance(targets, (tuple, list))), - sources_is_obj_array=( - is_obj_array(sources) or isinstance(sources, (tuple, list)))) + targets_is_obj_array=is_obj_array_like(targets), + sources_is_obj_array=is_obj_array_like(sources)) return knl(queue, sources=sources, targets=targets, **kwargs) @@ -390,12 +385,9 @@ class P2PMatrixBlockGenerator(P2PBase): :return: a tuple of one-dimensional arrays of kernel evaluations at target-source pairs described by `index_set`. """ - from pytools.obj_array import is_obj_array knl = self.get_cached_optimized_kernel( - targets_is_obj_array=( - is_obj_array(targets) or isinstance(targets, (tuple, list))), - sources_is_obj_array=( - is_obj_array(sources) or isinstance(sources, (tuple, list)))) + targets_is_obj_array=is_obj_array_like(targets), + sources_is_obj_array=is_obj_array_like(sources)) return knl(queue, targets=targets, diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 9708764c01d0448d7c7e8314efb461989c838acd..dd6a827928832aadf74d6108df0489950836cc59 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -35,7 +35,8 @@ import sumpy.symbolic as sym from pytools import memoize_method from pymbolic import parse, var -from sumpy.tools import KernelComputation, KernelCacheWrapper +from sumpy.tools import ( + KernelComputation, KernelCacheWrapper, is_obj_array_like) import logging logger = logging.getLogger(__name__) @@ -154,7 +155,7 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper): lp.GlobalArg("tgt", None, shape=(self.dim, "ntargets"), order="C"), lp.GlobalArg("center", None, - shape=(self.dim, "ntargets"), dim_tags="sep,C"), + shape=(self.dim, "ntargets"), order="C"), lp.GlobalArg("expansion_radii", None, shape="ntargets"), lp.ValueArg("nsources", None), @@ -164,10 +165,18 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper): def get_kernel(self): raise NotImplementedError - def get_optimized_kernel(self): + def get_optimized_kernel(self, + targets_is_obj_array, sources_is_obj_array, centers_is_obj_array): # FIXME specialize/tune for GPU/CPU loopy_knl = self.get_kernel() + if targets_is_obj_array: + loopy_knl = lp.tag_array_axes(loopy_knl, "tgt", "sep,C") + if sources_is_obj_array: + loopy_knl = lp.tag_array_axes(loopy_knl, "src", "sep,C") + if centers_is_obj_array: + loopy_knl = lp.tag_array_axes(loopy_knl, "center", "sep,C") + import pyopencl as cl dev = self.context.devices[0] if dev.type & cl.device_type.CPU: @@ -249,7 +258,10 @@ class LayerPotential(LayerPotentialBase): already multiplied in. """ - knl = self.get_cached_optimized_kernel() + knl = self.get_cached_optimized_kernel( + targets_is_obj_array=is_obj_array_like(targets), + sources_is_obj_array=is_obj_array_like(sources), + centers_is_obj_array=is_obj_array_like(centers)) for i, dens in enumerate(strengths): kwargs["strength_%d" % i] = dens @@ -313,7 +325,10 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase): return loopy_knl def __call__(self, queue, targets, sources, centers, expansion_radii, **kwargs): - knl = self.get_cached_optimized_kernel() + knl = self.get_cached_optimized_kernel( + targets_is_obj_array=is_obj_array_like(targets), + sources_is_obj_array=is_obj_array_like(sources), + centers_is_obj_array=is_obj_array_like(centers)) return knl(queue, src=sources, tgt=targets, center=centers, expansion_radii=expansion_radii, **kwargs) @@ -388,9 +403,17 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): return loopy_knl - def get_optimized_kernel(self): + def get_optimized_kernel(self, + targets_is_obj_array, sources_is_obj_array, centers_is_obj_array): loopy_knl = self.get_kernel() + if targets_is_obj_array: + loopy_knl = lp.tag_array_axes(loopy_knl, "tgt", "sep,C") + if sources_is_obj_array: + loopy_knl = lp.tag_array_axes(loopy_knl, "src", "sep,C") + if centers_is_obj_array: + loopy_knl = lp.tag_array_axes(loopy_knl, "center", "sep,C") + loopy_knl = lp.split_iname(loopy_knl, "imat", 1024, outer_tag="g.0") return loopy_knl @@ -406,8 +429,10 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): :return: a tuple of one-dimensional arrays of kernel evaluations at target-source pairs described by `index_set`. """ - - knl = self.get_cached_optimized_kernel() + knl = self.get_cached_optimized_kernel( + targets_is_obj_array=is_obj_array_like(targets), + sources_is_obj_array=is_obj_array_like(sources), + centers_is_obj_array=is_obj_array_like(centers)) return knl(queue, src=sources, diff --git a/sumpy/tools.py b/sumpy/tools.py index 4d1098429d1c9db43af4a6e26fbb63509438a375..353ca9dac95673863fa735bd685267257443acb1 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -717,4 +717,9 @@ def my_syntactic_subs(expr, subst_dict): return expr +def is_obj_array_like(ary): + return ( + isinstance(ary, (tuple, list)) + or (isinstance(ary, np.ndarray) and ary.dtype.char == "O")) + # vim: fdm=marker diff --git a/sumpy/version.py b/sumpy/version.py index f44cfe20283522f87e8ebb10d41fff268d205080..ced3319c9da32decaffd911398df47c239e034b9 100644 --- a/sumpy/version.py +++ b/sumpy/version.py @@ -43,7 +43,7 @@ else: # }}} -VERSION = (2020, 1) +VERSION = (2020, 2) VERSION_STATUS = "beta1" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS