diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 84c8f390d9299e4bbf9d0583b864e7efd6382895..efdbdcd4078c4f63adc242efe938f25faa8b56f6 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -132,7 +132,7 @@ class P2PBase(KernelComputation, KernelCacheWrapper): from sumpy.tools import gather_loopy_source_arguments return ([ lp.GlobalArg("sources", None, - shape=(self.dim, "nsources")), + shape=(self.dim, "nsources")), lp.GlobalArg("targets", None, shape=(self.dim, "ntargets")), lp.ValueArg("nsources", None), @@ -179,11 +179,12 @@ class P2P(P2PBase): shape="nresults, ntargets", dim_tags="sep,C") ]) - loopy_knl = lp.make_kernel([ - "{[itgt]: 0 <= itgt < ntargets}", - "{[isrc]: 0 <= isrc < nsources}", - "{[idim]: 0 <= idim < dim}" - ], + loopy_knl = lp.make_kernel([""" + {[itgt, isrc, idim]: \ + 0 <= itgt < ntargets and \ + 0 <= isrc < nsources and \ + 0 <= idim < dim} + """], self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> d[idim] = targets[idim, itgt] - sources[idim, isrc]"] @@ -245,11 +246,12 @@ class P2PMatrixGenerator(P2PBase): shape="ntargets,nsources") for i, dtype in enumerate(self.value_dtypes)]) - loopy_knl = lp.make_kernel([ - "{[itgt]: 0 <= itgt < ntargets}", - "{[isrc]: 0 <= isrc < nsources}", - "{[idim]: 0 <= idim < dim}" - ], + loopy_knl = lp.make_kernel([""" + {[itgt, isrc, idim]: \ + 0 <= itgt < ntargets and \ + 0 <= isrc < nsources and \ + 0 <= idim < dim} + """], self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> d[idim] = targets[idim, itgt] - sources[idim, isrc]"] @@ -318,8 +320,10 @@ class P2PMatrixBlockGenerator(P2PBase): loopy_knl = lp.make_kernel([ "{[irange]: 0 <= irange < nranges}", - "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}", - "{[idim]: 0 <= idim < dim}" + "{[itgt, isrc, idim]: \ + 0 <= itgt < ntgtblock and \ + 0 <= isrc < nsrcblock and \ + 0 <= idim < dim}", ], self.get_kernel_scaling_assignments() + [""" diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 5d4e1105eee6245ddcf38a1066b5851d5486ba4c..f3b2d6f78964d3d4f5a88248f7b9a08d32b722eb 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -193,6 +193,7 @@ class LayerPotential(LayerPotentialBase): default_name = "qbx_apply" + @memoize_method def get_kernel(self): loopy_insns, result_names = self.get_loopy_insns_and_result_names() kernel_exprs = self.get_kernel_exprs(result_names) @@ -203,13 +204,14 @@ class LayerPotential(LayerPotentialBase): for i in range(self.strength_count)] + [lp.GlobalArg("result_%d" % i, None, shape="ntargets", order="C") - for i, _ in enumerate(self.value_dtypes)]) + for i in range(len(self.kernels))]) - loopy_knl = lp.make_kernel([ - "{[itgt]: 0 <= itgt < ntargets}", - "{[isrc]: 0 <= isrc < nsources}", - "{[idim]: 0 <= idim < dim}" - ], + loopy_knl = lp.make_kernel([""" + {[itgt, isrc, idim]: \ + 0 <= itgt < ntargets and \ + 0 <= isrc < nsources and \ + 0 <= idim < dim} + """], self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"] @@ -265,6 +267,7 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase): def get_strength_or_not(self, isrc, kernel_idx): return 1 + @memoize_method def get_kernel(self): loopy_insns, result_names = self.get_loopy_insns_and_result_names() kernel_exprs = self.get_kernel_exprs(result_names) @@ -274,11 +277,12 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase): dtype, shape="ntargets, nsources", order="C") for i, dtype in enumerate(self.value_dtypes)]) - loopy_knl = lp.make_kernel([ - "{[itgt]: 0 <= itgt < ntargets}", - "{[isrc]: 0 <= isrc < nsources}", - "{[idim]: 0 <= idim < dim}" - ], + loopy_knl = lp.make_kernel([""" + {[itgt, isrc, idim]: \ + 0 <= itgt < ntargets and \ + 0 <= isrc < nsources and \ + 0 <= idim < dim} + """], self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"] @@ -324,6 +328,7 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): def get_strength_or_not(self, isrc, kernel_idx): return 1 + @memoize_method def get_kernel(self): loopy_insns, result_names = self.get_loopy_insns_and_result_names() kernel_exprs = self.get_kernel_exprs(result_names)