diff --git a/sumpy/p2p.py b/sumpy/p2p.py index d1903a282aa0e8c6cd958af8a20819d3eed1cd75..9503a2a1899969ebd19f72f312498e7c4a195922 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -307,15 +307,15 @@ class P2PMatrixBlockGenerator(P2PBase): arguments = ( self.get_default_src_tgt_arguments() + [ - lp.GlobalArg("srcindices", None, shape="nresults"), - lp.GlobalArg("tgtindices", None, shape="nresults"), - lp.ValueArg("nresults", None) + lp.GlobalArg("srcindices", None, shape="nresult"), + lp.GlobalArg("tgtindices", None, shape="nresult"), + lp.ValueArg("nresult", None) ] + - [lp.GlobalArg("result_%d" % i, dtype, shape="nresults") + [lp.GlobalArg("result_%d" % i, dtype, shape="nresult") for i, dtype in enumerate(self.value_dtypes)]) loopy_knl = lp.make_kernel( - "{[imat, idim]: 0 <= imat < nresults and 0 <= idim < dim}", + "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}", self.get_kernel_scaling_assignments() + [""" for imat @@ -335,7 +335,7 @@ class P2PMatrixBlockGenerator(P2PBase): for iknl in range(len(self.kernels))] + ["end"], arguments, - assumptions="nresults>=1", + assumptions="nresult>=1", silenced_warnings="write_race(write_p2p*)", name=self.name, fixed_parameters=dict(dim=self.dim), @@ -392,9 +392,9 @@ class P2PMatrixBlockGenerator(P2PBase): sources_is_obj_array=( is_obj_array(sources) or isinstance(sources, (tuple, list)))) - rowindices, colindices = index_set.linear_indices() + tgtindices, srcindices = index_set.linear_indices() return knl(queue, targets=targets, sources=sources, - tgtindices=rowindices, srcindices=colindices, **kwargs) + tgtindices=tgtindices, srcindices=srcindices, **kwargs) # }}} diff --git a/sumpy/qbx.py b/sumpy/qbx.py index f3b2d6f78964d3d4f5a88248f7b9a08d32b722eb..73f7ec3d12e818cd2b111332d8285a930f306d3e 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -335,68 +335,44 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): arguments = ( self.get_default_src_tgt_arguments() + [ - lp.GlobalArg("srcindices", None, shape="nsrcindices"), - lp.GlobalArg("tgtindices", None, shape="ntgtindices"), - lp.GlobalArg("srcranges", None, shape="nranges + 1"), - lp.GlobalArg("tgtranges", None, shape="nranges + 1"), - lp.ValueArg("nsrcindices", None), - lp.ValueArg("ntgtindices", None), - lp.ValueArg("nranges", None) + lp.GlobalArg("srcindices", None, shape="nresult"), + lp.GlobalArg("tgtindices", None, shape="nresult"), + lp.ValueArg("nresult", None) ] + - [lp.GlobalArg("result_%d" % i, - dtype, shape="ntgtindices, nsrcindices") - for i, dtype in enumerate(self.value_dtypes)]) + [lp.GlobalArg("result_%d" % i, dtype, shape="nresult") + for i, dtype in enumerate(self.value_dtypes)]) loopy_knl = lp.make_kernel([ - "{[irange]: 0 <= irange < nranges}", - "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}", - "{[idim]: 0 <= idim < dim}" + "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}" ], self.get_kernel_scaling_assignments() + [""" - for irange - <> tgtstart = tgtranges[irange] - <> tgtend = tgtranges[irange + 1] - <> ntgtblock = tgtend - tgtstart - <> srcstart = srcranges[irange] - <> srcend = srcranges[irange + 1] - <> nsrcblock = srcend - srcstart - - for itgt, isrc - <> a[idim] = center[idim, tgtindices[tgtstart + itgt]] - \ - src[idim, srcindices[srcstart + isrc]] \ - {dup=idim} - <> b[idim] = tgt[idim, tgtindices[tgtstart + itgt]] - \ - center[idim, tgtindices[tgtstart + itgt]] \ - {dup=idim} - <> rscale = expansion_radii[tgtindices[tgtstart + itgt]] + for imat + <> a[idim] = center[idim, tgtindices[imat]] - \ + src[idim, srcindices[imat]] {dup=idim} + <> b[idim] = tgt[idim, tgtindices[imat]] - \ + center[idim, tgtindices[imat]] {dup=idim} + <> rscale = expansion_radii[tgtindices[imat]] """] + loopy_insns + kernel_exprs + [""" - result_{i}[tgtstart + itgt, srcstart + isrc] = \ - knl_{i}_scaling * pair_result_{i} \ - {{id_prefix=write_lpot,inames=irange}} + result_{i}[imat] = knl_{i}_scaling * pair_result_{i} \ + {{id_prefix=write_lpot}} """.format(i=iknl) for iknl in range(len(self.expansions))] - + [""" - end - end - """], + + ["end"], arguments, name=self.name, - assumptions="nranges>=1", + assumptions="nresult>=1", default_offset=lp.auto, silenced_warnings="write_race(write_lpot*)", fixed_parameters=dict(dim=self.dim), lang_version=MOST_RECENT_LANGUAGE_VERSION) - loopy_knl = lp.add_dtypes(loopy_knl, dict( - nsources=np.int32, - ntargets=np.int32, - ntgtindices=np.int32, - nsrcindices=np.int32)) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + loopy_knl = lp.add_dtypes(loopy_knl, + dict(nsources=np.int32, ntargets=np.int32)) + for expn in self.expansions: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) @@ -405,17 +381,17 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): def get_optimized_kernel(self): loopy_knl = self.get_kernel() - loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") + loopy_knl = lp.split_iname(loopy_knl, "imat", 1024, outer_tag="g.0") return loopy_knl def __call__(self, queue, targets, sources, centers, expansion_radii, - tgtindices, srcindices, tgtranges, srcranges, **kwargs): + index_set, **kwargs): knl = self.get_cached_optimized_kernel() + tgtindices, srcindices = index_set.linear_indices() return knl(queue, src=sources, tgt=targets, center=centers, expansion_radii=expansion_radii, - tgtindices=tgtindices, srcindices=srcindices, - tgtranges=tgtranges, srcranges=srcranges, **kwargs) + tgtindices=tgtindices, srcindices=srcindices, **kwargs) # }}} diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index d6e85c23dc835034f99b8c16d9f422730da7d650..a687ee16b04ad2af406ceff02aa54e2279404346 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -31,6 +31,8 @@ import pyopencl as cl from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) +from sumpy.tools import MatrixBlockIndex + import logging logger = logging.getLogger(__name__) @@ -82,8 +84,8 @@ def create_index_subset(nnodes, nblks, factor): return indices_, ranges_ - -def test_qbx_direct(ctx_getter): +@pytest.mark.parametrize('factor', [1.0, 0.6]) +def test_qbx_direct(ctx_getter, factor): # This evaluates a single layer potential on a circle. logging.basicConfig(level=logging.INFO) @@ -114,14 +116,8 @@ def test_qbx_direct(ctx_getter): h = 2 * np.pi / n strengths = (sigma * h,) - tgtindices = np.arange(0, n) - tgtindices = np.random.choice(tgtindices, size=int(0.8 * n)) - tgtranges = np.arange(0, tgtindices.shape[0] + 1, - tgtindices.shape[0] // nblks) - srcindices = np.arange(0, n) - srcindices = np.random.choice(srcindices, size=int(0.8 * n)) - srcranges = np.arange(0, srcindices.shape[0] + 1, - srcindices.shape[0] // nblks) + tgtindices, tgtranges = create_index_subset(n, nblks, factor) + srcindices, srcranges = create_index_subset(n, nblks, factor) assert tgtranges.shape == srcranges.shape _, (mat,) = mat_gen(queue, targets, sources, centers, @@ -134,16 +130,14 @@ def test_qbx_direct(ctx_getter): eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps + index_set = MatrixBlockIndex(queue, + tgtindices, srcindices, tgtranges, srcranges) _, (blk,) = blk_gen(queue, targets, sources, centers, expansion_radii, - tgtindices, srcindices, tgtranges, srcranges) + index_set) - for i in range(srcranges.shape[0] - 1): - itgt = np.s_[tgtranges[i]:tgtranges[i + 1]] - isrc = np.s_[srcranges[i]:srcranges[i + 1]] - block = np.ix_(tgtindices[itgt], srcindices[isrc]) - - eps = 1.0e-10 * la.norm(mat[block]) - assert la.norm(blk[itgt, isrc] - mat[block]) < eps + rowindices, colindices = index_set.linear_indices() + eps = 1.0e-10 * la.norm(mat) + assert la.norm(blk - mat[rowindices, colindices].reshape(-1)) < eps @pytest.mark.parametrize(("exclude_self", "factor"), @@ -161,7 +155,6 @@ def test_p2p_direct(ctx_getter, exclude_self, factor): mode_nr = 25 from sumpy.p2p import P2P - from sumpy.tools import MatrixBlockIndex from sumpy.p2p import P2PMatrixGenerator, P2PMatrixBlockGenerator lpot = P2P(ctx, [lknl], exclude_self=exclude_self) mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self)