diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 97756a55d5690219317d930f1ae43debc769a682..896d34c97637a42c8ef716d0734fcf84cb515211 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -369,15 +369,15 @@ class P2PMatrixBlockGenerator(P2PBase): loopy_knl = lp.make_kernel( "{[i, j]: 1 <= i <= nranges and 1 <= j <= i}", """ - blkprefix[0] = 0.0 - blkprefix[i] = reduce(sum, j, \ + blkranges[0] = 0.0 + blkranges[i] = reduce(sum, j, \ (srcranges[j] - srcranges[j - 1]) * \ (tgtranges[j] - tgtranges[j - 1])) \ """, [ lp.GlobalArg("tgtranges", None, shape="nranges + 1"), lp.GlobalArg("srcranges", None, shape="nranges + 1"), - lp.GlobalArg("blkprefix", np.int32, shape="nranges + 1"), + lp.GlobalArg("blkranges", np.int32, shape="nranges + 1"), lp.ValueArg("nranges", None) ], name="block_cumsum_knl", @@ -400,7 +400,7 @@ class P2PMatrixBlockGenerator(P2PBase): <> nsrcblock = srcranges[irange + 1] - srcranges[irange] for itgt, isrc - <> imat = blkprefix[irange] + (nsrcblock * itgt + isrc) + <> imat = blkranges[irange] + (nsrcblock * itgt + isrc) rowindices[imat] = tgtindices[tgtranges[irange] + itgt] colindices[imat] = srcindices[srcranges[irange] + isrc] @@ -412,7 +412,7 @@ class P2PMatrixBlockGenerator(P2PBase): lp.GlobalArg("tgtindices", None, shape="ntgtindices"), lp.GlobalArg("srcranges", None, shape="nranges + 1"), lp.GlobalArg("tgtranges", None, shape="nranges + 1"), - lp.GlobalArg("blkprefix", None, shape="nranges + 1"), + lp.GlobalArg("blkranges", None, shape="nranges + 1"), lp.GlobalArg("rowindices", None, shape="nresults"), lp.GlobalArg("colindices", None, shape="nresults"), lp.ValueArg("nsrcindices", np.int32), @@ -434,16 +434,16 @@ class P2PMatrixBlockGenerator(P2PBase): sources_is_obj_array=( is_obj_array(sources) or isinstance(sources, (tuple, list)))) - _, (blkprefix,) = cumsum()(queue, + _, (blkranges,) = cumsum()(queue, tgtranges=tgtranges, srcranges=srcranges) _, (rowindices, colindices,) = linear_index()(queue, tgtindices=tgtindices, srcindices=srcindices, tgtranges=tgtranges, srcranges=srcranges, - blkprefix=blkprefix, nresults=blkprefix[-1]) + blkranges=blkranges, nresults=blkranges[-1]) evt, results = knl(queue, targets=targets, sources=sources, tgtindices=rowindices, srcindices=colindices, **kwargs) - return evt, tuple(list(results) + [rowindices, colindices]) + return evt, tuple(list(results) + [rowindices, colindices, blkranges]) # }}} diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index e784529e0baaea3416f0db64c100ea3d5836c934..12198a396941af4f9acd9712e19c580f74c6f35d 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -146,9 +146,8 @@ def test_qbx_direct(ctx_getter): assert la.norm(blk[itgt, isrc] - mat[block]) < eps -@pytest.mark.parametrize("exclude_self", "factor", [ - (True, 1.0), (True, 0.6), (False, 1.0), (False, 0.6) - ]) +@pytest.mark.parametrize(("exclude_self", "factor"), + [(True, 1.0), (True, 0.6), (False, 1.0), (False, 0.6)]) def test_p2p_direct(ctx_getter, exclude_self, factor): # This does a point-to-point kernel evaluation on a circle. logging.basicConfig(level=logging.INFO) @@ -191,8 +190,10 @@ def test_p2p_direct(ctx_getter, exclude_self, factor): eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps - _, (blk, rowindices, colindices) = blk_gen(queue, targets, sources, - tgtindices, srcindices, tgtranges, srcranges, **extra_kwargs) + _, (blk, rowindices, colindices, blkranges) = \ + blk_gen(queue, targets, sources, + tgtindices, srcindices, tgtranges, srcranges, + **extra_kwargs) eps = 1.0e-10 * la.norm(mat) assert la.norm(blk - mat[rowindices, colindices].reshape(-1)) < eps