From f8a7e8d41da3f4aa91c629df4a46a8c7347c1b8c Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Sat, 14 Apr 2018 19:12:55 -0500 Subject: [PATCH 01/11] simplify test data generation --- test/test_matrixgen.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index 4e2b3ad7..eb85052b 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -63,6 +63,19 @@ def create_arguments(n, mode, target_radius=1.0): return targets, sources, centers, sigma, expansion_radii +def create_subset(indices, ranges, factor): + indices_ = np.empty(ranges.shape[0] - 1, dtype=np.object) + for i in range(ranges.shape[0] - 1): + iidx = indices[np.s_[ranges[i]:ranges[i + 1]]] + indices_[i] = np.sort(np.random.choice(iidx, + size=int(factor * len(iidx)), replace=False)) + + ranges_ = np.cumsum([0] + [r.shape[0] for r in indices_]) + indices_ = np.hstack(indices_) + + return indices_, ranges_ + + def test_qbx_direct(ctx_getter): # This evaluates a single layer potential on a circle. logging.basicConfig(level=logging.INFO) @@ -154,13 +167,14 @@ def test_p2p_direct(ctx_getter, exclude_self): strengths = (sigma * h,) tgtindices = np.arange(0, n) - tgtindices = np.random.choice(tgtindices, size=int(0.8 * n)) tgtranges = np.arange(0, tgtindices.shape[0] + 1, tgtindices.shape[0] // nblks) + tgtindices, tgtranges = create_subset(tgtindices, tgtranges, 0.6) + srcindices = np.arange(0, n) - srcindices = np.random.choice(srcindices, size=int(0.8 * n)) srcranges = np.arange(0, srcindices.shape[0] + 1, srcindices.shape[0] // nblks) + srcindices, srcranges = create_subset(srcindices, srcranges, 0.6) assert tgtranges.shape == srcranges.shape extra_kwargs = {} -- GitLab From ba2c9b44cd494a49b2835464190dde8d95ecf42c Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Sat, 14 Apr 2018 21:25:44 -0500 Subject: [PATCH 02/11] p2p: rework block generator to not waste huge amounts of space --- sumpy/p2p.py | 141 +++++++++++++++++++++++++++-------------- test/test_matrixgen.py | 57 ++++++++--------- 2 files changed, 120 insertions(+), 78 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 38bec39d..97756a55 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -33,6 +33,7 @@ import loopy as lp from loopy.version import MOST_RECENT_LANGUAGE_VERSION from pymbolic import var +from pytools import memoize_in from sumpy.tools import KernelComputation, KernelCacheWrapper @@ -306,68 +307,44 @@ class P2PMatrixBlockGenerator(P2PBase): arguments = ( self.get_default_src_tgt_arguments() + [ - lp.GlobalArg("srcindices", None, shape="nsrcindices"), - lp.GlobalArg("tgtindices", None, shape="ntgtindices"), - lp.GlobalArg("srcranges", None, shape="nranges + 1"), - lp.GlobalArg("tgtranges", None, shape="nranges + 1"), - lp.ValueArg("nsrcindices", None), - lp.ValueArg("ntgtindices", None), - lp.ValueArg("nranges", None) + lp.GlobalArg("srcindices", None, shape="nresults"), + lp.GlobalArg("tgtindices", None, shape="nresults"), + lp.ValueArg("nresults", None) ] + - [lp.GlobalArg("result_%d" % i, dtype, - shape="ntgtindices, nsrcindices") + [lp.GlobalArg("result_%d" % i, dtype, shape="nresults") for i, dtype in enumerate(self.value_dtypes)]) - loopy_knl = lp.make_kernel([ - "{[irange]: 0 <= irange < nranges}", - "{[itgt, isrc, idim]: \ - 0 <= itgt < ntgtblock and \ - 0 <= isrc < nsrcblock and \ - 0 <= idim < dim}", - ], + loopy_knl = lp.make_kernel( + "{[imat, idim]: 0 <= imat < nresults and 0 <= idim < dim}", self.get_kernel_scaling_assignments() + [""" - for irange - <> tgtstart = tgtranges[irange] - <> tgtend = tgtranges[irange + 1] - <> ntgtblock = tgtend - tgtstart - <> srcstart = srcranges[irange] - <> srcend = srcranges[irange + 1] - <> nsrcblock = srcend - srcstart - - for itgt, isrc - <> d[idim] = targets[idim, tgtindices[tgtstart + itgt]] - \ - sources[idim, srcindices[srcstart + isrc]] + for imat + <> d[idim] = targets[idim, tgtindices[imat]] - \ + sources[idim, srcindices[imat]] """] + [""" - <> is_self = (srcindices[srcstart + isrc] == - target_to_source[tgtindices[tgtstart + itgt]]) + <> is_self = (srcindices[imat] == + target_to_source[tgtindices[imat]]) """ if self.exclude_self else ""] + loopy_insns + kernel_exprs + [""" - result_{i}[tgtstart + itgt, srcstart + isrc] = \ - knl_{i}_scaling * pair_result_{i} \ - {{id_prefix=write_p2p,inames=irange}} + result_{i}[imat] = \ + knl_{i}_scaling * pair_result_{i} \ + {{id_prefix=write_p2p}} """.format(i=iknl) for iknl in range(len(self.kernels))] - + [""" - end - end - """], + + ["end"], arguments, - assumptions="nranges>=1", + assumptions="nresults>=1", silenced_warnings="write_race(write_p2p*)", name=self.name, fixed_parameters=dict(dim=self.dim), lang_version=MOST_RECENT_LANGUAGE_VERSION) - loopy_knl = lp.add_dtypes(loopy_knl, dict( - nsources=np.int32, - ntargets=np.int32, - ntgtindices=np.int32, - nsrcindices=np.int32)) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + loopy_knl = lp.add_dtypes(loopy_knl, + dict(nsources=np.int32, ntargets=np.int32)) + for knl in self.kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) @@ -382,11 +359,74 @@ class P2PMatrixBlockGenerator(P2PBase): if targets_is_obj_array: knl = lp.tag_array_axes(knl, "targets", "sep,C") - knl = lp.split_iname(knl, "irange", 128, outer_tag="g.0") + knl = lp.split_iname(knl, "imat", 1024, outer_tag="g.0") return knl def __call__(self, queue, targets, sources, tgtindices, srcindices, tgtranges, srcranges, **kwargs): + @memoize_in(self, "block_cumsum_knl") + def cumsum(): + loopy_knl = lp.make_kernel( + "{[i, j]: 1 <= i <= nranges and 1 <= j <= i}", + """ + blkprefix[0] = 0.0 + blkprefix[i] = reduce(sum, j, \ + (srcranges[j] - srcranges[j - 1]) * \ + (tgtranges[j] - tgtranges[j - 1])) \ + """, + [ + lp.GlobalArg("tgtranges", None, shape="nranges + 1"), + lp.GlobalArg("srcranges", None, shape="nranges + 1"), + lp.GlobalArg("blkprefix", np.int32, shape="nranges + 1"), + lp.ValueArg("nranges", None) + ], + name="block_cumsum_knl", + default_offset=lp.auto, + lang_version=MOST_RECENT_LANGUAGE_VERSION) + + loopy_knl = lp.realize_reduction(loopy_knl, force_scan=True, + force_outer_iname_for_scan="i") + return loopy_knl + + @memoize_in(self, "block_linear_index_knl") + def linear_index(): + loopy_knl = lp.make_kernel([ + "{[irange]: 0 <= irange < nranges}", + "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}" + ], + """ + for irange + <> ntgtblock = tgtranges[irange + 1] - tgtranges[irange] + <> nsrcblock = srcranges[irange + 1] - srcranges[irange] + + for itgt, isrc + <> imat = blkprefix[irange] + (nsrcblock * itgt + isrc) + + rowindices[imat] = tgtindices[tgtranges[irange] + itgt] + colindices[imat] = srcindices[srcranges[irange] + isrc] + end + end + """, + [ + lp.GlobalArg("srcindices", None, shape="nsrcindices"), + lp.GlobalArg("tgtindices", None, shape="ntgtindices"), + lp.GlobalArg("srcranges", None, shape="nranges + 1"), + lp.GlobalArg("tgtranges", None, shape="nranges + 1"), + lp.GlobalArg("blkprefix", None, shape="nranges + 1"), + lp.GlobalArg("rowindices", None, shape="nresults"), + lp.GlobalArg("colindices", None, shape="nresults"), + lp.ValueArg("nsrcindices", np.int32), + lp.ValueArg("ntgtindices", np.int32), + lp.ValueArg("nresults", None), + lp.ValueArg("nranges", None) + ], + name="block_linear_prefix_knl", + default_offset=lp.auto, + lang_version=MOST_RECENT_LANGUAGE_VERSION) + loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") + + return loopy_knl + from pytools.obj_array import is_obj_array knl = self.get_cached_optimized_kernel( targets_is_obj_array=( @@ -394,9 +434,16 @@ class P2PMatrixBlockGenerator(P2PBase): sources_is_obj_array=( is_obj_array(sources) or isinstance(sources, (tuple, list)))) - return knl(queue, targets=targets, sources=sources, - tgtindices=tgtindices, srcindices=srcindices, - tgtranges=tgtranges, srcranges=srcranges, **kwargs) + _, (blkprefix,) = cumsum()(queue, + tgtranges=tgtranges, srcranges=srcranges) + _, (rowindices, colindices,) = linear_index()(queue, + tgtindices=tgtindices, srcindices=srcindices, + tgtranges=tgtranges, srcranges=srcranges, + blkprefix=blkprefix, nresults=blkprefix[-1]) + evt, results = knl(queue, targets=targets, sources=sources, + tgtindices=rowindices, srcindices=colindices, **kwargs) + + return evt, tuple(list(results) + [rowindices, colindices]) # }}} diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index eb85052b..e784529e 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -63,15 +63,22 @@ def create_arguments(n, mode, target_radius=1.0): return targets, sources, centers, sigma, expansion_radii -def create_subset(indices, ranges, factor): - indices_ = np.empty(ranges.shape[0] - 1, dtype=np.object) - for i in range(ranges.shape[0] - 1): - iidx = indices[np.s_[ranges[i]:ranges[i + 1]]] - indices_[i] = np.sort(np.random.choice(iidx, - size=int(factor * len(iidx)), replace=False)) +def create_index_subset(nnodes, nblks, factor): + indices = np.arange(0, nnodes) + ranges = np.arange(0, nnodes + 1, nnodes // nblks) - ranges_ = np.cumsum([0] + [r.shape[0] for r in indices_]) - indices_ = np.hstack(indices_) + if abs(factor - 1.0) < 1.0e-14: + ranges_ = ranges + indices_ = indices + else: + indices_ = np.empty(ranges.shape[0] - 1, dtype=np.object) + for i in range(ranges.shape[0] - 1): + iidx = indices[np.s_[ranges[i]:ranges[i + 1]]] + indices_[i] = np.sort(np.random.choice(iidx, + size=int(factor * len(iidx)), replace=False)) + + ranges_ = np.cumsum([0] + [r.shape[0] for r in indices_]) + indices_ = np.hstack(indices_) return indices_, ranges_ @@ -139,11 +146,12 @@ def test_qbx_direct(ctx_getter): assert la.norm(blk[itgt, isrc] - mat[block]) < eps -@pytest.mark.parametrize("exclude_self", [True, False]) -def test_p2p_direct(ctx_getter, exclude_self): - # This evaluates a single layer potential on a circle. +@pytest.mark.parametrize("exclude_self", "factor", [ + (True, 1.0), (True, 0.6), (False, 1.0), (False, 0.6) + ]) +def test_p2p_direct(ctx_getter, exclude_self, factor): + # This does a point-to-point kernel evaluation on a circle. logging.basicConfig(level=logging.INFO) - ctx = ctx_getter() queue = cl.CommandQueue(ctx) @@ -166,15 +174,8 @@ def test_p2p_direct(ctx_getter, exclude_self): h = 2 * np.pi / n strengths = (sigma * h,) - tgtindices = np.arange(0, n) - tgtranges = np.arange(0, tgtindices.shape[0] + 1, - tgtindices.shape[0] // nblks) - tgtindices, tgtranges = create_subset(tgtindices, tgtranges, 0.6) - - srcindices = np.arange(0, n) - srcranges = np.arange(0, srcindices.shape[0] + 1, - srcindices.shape[0] // nblks) - srcindices, srcranges = create_subset(srcindices, srcranges, 0.6) + tgtindices, tgtranges = create_index_subset(n, nblks, factor) + srcindices, srcranges = create_index_subset(n, nblks, factor) assert tgtranges.shape == srcranges.shape extra_kwargs = {} @@ -190,17 +191,11 @@ def test_p2p_direct(ctx_getter, exclude_self): eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps - _, (blk,) = blk_gen(queue, targets, sources, - tgtindices, srcindices, tgtranges, srcranges, - **extra_kwargs) + _, (blk, rowindices, colindices) = blk_gen(queue, targets, sources, + tgtindices, srcindices, tgtranges, srcranges, **extra_kwargs) - for i in range(srcranges.shape[0] - 1): - itgt = np.s_[tgtranges[i]:tgtranges[i + 1]] - isrc = np.s_[srcranges[i]:srcranges[i + 1]] - block = np.ix_(tgtindices[itgt], srcindices[isrc]) - - eps = 1.0e-10 * la.norm(mat[block]) - assert la.norm(blk[itgt, isrc] - mat[block]) < eps + eps = 1.0e-10 * la.norm(mat) + assert la.norm(blk - mat[rowindices, colindices].reshape(-1)) < eps # You can test individual routines by typing -- GitLab From f82035124152bae6c6b72313dc0fcd0e32f874e0 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Sat, 14 Apr 2018 21:46:06 -0500 Subject: [PATCH 03/11] p2p: also return the block ranges so we can separate them --- sumpy/p2p.py | 16 ++++++++-------- test/test_matrixgen.py | 11 ++++++----- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 97756a55..896d34c9 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -369,15 +369,15 @@ class P2PMatrixBlockGenerator(P2PBase): loopy_knl = lp.make_kernel( "{[i, j]: 1 <= i <= nranges and 1 <= j <= i}", """ - blkprefix[0] = 0.0 - blkprefix[i] = reduce(sum, j, \ + blkranges[0] = 0.0 + blkranges[i] = reduce(sum, j, \ (srcranges[j] - srcranges[j - 1]) * \ (tgtranges[j] - tgtranges[j - 1])) \ """, [ lp.GlobalArg("tgtranges", None, shape="nranges + 1"), lp.GlobalArg("srcranges", None, shape="nranges + 1"), - lp.GlobalArg("blkprefix", np.int32, shape="nranges + 1"), + lp.GlobalArg("blkranges", np.int32, shape="nranges + 1"), lp.ValueArg("nranges", None) ], name="block_cumsum_knl", @@ -400,7 +400,7 @@ class P2PMatrixBlockGenerator(P2PBase): <> nsrcblock = srcranges[irange + 1] - srcranges[irange] for itgt, isrc - <> imat = blkprefix[irange] + (nsrcblock * itgt + isrc) + <> imat = blkranges[irange] + (nsrcblock * itgt + isrc) rowindices[imat] = tgtindices[tgtranges[irange] + itgt] colindices[imat] = srcindices[srcranges[irange] + isrc] @@ -412,7 +412,7 @@ class P2PMatrixBlockGenerator(P2PBase): lp.GlobalArg("tgtindices", None, shape="ntgtindices"), lp.GlobalArg("srcranges", None, shape="nranges + 1"), lp.GlobalArg("tgtranges", None, shape="nranges + 1"), - lp.GlobalArg("blkprefix", None, shape="nranges + 1"), + lp.GlobalArg("blkranges", None, shape="nranges + 1"), lp.GlobalArg("rowindices", None, shape="nresults"), lp.GlobalArg("colindices", None, shape="nresults"), lp.ValueArg("nsrcindices", np.int32), @@ -434,16 +434,16 @@ class P2PMatrixBlockGenerator(P2PBase): sources_is_obj_array=( is_obj_array(sources) or isinstance(sources, (tuple, list)))) - _, (blkprefix,) = cumsum()(queue, + _, (blkranges,) = cumsum()(queue, tgtranges=tgtranges, srcranges=srcranges) _, (rowindices, colindices,) = linear_index()(queue, tgtindices=tgtindices, srcindices=srcindices, tgtranges=tgtranges, srcranges=srcranges, - blkprefix=blkprefix, nresults=blkprefix[-1]) + blkranges=blkranges, nresults=blkranges[-1]) evt, results = knl(queue, targets=targets, sources=sources, tgtindices=rowindices, srcindices=colindices, **kwargs) - return evt, tuple(list(results) + [rowindices, colindices]) + return evt, tuple(list(results) + [rowindices, colindices, blkranges]) # }}} diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index e784529e..12198a39 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -146,9 +146,8 @@ def test_qbx_direct(ctx_getter): assert la.norm(blk[itgt, isrc] - mat[block]) < eps -@pytest.mark.parametrize("exclude_self", "factor", [ - (True, 1.0), (True, 0.6), (False, 1.0), (False, 0.6) - ]) +@pytest.mark.parametrize(("exclude_self", "factor"), + [(True, 1.0), (True, 0.6), (False, 1.0), (False, 0.6)]) def test_p2p_direct(ctx_getter, exclude_self, factor): # This does a point-to-point kernel evaluation on a circle. logging.basicConfig(level=logging.INFO) @@ -191,8 +190,10 @@ def test_p2p_direct(ctx_getter, exclude_self, factor): eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps - _, (blk, rowindices, colindices) = blk_gen(queue, targets, sources, - tgtindices, srcindices, tgtranges, srcranges, **extra_kwargs) + _, (blk, rowindices, colindices, blkranges) = \ + blk_gen(queue, targets, sources, + tgtindices, srcindices, tgtranges, srcranges, + **extra_kwargs) eps = 1.0e-10 * la.norm(mat) assert la.norm(blk - mat[rowindices, colindices].reshape(-1)) < eps -- GitLab From d63f96d29e7d5910f4739fe1ffa94562534c64dd Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Tue, 17 Apr 2018 20:17:38 -0500 Subject: [PATCH 04/11] p2p: add a cache object that keeps track of the indices in the various forms --- sumpy/p2p.py | 190 +++++++++++++++++++++++++---------------- test/test_matrixgen.py | 9 +- 2 files changed, 120 insertions(+), 79 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 896d34c9..4b0590f0 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -33,7 +33,7 @@ import loopy as lp from loopy.version import MOST_RECENT_LANGUAGE_VERSION from pymbolic import var -from pytools import memoize_in +from pytools import memoize_method, memoize_in from sumpy.tools import KernelComputation, KernelCacheWrapper @@ -293,6 +293,97 @@ class P2PMatrixGenerator(P2PBase): # {{{ P2P matrix block writer +class P2PMatrixLinearIndex(object): + def __init__(self, queue, tgtindices, srcindices, tgtranges, srcranges): + self.queue = queue + self.tgtindices = tgtindices + self.tgtranges = tgtranges + self.srcindices = srcindices + self.srcranges = srcranges + + @memoize_method + def block_ranges(self): + @memoize_in(self, "linear_cumsum_knl") + def cumsum(): + loopy_knl = lp.make_kernel( + "{[i, j]: 0 <= i < nranges and 0 <= j <= i}", + """ + blkranges[0] = 0 + blkranges[i + 1] = reduce(sum, j, \ + (srcranges[j + 1] - srcranges[j]) * \ + (tgtranges[j + 1] - tgtranges[j])) \ + """, + [ + lp.GlobalArg("tgtranges", None, shape="nranges + 1"), + lp.GlobalArg("srcranges", None, shape="nranges + 1"), + lp.GlobalArg("blkranges", np.int32, shape="nranges + 1"), + lp.ValueArg("nranges", None) + ], + name="linear_cumsum_knl", + default_offset=lp.auto, + lang_version=MOST_RECENT_LANGUAGE_VERSION) + + loopy_knl = lp.realize_reduction(loopy_knl, force_scan=True, + force_outer_iname_for_scan="i") + return loopy_knl + + _, (blkranges,) = cumsum()(self.queue, + tgtranges=self.tgtranges, srcranges=self.srcranges); + + return blkranges + + @memoize_method + def linear_indices(self): + @memoize_in(self, "linear_index_knl") + def linear_index(): + loopy_knl = lp.make_kernel([ + "{[irange]: 0 <= irange < nranges}", + "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}" + ], + """ + for irange + <> ntgtblock = tgtranges[irange + 1] - tgtranges[irange] + <> nsrcblock = srcranges[irange + 1] - srcranges[irange] + + for itgt, isrc + <> imat = blkranges[irange] + (nsrcblock * itgt + isrc) + + rowindices[imat] = tgtindices[tgtranges[irange] + itgt] \ + {id_prefix=write_index} + colindices[imat] = srcindices[srcranges[irange] + isrc] \ + {id_prefix=write_index} + end + end + """, + [ + lp.GlobalArg("srcindices", None), + lp.GlobalArg("tgtindices", None), + lp.GlobalArg("srcranges", None, shape="nranges + 1"), + lp.GlobalArg("tgtranges", None, shape="nranges + 1"), + lp.GlobalArg("blkranges", None, shape="nranges + 1"), + lp.GlobalArg("rowindices", None, shape="nresults"), + lp.GlobalArg("colindices", None, shape="nresults"), + lp.ValueArg("nresults", None), + lp.ValueArg("nranges", None), + "..." + ], + name="linear_index_knl", + default_offset=lp.auto, + silenced_warnings="write_race(write_index*)", + lang_version=MOST_RECENT_LANGUAGE_VERSION) + loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") + + return loopy_knl + + blkranges = self.block_ranges() + _, (rowindices, colindices) = linear_index()(self.queue, + tgtindices=self.tgtindices, srcindices=self.srcindices, + tgtranges=self.tgtranges, srcranges=self.srcranges, + blkranges=blkranges, nresults=blkranges[-1]) + + return rowindices, colindices, blkranges + + class P2PMatrixBlockGenerator(P2PBase): """Generator for a subset of P2P interaction matrix entries.""" @@ -362,71 +453,27 @@ class P2PMatrixBlockGenerator(P2PBase): knl = lp.split_iname(knl, "imat", 1024, outer_tag="g.0") return knl - def __call__(self, queue, targets, sources, tgtindices, srcindices, - tgtranges, srcranges, **kwargs): - @memoize_in(self, "block_cumsum_knl") - def cumsum(): - loopy_knl = lp.make_kernel( - "{[i, j]: 1 <= i <= nranges and 1 <= j <= i}", - """ - blkranges[0] = 0.0 - blkranges[i] = reduce(sum, j, \ - (srcranges[j] - srcranges[j - 1]) * \ - (tgtranges[j] - tgtranges[j - 1])) \ - """, - [ - lp.GlobalArg("tgtranges", None, shape="nranges + 1"), - lp.GlobalArg("srcranges", None, shape="nranges + 1"), - lp.GlobalArg("blkranges", np.int32, shape="nranges + 1"), - lp.ValueArg("nranges", None) - ], - name="block_cumsum_knl", - default_offset=lp.auto, - lang_version=MOST_RECENT_LANGUAGE_VERSION) - - loopy_knl = lp.realize_reduction(loopy_knl, force_scan=True, - force_outer_iname_for_scan="i") - return loopy_knl - - @memoize_in(self, "block_linear_index_knl") - def linear_index(): - loopy_knl = lp.make_kernel([ - "{[irange]: 0 <= irange < nranges}", - "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}" - ], - """ - for irange - <> ntgtblock = tgtranges[irange + 1] - tgtranges[irange] - <> nsrcblock = srcranges[irange + 1] - srcranges[irange] - - for itgt, isrc - <> imat = blkranges[irange] + (nsrcblock * itgt + isrc) - - rowindices[imat] = tgtindices[tgtranges[irange] + itgt] - colindices[imat] = srcindices[srcranges[irange] + isrc] - end - end - """, - [ - lp.GlobalArg("srcindices", None, shape="nsrcindices"), - lp.GlobalArg("tgtindices", None, shape="ntgtindices"), - lp.GlobalArg("srcranges", None, shape="nranges + 1"), - lp.GlobalArg("tgtranges", None, shape="nranges + 1"), - lp.GlobalArg("blkranges", None, shape="nranges + 1"), - lp.GlobalArg("rowindices", None, shape="nresults"), - lp.GlobalArg("colindices", None, shape="nresults"), - lp.ValueArg("nsrcindices", np.int32), - lp.ValueArg("ntgtindices", np.int32), - lp.ValueArg("nresults", None), - lp.ValueArg("nranges", None) - ], - name="block_linear_prefix_knl", - default_offset=lp.auto, - lang_version=MOST_RECENT_LANGUAGE_VERSION) - loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") - - return loopy_knl - + def __call__(self, queue, targets, sources, index_set, **kwargs): + """Construct a set of blocks from the full P2P interaction matrix. + The relevant subset of indices for block :math:`i` is given by the + row indices `tgtindices[tgtranges[i]:tgtranges[i + 1]]` and the + column indices `srcindices[srcranges[i]:srcranges[i + 1]]`. + + :arg tgtindices: list of row indices for all blocks. + :arg srcindices: list of column indices for all blocks. + :arg tgtranges: list used to index `tgtindices` in each block. + :arg srcindices: list used to index `srcindices` in each block. + + :return: a tuple containing `(kernel_1_block, ... kernel_n_block, + rowindices, colindices, blkranges)`. The set `(rowindices, colindices)` + can be used to place each element of each block in the full matrix. + Each `kernel_i_block` is a one-dimensional array containing all the + blocks, i.e. the ith kernel evaluated at each of the input + source-target pairs. To retrieve a given block :math:`j`, one can + use `kernel_i_block[blkranges[j]:blkranges[j + 1]]`, which can then + be reshaped into the original block size using the `tgtranges` and + `srcranges` index lists. + """ from pytools.obj_array import is_obj_array knl = self.get_cached_optimized_kernel( targets_is_obj_array=( @@ -434,17 +481,10 @@ class P2PMatrixBlockGenerator(P2PBase): sources_is_obj_array=( is_obj_array(sources) or isinstance(sources, (tuple, list)))) - _, (blkranges,) = cumsum()(queue, - tgtranges=tgtranges, srcranges=srcranges) - _, (rowindices, colindices,) = linear_index()(queue, - tgtindices=tgtindices, srcindices=srcindices, - tgtranges=tgtranges, srcranges=srcranges, - blkranges=blkranges, nresults=blkranges[-1]) - evt, results = knl(queue, targets=targets, sources=sources, + rowindices, colindices, _ = index_set.linear_indices() + return knl(queue, targets=targets, sources=sources, tgtindices=rowindices, srcindices=colindices, **kwargs) - return evt, tuple(list(results) + [rowindices, colindices, blkranges]) - # }}} diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index 12198a39..984fbc5b 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -161,6 +161,7 @@ def test_p2p_direct(ctx_getter, exclude_self, factor): mode_nr = 25 from sumpy.p2p import P2P + from sumpy.p2p import P2PMatrixLinearIndex from sumpy.p2p import P2PMatrixGenerator, P2PMatrixBlockGenerator lpot = P2P(ctx, [lknl], exclude_self=exclude_self) mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self) @@ -190,11 +191,11 @@ def test_p2p_direct(ctx_getter, exclude_self, factor): eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps - _, (blk, rowindices, colindices, blkranges) = \ - blk_gen(queue, targets, sources, - tgtindices, srcindices, tgtranges, srcranges, - **extra_kwargs) + index_set = P2PMatrixLinearIndex(queue, + tgtindices, srcindices, tgtranges, srcranges) + _, (blk,) = blk_gen(queue, targets, sources, index_set, **extra_kwargs) + rowindices, colindices, _ = index_set.linear_indices() eps = 1.0e-10 * la.norm(mat) assert la.norm(blk - mat[rowindices, colindices].reshape(-1)) < eps -- GitLab From 1e01bbb613e778b9b3d9b4b3ab46b33475e76a2b Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Tue, 17 Apr 2018 21:12:40 -0500 Subject: [PATCH 05/11] p2p: move the index cache class to sumpy.tools --- sumpy/p2p.py | 133 +++++++--------------------------------- sumpy/tools.py | 136 ++++++++++++++++++++++++++++++++++++++++- test/test_matrixgen.py | 6 +- 3 files changed, 160 insertions(+), 115 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 4b0590f0..d1903a28 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -293,97 +293,6 @@ class P2PMatrixGenerator(P2PBase): # {{{ P2P matrix block writer -class P2PMatrixLinearIndex(object): - def __init__(self, queue, tgtindices, srcindices, tgtranges, srcranges): - self.queue = queue - self.tgtindices = tgtindices - self.tgtranges = tgtranges - self.srcindices = srcindices - self.srcranges = srcranges - - @memoize_method - def block_ranges(self): - @memoize_in(self, "linear_cumsum_knl") - def cumsum(): - loopy_knl = lp.make_kernel( - "{[i, j]: 0 <= i < nranges and 0 <= j <= i}", - """ - blkranges[0] = 0 - blkranges[i + 1] = reduce(sum, j, \ - (srcranges[j + 1] - srcranges[j]) * \ - (tgtranges[j + 1] - tgtranges[j])) \ - """, - [ - lp.GlobalArg("tgtranges", None, shape="nranges + 1"), - lp.GlobalArg("srcranges", None, shape="nranges + 1"), - lp.GlobalArg("blkranges", np.int32, shape="nranges + 1"), - lp.ValueArg("nranges", None) - ], - name="linear_cumsum_knl", - default_offset=lp.auto, - lang_version=MOST_RECENT_LANGUAGE_VERSION) - - loopy_knl = lp.realize_reduction(loopy_knl, force_scan=True, - force_outer_iname_for_scan="i") - return loopy_knl - - _, (blkranges,) = cumsum()(self.queue, - tgtranges=self.tgtranges, srcranges=self.srcranges); - - return blkranges - - @memoize_method - def linear_indices(self): - @memoize_in(self, "linear_index_knl") - def linear_index(): - loopy_knl = lp.make_kernel([ - "{[irange]: 0 <= irange < nranges}", - "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}" - ], - """ - for irange - <> ntgtblock = tgtranges[irange + 1] - tgtranges[irange] - <> nsrcblock = srcranges[irange + 1] - srcranges[irange] - - for itgt, isrc - <> imat = blkranges[irange] + (nsrcblock * itgt + isrc) - - rowindices[imat] = tgtindices[tgtranges[irange] + itgt] \ - {id_prefix=write_index} - colindices[imat] = srcindices[srcranges[irange] + isrc] \ - {id_prefix=write_index} - end - end - """, - [ - lp.GlobalArg("srcindices", None), - lp.GlobalArg("tgtindices", None), - lp.GlobalArg("srcranges", None, shape="nranges + 1"), - lp.GlobalArg("tgtranges", None, shape="nranges + 1"), - lp.GlobalArg("blkranges", None, shape="nranges + 1"), - lp.GlobalArg("rowindices", None, shape="nresults"), - lp.GlobalArg("colindices", None, shape="nresults"), - lp.ValueArg("nresults", None), - lp.ValueArg("nranges", None), - "..." - ], - name="linear_index_knl", - default_offset=lp.auto, - silenced_warnings="write_race(write_index*)", - lang_version=MOST_RECENT_LANGUAGE_VERSION) - loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") - - return loopy_knl - - blkranges = self.block_ranges() - _, (rowindices, colindices) = linear_index()(self.queue, - tgtindices=self.tgtindices, srcindices=self.srcindices, - tgtranges=self.tgtranges, srcranges=self.srcranges, - blkranges=blkranges, nresults=blkranges[-1]) - - return rowindices, colindices, blkranges - - class P2PMatrixBlockGenerator(P2PBase): """Generator for a subset of P2P interaction matrix entries.""" @@ -454,25 +363,27 @@ class P2PMatrixBlockGenerator(P2PBase): return knl def __call__(self, queue, targets, sources, index_set, **kwargs): - """Construct a set of blocks from the full P2P interaction matrix. - The relevant subset of indices for block :math:`i` is given by the - row indices `tgtindices[tgtranges[i]:tgtranges[i + 1]]` and the - column indices `srcindices[srcranges[i]:srcranges[i + 1]]`. - - :arg tgtindices: list of row indices for all blocks. - :arg srcindices: list of column indices for all blocks. - :arg tgtranges: list used to index `tgtindices` in each block. - :arg srcindices: list used to index `srcindices` in each block. - - :return: a tuple containing `(kernel_1_block, ... kernel_n_block, - rowindices, colindices, blkranges)`. The set `(rowindices, colindices)` - can be used to place each element of each block in the full matrix. - Each `kernel_i_block` is a one-dimensional array containing all the - blocks, i.e. the ith kernel evaluated at each of the input - source-target pairs. To retrieve a given block :math:`j`, one can - use `kernel_i_block[blkranges[j]:blkranges[j + 1]]`, which can then - be reshaped into the original block size using the `tgtranges` and - `srcranges` index lists. + """Construct a set of blocks of the full P2P interaction matrix. + + The blocks are returned as one-dimensional arrays, for performance + and storage reasons. If the two-dimensional form is desired, it can + be obtained using the information in the `index_set` for a block + :math:`i` in the following way: + + .. code-block:: python + + blkranges = index_set.linear_ranges() + m = index_set.tgtranges[i + 1] - index_set.tgtranges[i] + n = index_set.srcranges[i + 1] - index_set.srcranges[i] + + block2d = result[blkranges[i]:blkranges[i + 1]].reshape(m, n) + + :arg targets: target point coordinates. + :arg sources: source point coordinates. + :arg index_set: a :class:`sumpy.tools.MatrixBlockIndex` object used + to define the various blocks. + :return: a tuple of one-dimensional arrays of kernel evaluations at + target-source pairs described by `index_set`. """ from pytools.obj_array import is_obj_array knl = self.get_cached_optimized_kernel( @@ -481,7 +392,7 @@ class P2PMatrixBlockGenerator(P2PBase): sources_is_obj_array=( is_obj_array(sources) or isinstance(sources, (tuple, list)))) - rowindices, colindices, _ = index_set.linear_indices() + rowindices, colindices = index_set.linear_indices() return knl(queue, targets=targets, sources=sources, tgtindices=rowindices, srcindices=colindices, **kwargs) diff --git a/sumpy/tools.py b/sumpy/tools.py index 964c64cc..b891da0e 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -24,10 +24,13 @@ THE SOFTWARE. import six from six.moves import range, zip -from pytools import memoize_method +from pytools import memoize_method, memoize_in import numpy as np import sumpy.symbolic as sym +import loopy as lp +from loopy.version import MOST_RECENT_LANGUAGE_VERSION + import logging logger = logging.getLogger(__name__) @@ -291,6 +294,137 @@ class KernelComputation(object): # }}} +# {{{ + +class MatrixBlockIndex(object): + def __init__(self, queue, tgtindices, srcindices, tgtranges, srcranges): + """Keep track of different ways to index into matrix blocks. + + In this description, a block is given by a subset of indices of + the global matrix operator. For example, the :math:`i`-th block of + a matrix `mat` using the input arguments is obtained by: + + .. code-block:: python + + mat[np.ix_(tgtindices[tgtranges[i]:tgtranges[i + 1]], + srcindices[srcranges[i]:srcranges[i + 1]])] + + :arg tgtindices: a subset of the full matrix row index set. + :arg srcindices: a subset of the full matrix column index set. + :arg tgtranges: an array used to index into `tgtindices`. + :arg srcranges: an array used to index into `srcindices`. + """ + + self.queue = queue + self.tgtindices = tgtindices + self.tgtranges = tgtranges + self.srcindices = srcindices + self.srcranges = srcranges + + @memoize_method + def linear_ranges(self): + """ + :return: an array containing the cummulative sum of all block sizes. + It can be used to index into a linear representation of the + matrix blocks. + """ + + @memoize_in(self, "block_cumsum_knl") + def cumsum(): + loopy_knl = lp.make_kernel( + "{[i, j]: 0 <= i < nranges and 0 <= j <= i}", + """ + blkranges[0] = 0 + blkranges[i + 1] = reduce(sum, j, \ + (srcranges[j + 1] - srcranges[j]) * \ + (tgtranges[j + 1] - tgtranges[j])) \ + """, + [ + lp.GlobalArg("tgtranges", None, shape="nranges + 1"), + lp.GlobalArg("srcranges", None, shape="nranges + 1"), + lp.GlobalArg("blkranges", np.int32, shape="nranges + 1"), + lp.ValueArg("nranges", None) + ], + name="block_cumsum_knl", + default_offset=lp.auto, + lang_version=MOST_RECENT_LANGUAGE_VERSION) + + loopy_knl = lp.realize_reduction(loopy_knl, force_scan=True, + force_outer_iname_for_scan="i") + return loopy_knl + + _, (blkranges,) = cumsum()(self.queue, + tgtranges=self.tgtranges, srcranges=self.srcranges); + + return blkranges + + @memoize_method + def linear_indices(self): + """ + :return: a tuple of `(rowindices, colindices)` that can be used in + conjunction with :func:`linear_ranges` to provide linear indexing + into a set of matrix blocks. These index arrays are just the + concatenated Cartesian products of all the block arrays described + by :attr:`tgtindices` and :arg:`srcindices`. + + They can be used to index directly into the matrix as follows: + + .. code-block:: python + + mat[rowindices[blkranges[i]:blkranges[i + 1]], + colindices[blkranges[i]:blkranges[i + 1]]] + """ + + @memoize_in(self, "block_index_knl") + def linear_index(): + loopy_knl = lp.make_kernel([ + "{[irange]: 0 <= irange < nranges}", + "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}" + ], + """ + for irange + <> ntgtblock = tgtranges[irange + 1] - tgtranges[irange] + <> nsrcblock = srcranges[irange + 1] - srcranges[irange] + + for itgt, isrc + <> imat = blkranges[irange] + (nsrcblock * itgt + isrc) + + rowindices[imat] = tgtindices[tgtranges[irange] + itgt] \ + {id_prefix=write_index} + colindices[imat] = srcindices[srcranges[irange] + isrc] \ + {id_prefix=write_index} + end + end + """, + [ + lp.GlobalArg("srcindices", None), + lp.GlobalArg("tgtindices", None), + lp.GlobalArg("srcranges", None, shape="nranges + 1"), + lp.GlobalArg("tgtranges", None, shape="nranges + 1"), + lp.GlobalArg("blkranges", None, shape="nranges + 1"), + lp.GlobalArg("rowindices", None, shape="nresults"), + lp.GlobalArg("colindices", None, shape="nresults"), + lp.ValueArg("nresults", None), + lp.ValueArg("nranges", None), + "..." + ], + name="block_index_knl", + default_offset=lp.auto, + silenced_warnings="write_race(write_index*)", + lang_version=MOST_RECENT_LANGUAGE_VERSION) + loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") + + return loopy_knl + + blkranges = self.linear_ranges() + _, (rowindices, colindices) = linear_index()(self.queue, + tgtindices=self.tgtindices, srcindices=self.srcindices, + tgtranges=self.tgtranges, srcranges=self.srcranges, + blkranges=blkranges, nresults=blkranges[-1]) + + return rowindices, colindices + +# }}} # {{{ OrderedSet diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index 984fbc5b..d6e85c23 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -161,7 +161,7 @@ def test_p2p_direct(ctx_getter, exclude_self, factor): mode_nr = 25 from sumpy.p2p import P2P - from sumpy.p2p import P2PMatrixLinearIndex + from sumpy.tools import MatrixBlockIndex from sumpy.p2p import P2PMatrixGenerator, P2PMatrixBlockGenerator lpot = P2P(ctx, [lknl], exclude_self=exclude_self) mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self) @@ -191,11 +191,11 @@ def test_p2p_direct(ctx_getter, exclude_self, factor): eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps - index_set = P2PMatrixLinearIndex(queue, + index_set = MatrixBlockIndex(queue, tgtindices, srcindices, tgtranges, srcranges) _, (blk,) = blk_gen(queue, targets, sources, index_set, **extra_kwargs) - rowindices, colindices, _ = index_set.linear_indices() + rowindices, colindices = index_set.linear_indices() eps = 1.0e-10 * la.norm(mat) assert la.norm(blk - mat[rowindices, colindices].reshape(-1)) < eps -- GitLab From 0885edb0008673b20640af6aca7f8edc82705822 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Tue, 17 Apr 2018 21:23:49 -0500 Subject: [PATCH 06/11] qbx: use same method as p2p to construct the block matrices --- sumpy/p2p.py | 16 +++++----- sumpy/qbx.py | 70 ++++++++++++++---------------------------- test/test_matrixgen.py | 31 ++++++++----------- 3 files changed, 43 insertions(+), 74 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index d1903a28..9503a2a1 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -307,15 +307,15 @@ class P2PMatrixBlockGenerator(P2PBase): arguments = ( self.get_default_src_tgt_arguments() + [ - lp.GlobalArg("srcindices", None, shape="nresults"), - lp.GlobalArg("tgtindices", None, shape="nresults"), - lp.ValueArg("nresults", None) + lp.GlobalArg("srcindices", None, shape="nresult"), + lp.GlobalArg("tgtindices", None, shape="nresult"), + lp.ValueArg("nresult", None) ] + - [lp.GlobalArg("result_%d" % i, dtype, shape="nresults") + [lp.GlobalArg("result_%d" % i, dtype, shape="nresult") for i, dtype in enumerate(self.value_dtypes)]) loopy_knl = lp.make_kernel( - "{[imat, idim]: 0 <= imat < nresults and 0 <= idim < dim}", + "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}", self.get_kernel_scaling_assignments() + [""" for imat @@ -335,7 +335,7 @@ class P2PMatrixBlockGenerator(P2PBase): for iknl in range(len(self.kernels))] + ["end"], arguments, - assumptions="nresults>=1", + assumptions="nresult>=1", silenced_warnings="write_race(write_p2p*)", name=self.name, fixed_parameters=dict(dim=self.dim), @@ -392,9 +392,9 @@ class P2PMatrixBlockGenerator(P2PBase): sources_is_obj_array=( is_obj_array(sources) or isinstance(sources, (tuple, list)))) - rowindices, colindices = index_set.linear_indices() + tgtindices, srcindices = index_set.linear_indices() return knl(queue, targets=targets, sources=sources, - tgtindices=rowindices, srcindices=colindices, **kwargs) + tgtindices=tgtindices, srcindices=srcindices, **kwargs) # }}} diff --git a/sumpy/qbx.py b/sumpy/qbx.py index f3b2d6f7..73f7ec3d 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -335,68 +335,44 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): arguments = ( self.get_default_src_tgt_arguments() + [ - lp.GlobalArg("srcindices", None, shape="nsrcindices"), - lp.GlobalArg("tgtindices", None, shape="ntgtindices"), - lp.GlobalArg("srcranges", None, shape="nranges + 1"), - lp.GlobalArg("tgtranges", None, shape="nranges + 1"), - lp.ValueArg("nsrcindices", None), - lp.ValueArg("ntgtindices", None), - lp.ValueArg("nranges", None) + lp.GlobalArg("srcindices", None, shape="nresult"), + lp.GlobalArg("tgtindices", None, shape="nresult"), + lp.ValueArg("nresult", None) ] + - [lp.GlobalArg("result_%d" % i, - dtype, shape="ntgtindices, nsrcindices") - for i, dtype in enumerate(self.value_dtypes)]) + [lp.GlobalArg("result_%d" % i, dtype, shape="nresult") + for i, dtype in enumerate(self.value_dtypes)]) loopy_knl = lp.make_kernel([ - "{[irange]: 0 <= irange < nranges}", - "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}", - "{[idim]: 0 <= idim < dim}" + "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}" ], self.get_kernel_scaling_assignments() + [""" - for irange - <> tgtstart = tgtranges[irange] - <> tgtend = tgtranges[irange + 1] - <> ntgtblock = tgtend - tgtstart - <> srcstart = srcranges[irange] - <> srcend = srcranges[irange + 1] - <> nsrcblock = srcend - srcstart - - for itgt, isrc - <> a[idim] = center[idim, tgtindices[tgtstart + itgt]] - \ - src[idim, srcindices[srcstart + isrc]] \ - {dup=idim} - <> b[idim] = tgt[idim, tgtindices[tgtstart + itgt]] - \ - center[idim, tgtindices[tgtstart + itgt]] \ - {dup=idim} - <> rscale = expansion_radii[tgtindices[tgtstart + itgt]] + for imat + <> a[idim] = center[idim, tgtindices[imat]] - \ + src[idim, srcindices[imat]] {dup=idim} + <> b[idim] = tgt[idim, tgtindices[imat]] - \ + center[idim, tgtindices[imat]] {dup=idim} + <> rscale = expansion_radii[tgtindices[imat]] """] + loopy_insns + kernel_exprs + [""" - result_{i}[tgtstart + itgt, srcstart + isrc] = \ - knl_{i}_scaling * pair_result_{i} \ - {{id_prefix=write_lpot,inames=irange}} + result_{i}[imat] = knl_{i}_scaling * pair_result_{i} \ + {{id_prefix=write_lpot}} """.format(i=iknl) for iknl in range(len(self.expansions))] - + [""" - end - end - """], + + ["end"], arguments, name=self.name, - assumptions="nranges>=1", + assumptions="nresult>=1", default_offset=lp.auto, silenced_warnings="write_race(write_lpot*)", fixed_parameters=dict(dim=self.dim), lang_version=MOST_RECENT_LANGUAGE_VERSION) - loopy_knl = lp.add_dtypes(loopy_knl, dict( - nsources=np.int32, - ntargets=np.int32, - ntgtindices=np.int32, - nsrcindices=np.int32)) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + loopy_knl = lp.add_dtypes(loopy_knl, + dict(nsources=np.int32, ntargets=np.int32)) + for expn in self.expansions: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) @@ -405,17 +381,17 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): def get_optimized_kernel(self): loopy_knl = self.get_kernel() - loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") + loopy_knl = lp.split_iname(loopy_knl, "imat", 1024, outer_tag="g.0") return loopy_knl def __call__(self, queue, targets, sources, centers, expansion_radii, - tgtindices, srcindices, tgtranges, srcranges, **kwargs): + index_set, **kwargs): knl = self.get_cached_optimized_kernel() + tgtindices, srcindices = index_set.linear_indices() return knl(queue, src=sources, tgt=targets, center=centers, expansion_radii=expansion_radii, - tgtindices=tgtindices, srcindices=srcindices, - tgtranges=tgtranges, srcranges=srcranges, **kwargs) + tgtindices=tgtindices, srcindices=srcindices, **kwargs) # }}} diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index d6e85c23..a687ee16 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -31,6 +31,8 @@ import pyopencl as cl from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) +from sumpy.tools import MatrixBlockIndex + import logging logger = logging.getLogger(__name__) @@ -82,8 +84,8 @@ def create_index_subset(nnodes, nblks, factor): return indices_, ranges_ - -def test_qbx_direct(ctx_getter): +@pytest.mark.parametrize('factor', [1.0, 0.6]) +def test_qbx_direct(ctx_getter, factor): # This evaluates a single layer potential on a circle. logging.basicConfig(level=logging.INFO) @@ -114,14 +116,8 @@ def test_qbx_direct(ctx_getter): h = 2 * np.pi / n strengths = (sigma * h,) - tgtindices = np.arange(0, n) - tgtindices = np.random.choice(tgtindices, size=int(0.8 * n)) - tgtranges = np.arange(0, tgtindices.shape[0] + 1, - tgtindices.shape[0] // nblks) - srcindices = np.arange(0, n) - srcindices = np.random.choice(srcindices, size=int(0.8 * n)) - srcranges = np.arange(0, srcindices.shape[0] + 1, - srcindices.shape[0] // nblks) + tgtindices, tgtranges = create_index_subset(n, nblks, factor) + srcindices, srcranges = create_index_subset(n, nblks, factor) assert tgtranges.shape == srcranges.shape _, (mat,) = mat_gen(queue, targets, sources, centers, @@ -134,16 +130,14 @@ def test_qbx_direct(ctx_getter): eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps + index_set = MatrixBlockIndex(queue, + tgtindices, srcindices, tgtranges, srcranges) _, (blk,) = blk_gen(queue, targets, sources, centers, expansion_radii, - tgtindices, srcindices, tgtranges, srcranges) + index_set) - for i in range(srcranges.shape[0] - 1): - itgt = np.s_[tgtranges[i]:tgtranges[i + 1]] - isrc = np.s_[srcranges[i]:srcranges[i + 1]] - block = np.ix_(tgtindices[itgt], srcindices[isrc]) - - eps = 1.0e-10 * la.norm(mat[block]) - assert la.norm(blk[itgt, isrc] - mat[block]) < eps + rowindices, colindices = index_set.linear_indices() + eps = 1.0e-10 * la.norm(mat) + assert la.norm(blk - mat[rowindices, colindices].reshape(-1)) < eps @pytest.mark.parametrize(("exclude_self", "factor"), @@ -161,7 +155,6 @@ def test_p2p_direct(ctx_getter, exclude_self, factor): mode_nr = 25 from sumpy.p2p import P2P - from sumpy.tools import MatrixBlockIndex from sumpy.p2p import P2PMatrixGenerator, P2PMatrixBlockGenerator lpot = P2P(ctx, [lknl], exclude_self=exclude_self) mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self) -- GitLab From c120504edd0e2740b54c0ae1a0ca82663a78b7aa Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Tue, 17 Apr 2018 21:26:18 -0500 Subject: [PATCH 07/11] qbx: add some docs --- sumpy/qbx.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 73f7ec3d..554771e7 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -386,6 +386,17 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): def __call__(self, queue, targets, sources, centers, expansion_radii, index_set, **kwargs): + """ + :arg targets: target point coordinates. + :arg sources: source point coordinates. + :arg centers: QBX target expansion centers. + :arg expansion_radii: radii for each expansion center. + :arg index_set: a :class:`sumpy.tools.MatrixBlockIndex` object used + to define the various blocks. + :return: a tuple of one-dimensional arrays of kernel evaluations at + target-source pairs described by `index_set`. + """ + knl = self.get_cached_optimized_kernel() tgtindices, srcindices = index_set.linear_indices() -- GitLab From 11fa1a90635f5715ac296e4a361bfcb802fce739 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Wed, 18 Apr 2018 16:47:12 -0500 Subject: [PATCH 08/11] tools: remove some globalargs that loopy can infer --- sumpy/tools.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index b891da0e..9d73d1a1 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -347,6 +347,7 @@ class MatrixBlockIndex(object): ], name="block_cumsum_knl", default_offset=lp.auto, + assumptions="nranges>=1", lang_version=MOST_RECENT_LANGUAGE_VERSION) loopy_knl = lp.realize_reduction(loopy_knl, force_scan=True, @@ -397,19 +398,15 @@ class MatrixBlockIndex(object): end """, [ - lp.GlobalArg("srcindices", None), - lp.GlobalArg("tgtindices", None), - lp.GlobalArg("srcranges", None, shape="nranges + 1"), - lp.GlobalArg("tgtranges", None, shape="nranges + 1"), - lp.GlobalArg("blkranges", None, shape="nranges + 1"), + lp.GlobalArg('blkranges', None, shape="nranges + 1"), lp.GlobalArg("rowindices", None, shape="nresults"), lp.GlobalArg("colindices", None, shape="nresults"), lp.ValueArg("nresults", None), - lp.ValueArg("nranges", None), - "..." + '...' ], name="block_index_knl", default_offset=lp.auto, + assumptions='nranges>=1', silenced_warnings="write_race(write_index*)", lang_version=MOST_RECENT_LANGUAGE_VERSION) loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") -- GitLab From a7a171e9f4b268dd8c91dcfe668eb37a7c8bd9aa Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Wed, 18 Apr 2018 16:48:41 -0500 Subject: [PATCH 09/11] enable doc generation --- sumpy/p2p.py | 9 ++++++--- sumpy/qbx.py | 10 ++++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 9503a2a1..f0bba60f 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -294,7 +294,10 @@ class P2PMatrixGenerator(P2PBase): # {{{ P2P matrix block writer class P2PMatrixBlockGenerator(P2PBase): - """Generator for a subset of P2P interaction matrix entries.""" + """Generator for a subset of P2P interaction matrix entries. + + .. automethod:: __call__ + """ default_name = "p2p_block" @@ -381,9 +384,9 @@ class P2PMatrixBlockGenerator(P2PBase): :arg targets: target point coordinates. :arg sources: source point coordinates. :arg index_set: a :class:`sumpy.tools.MatrixBlockIndex` object used - to define the various blocks. + to define the various blocks. :return: a tuple of one-dimensional arrays of kernel evaluations at - target-source pairs described by `index_set`. + target-source pairs described by `index_set`. """ from pytools.obj_array import is_obj_array knl = self.get_cached_optimized_kernel( diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 554771e7..985565f1 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -189,7 +189,10 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper): # {{{ direct applier class LayerPotential(LayerPotentialBase): - """Direct applier for the layer potential.""" + """Direct applier for the layer potential. + + .. automethod:: __call__ + """ default_name = "qbx_apply" @@ -321,7 +324,10 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase): # {{{ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): - """Generator for a subset of the layer potential matrix entries.""" + """Generator for a subset of the layer potential matrix entries. + + .. automethod:: __call__ + """ default_name = "qbx_block" -- GitLab From c654614108fcd644927828815d537224f2f6f1f8 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Wed, 18 Apr 2018 16:51:54 -0500 Subject: [PATCH 10/11] add copyright --- sumpy/tools.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index 9d73d1a1..4ddfb25f 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -1,6 +1,9 @@ from __future__ import division, absolute_import -__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" +__copyright__ = """ +Copyright (C) 2012 Andreas Kloeckner +Copyright (C) 2018 Alexandru Fikl +""" __license__ = """ Permission is hereby granted, free of charge, to any person obtaining a copy -- GitLab From 992acb2ba693c9c876f3d8ce18bed503dd8696ca Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Wed, 18 Apr 2018 17:30:51 -0500 Subject: [PATCH 11/11] flake8 fixes --- sumpy/p2p.py | 1 - sumpy/tools.py | 4 +++- test/test_matrixgen.py | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index f0bba60f..c408360d 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -33,7 +33,6 @@ import loopy as lp from loopy.version import MOST_RECENT_LANGUAGE_VERSION from pymbolic import var -from pytools import memoize_method, memoize_in from sumpy.tools import KernelComputation, KernelCacheWrapper diff --git a/sumpy/tools.py b/sumpy/tools.py index 4ddfb25f..b80deddd 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -297,6 +297,7 @@ class KernelComputation(object): # }}} + # {{{ class MatrixBlockIndex(object): @@ -358,7 +359,7 @@ class MatrixBlockIndex(object): return loopy_knl _, (blkranges,) = cumsum()(self.queue, - tgtranges=self.tgtranges, srcranges=self.srcranges); + tgtranges=self.tgtranges, srcranges=self.srcranges) return blkranges @@ -426,6 +427,7 @@ class MatrixBlockIndex(object): # }}} + # {{{ OrderedSet # Source: http://code.activestate.com/recipes/576694-orderedset/ diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index a687ee16..892fef63 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -84,6 +84,7 @@ def create_index_subset(nnodes, nblks, factor): return indices_, ranges_ + @pytest.mark.parametrize('factor', [1.0, 0.6]) def test_qbx_direct(ctx_getter, factor): # This evaluates a single layer potential on a circle. -- GitLab