From 2df8cd3bffb71b32d8f85aa4915ac142a1276719 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Sun, 18 Feb 2018 17:09:43 -0600 Subject: [PATCH 1/4] add block matrix generators --- sumpy/p2p.py | 121 ++++++++++++++++++++++++++++++++++++++--- sumpy/qbx.py | 113 +++++++++++++++++++++++++++++++++++--- test/test_matrixgen.py | 59 ++++++++++++++++++-- 3 files changed, 272 insertions(+), 21 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 8c37b0e0..b76e7890 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -120,10 +120,26 @@ class SingleSrcTgtListP2PBase(P2PComputationBase): shape=(self.dim, "nsources")), lp.GlobalArg("targets", None, shape=(self.dim, "ntargets")), - lp.ValueArg("nsources", None), - lp.ValueArg("ntargets", None) + lp.ValueArg("nsources", np.int32), + lp.ValueArg("ntargets", np.int32) + ] + + def get_domains(self): + return [ + "{[itgt]: 0 <= itgt < ntargets}", + "{[isrc]: 0 <= isrc < nsources}", + "{[idim]: 0 <= idim < dim}" ] + def get_loop_begin(self): + return ["for itgt, isrc"] + + def get_loop_end(self): + return ["end"] + + def get_assumptions(self): + return "nsources>=1 and ntargets>=1" + def get_kernel(self): loopy_insns, result_names = self.get_loopy_insns_and_result_names() @@ -146,25 +162,24 @@ class SingleSrcTgtListP2PBase(P2PComputationBase): + gather_loopy_source_arguments(self.kernels)) loopy_knl = lp.make_kernel( - "{[isrc,itgt,idim]: 0<=itgt d[idim] = targets[idim,itgt] - sources[idim,isrc]"] + ["<> is_self = (isrc == target_to_source[itgt])" if self.exclude_self else ""] + + loopy_insns + [ lp.Assignment(id=None, assignee="pair_result_%d" % i, expression=expr, temp_var_type=lp.auto) for i, expr in enumerate(exprs) ] - + ["end"] + + self.get_loop_end() + self.get_result_store_instructions(), arguments, + assumptions=self.get_assumptions(), name=self.name, - assumptions="nsources>=1 and ntargets>=1", fixed_parameters=dict( dim=self.dim, nstrengths=self.strength_count, @@ -186,7 +201,8 @@ class SingleSrcTgtListP2PBase(P2PComputationBase): if targets_is_obj_array: knl = lp.tag_array_axes(knl, "targets", "sep,C") - knl = lp.split_iname(knl, "itgt", 1024, outer_tag="g.0") + # FIXME: how to split when using blocks? + # knl = lp.split_iname(knl, "itgt", 1024, outer_tag="g.0") return knl @@ -266,6 +282,93 @@ class P2PMatrixGenerator(SingleSrcTgtListP2PBase): # }}} +# {{{ + +class P2PMatrixBlockGenerator(SingleSrcTgtListP2PBase): + default_name = "p2p_block" + + def get_src_tgt_arguments(self): + return super(P2PMatrixBlockGenerator, self).get_src_tgt_arguments() \ + + [ + lp.GlobalArg("srcindices", None, shape="nsrcindices"), + lp.GlobalArg("tgtindices", None, shape="ntgtindices"), + lp.GlobalArg("srcranges", None, shape="nranges"), + lp.GlobalArg("tgtranges", None, shape="nranges"), + lp.ValueArg("nsrcindices", np.int32), + lp.ValueArg("ntgtindices", np.int32), + lp.ValueArg("nranges", None) + ] + + def get_domains(self): + # FIXME: this doesn't work when separating j and k + return [ + "{[i]: 0 <= i < nranges - 1}", + "{[j, k]: 0 <= j < tgt_length and 0 <= k < src_length}", + "{[idim]: 0 <= idim < dim}" + ] + + def get_loop_begin(self): + return [ + """ + for i + <> tgtstart = tgtranges[i] + <> tgtend = tgtranges[i + 1] + <> tgt_length = tgtend - tgtstart + <> srcstart = srcranges[i] + <> srcend = srcranges[i + 1] + <> src_length = srcend - srcstart + for j, k + <> itgt = tgtindices[tgtstart + j] + <> isrc = srcindices[srcstart + k] + """ + ] + + def get_loop_end(self): + return [ + """ + end + end + """ + ] + + def get_strength_or_not(self, isrc, kernel_idx): + return 1 + + def get_input_and_output_arguments(self): + return [ + lp.GlobalArg("result_%d" % i, dtype, shape="ntgtindices,nsrcindices") + for i, dtype in enumerate(self.value_dtypes) + ] + + def get_result_store_instructions(self): + # FIXME: doesn't work without inames=i. check how the loops are nested! + return [ + """ + result_{i}[tgtstart + j, srcstart + k] = \ + knl_{i}_scaling * pair_result_{i} {{inames=i}} + """.format(i=iknl) + for iknl in range(len(self.kernels)) + ] + + def get_assumptions(self): + return "nranges>=2" + + def __call__(self, queue, targets, sources, tgtindices, srcindices, + tgtranges, srcranges, **kwargs): + from pytools.obj_array import is_obj_array + knl = self.get_optimized_kernel( + targets_is_obj_array=( + is_obj_array(targets) or isinstance(targets, (tuple, list))), + sources_is_obj_array=( + is_obj_array(sources) or isinstance(sources, (tuple, list)))) + + return knl(queue, targets=targets, sources=sources, + tgtindices=tgtindices, srcindices=srcindices, + tgtranges=tgtranges, srcranges=srcranges, **kwargs) + +# }}} + + # {{{ P2P from CSR-like interaction list class P2PFromCSR(P2PComputationBase): diff --git a/sumpy/qbx.py b/sumpy/qbx.py index cde20ef2..64ea5578 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -116,10 +116,26 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper): lp.GlobalArg("center", None, shape=(self.dim, "ntargets"), order="C"), lp.GlobalArg("expansion_radii", None, shape="ntargets"), - lp.ValueArg("nsources", None), - lp.ValueArg("ntargets", None), + lp.ValueArg("nsources", np.int32), + lp.ValueArg("ntargets", np.int32), ] + def get_domains(self): + return [ + "{[itgt]: 0 <= itgt < ntargets}", + "{[isrc]: 0 <= isrc < nsources}", + "{[idim]: 0 <= idim < dim}" + ] + + def get_loop_begin(self): + return ["for itgt, isrc"] + + def get_loop_end(self): + return ["end"] + + def get_assumptions(self): + return "nsources>=1 and ntargets>=1" + @memoize_method def get_kernel(self): from sumpy.symbolic import make_sym_vector @@ -162,10 +178,9 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper): + gather_loopy_source_arguments(self.kernels)) loopy_knl = lp.make_kernel( - "{[isrc,itgt,idim]: 0<=itgt tgtstart = tgtranges[i] + <> tgtend = tgtranges[i + 1] + <> tgt_length = tgtend - tgtstart + <> srcstart = srcranges[i] + <> srcend = srcranges[i + 1] + <> src_length = srcend - srcstart + for j, k + <> itgt = tgtindices[tgtstart + j] + <> isrc = srcindices[srcstart + k] + """ + ] + + def get_loop_end(self): + return [ + """ + end + end + """ + ] + + def get_strength_or_not(self, isrc, kernel_idx): + return 1 + + def get_input_and_output_arguments(self): + return [ + lp.GlobalArg("result_%d" % i, dtype, shape="ntgtindices,nsrcindices") + for i, dtype in enumerate(self.value_dtypes) + ] + + def get_result_store_instructions(self): + return [ + """ + result_KNLIDX[tgtstart + j, srcstart + k] = \ + knl_KNLIDX_scaling*pair_result_KNLIDX {inames=i} + """.replace("KNLIDX", str(iknl)) + for iknl in range(len(self.expansions)) + ] + + def get_assumptions(self): + return "nranges>=2" + + def __call__(self, queue, targets, sources, centers, expansion_radii, + tgtindices, srcindices, tgtranges, srcranges, **kwargs): + knl = self.get_optimized_kernel() + + return knl(queue, src=sources, tgt=targets, center=centers, + expansion_radii=expansion_radii, + tgtindices=tgtindices, srcindices=srcindices, + tgtranges=tgtranges, srcranges=srcranges, **kwargs) + +# }}} + # }}} diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index 244a555b..ed5cbae7 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -73,15 +73,19 @@ def test_qbx_direct(ctx_getter): from sumpy.kernel import LaplaceKernel lknl = LaplaceKernel(2) + nblks = 10 order = 12 mode_nr = 25 from sumpy.qbx import LayerPotential from sumpy.qbx import LayerPotentialMatrixGenerator + from sumpy.qbx import LayerPotentialMatrixBlockGenerator from sumpy.expansion.local import LineTaylorLocalExpansion + lpot = LayerPotential(ctx, [LineTaylorLocalExpansion(lknl, order)]) mat_gen = LayerPotentialMatrixGenerator(ctx, [LineTaylorLocalExpansion(lknl, order)]) - lpot = LayerPotential(ctx, [LineTaylorLocalExpansion(lknl, order)]) + blk_gen = LayerPotentialMatrixBlockGenerator(ctx, + [LineTaylorLocalExpansion(lknl, order)]) for n in [200, 300, 400]: targets, sources, centers, sigma, expansion_radii = \ @@ -90,16 +94,37 @@ def test_qbx_direct(ctx_getter): h = 2 * np.pi / n strengths = (sigma * h,) + tgtindices = np.arange(0, n) + tgtindices = np.random.choice(tgtindices, size=int(0.8 * n)) + tgtranges = np.arange(0, tgtindices.shape[0] + 1, + tgtindices.shape[0] // nblks) + srcindices = np.arange(0, n) + srcindices = np.random.choice(srcindices, size=int(0.8 * n)) + srcranges = np.arange(0, srcindices.shape[0] + 1, + srcindices.shape[0] // nblks) + assert tgtranges.shape == srcranges.shape + _, (mat,) = mat_gen(queue, targets, sources, centers, - expansion_radii=expansion_radii) + expansion_radii) result_mat = mat.dot(strengths[0]) _, (result_lpot,) = lpot(queue, targets, sources, centers, strengths, - expansion_radii=expansion_radii) + expansion_radii) eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps + _, (blk,) = blk_gen(queue, targets, sources, centers, expansion_radii, + tgtindices, srcindices, tgtranges, srcranges) + + for i in range(srcranges.shape[0] - 1): + itgt = np.s_[tgtranges[i]:tgtranges[i + 1]] + isrc = np.s_[srcranges[i]:srcranges[i + 1]] + block = np.ix_(tgtindices[itgt], srcindices[isrc]) + + eps = 1.0e-10 * la.norm(mat[block]) + assert la.norm(blk[itgt, isrc] - mat[block]) < eps + @pytest.mark.parametrize("exclude_self", [True, False]) def test_p2p_direct(ctx_getter, exclude_self): @@ -112,12 +137,14 @@ def test_p2p_direct(ctx_getter, exclude_self): from sumpy.kernel import LaplaceKernel lknl = LaplaceKernel(2) + nblks = 10 mode_nr = 25 from sumpy.p2p import P2P - from sumpy.p2p import P2PMatrixGenerator - mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self) + from sumpy.p2p import P2PMatrixGenerator, P2PMatrixBlockGenerator lpot = P2P(ctx, [lknl], exclude_self=exclude_self) + mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self) + blk_gen = P2PMatrixBlockGenerator(ctx, [lknl], exclude_self=exclude_self) for n in [200, 300, 400]: targets, sources, _, sigma, _ = \ @@ -126,6 +153,16 @@ def test_p2p_direct(ctx_getter, exclude_self): h = 2 * np.pi / n strengths = (sigma * h,) + tgtindices = np.arange(0, n) + tgtindices = np.random.choice(tgtindices, size=int(0.8 * n)) + tgtranges = np.arange(0, tgtindices.shape[0] + 1, + tgtindices.shape[0] // nblks) + srcindices = np.arange(0, n) + srcindices = np.random.choice(srcindices, size=int(0.8 * n)) + srcranges = np.arange(0, srcindices.shape[0] + 1, + srcindices.shape[0] // nblks) + assert tgtranges.shape == srcranges.shape + extra_kwargs = {} if exclude_self: extra_kwargs["target_to_source"] = np.arange(n, dtype=np.int32) @@ -139,6 +176,18 @@ def test_p2p_direct(ctx_getter, exclude_self): eps = 1.0e-10 * la.norm(result_lpot) assert la.norm(result_mat - result_lpot) < eps + _, (blk,) = blk_gen(queue, targets, sources, + tgtindices, srcindices, tgtranges, srcranges, + **extra_kwargs) + + for i in range(srcranges.shape[0] - 1): + itgt = np.s_[tgtranges[i]:tgtranges[i + 1]] + isrc = np.s_[srcranges[i]:srcranges[i + 1]] + block = np.ix_(tgtindices[itgt], srcindices[isrc]) + + eps = 1.0e-10 * la.norm(mat[block]) + assert la.norm(blk[itgt, isrc] - mat[block]) < eps + # You can test individual routines by typing # $ python test_kernels.py 'test_p2p(cl.create_some_context)' -- GitLab From c75253c1420d622a0479eeef4c9a8462bbf53d5b Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Wed, 21 Feb 2018 16:55:45 -0600 Subject: [PATCH 2/4] rename block iteration index --- sumpy/p2p.py | 34 ++++++++++++++++++++++------------ sumpy/qbx.py | 23 +++++++++++++++-------- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index b76e7890..e7b47d1e 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -201,8 +201,7 @@ class SingleSrcTgtListP2PBase(P2PComputationBase): if targets_is_obj_array: knl = lp.tag_array_axes(knl, "targets", "sep,C") - # FIXME: how to split when using blocks? - # knl = lp.split_iname(knl, "itgt", 1024, outer_tag="g.0") + knl = lp.split_iname(knl, "itgt", 1024, outer_tag="g.0") return knl @@ -300,9 +299,8 @@ class P2PMatrixBlockGenerator(SingleSrcTgtListP2PBase): ] def get_domains(self): - # FIXME: this doesn't work when separating j and k return [ - "{[i]: 0 <= i < nranges - 1}", + "{[irange]: 0 <= irange < nranges - 1}", "{[j, k]: 0 <= j < tgt_length and 0 <= k < src_length}", "{[idim]: 0 <= idim < dim}" ] @@ -310,12 +308,12 @@ class P2PMatrixBlockGenerator(SingleSrcTgtListP2PBase): def get_loop_begin(self): return [ """ - for i - <> tgtstart = tgtranges[i] - <> tgtend = tgtranges[i + 1] + for irange + <> tgtstart = tgtranges[irange] + <> tgtend = tgtranges[irange + 1] <> tgt_length = tgtend - tgtstart - <> srcstart = srcranges[i] - <> srcend = srcranges[i + 1] + <> srcstart = srcranges[irange] + <> srcend = srcranges[irange + 1] <> src_length = srcend - srcstart for j, k <> itgt = tgtindices[tgtstart + j] @@ -341,11 +339,10 @@ class P2PMatrixBlockGenerator(SingleSrcTgtListP2PBase): ] def get_result_store_instructions(self): - # FIXME: doesn't work without inames=i. check how the loops are nested! return [ """ result_{i}[tgtstart + j, srcstart + k] = \ - knl_{i}_scaling * pair_result_{i} {{inames=i}} + knl_{i}_scaling * pair_result_{i} {{inames=irange}} """.format(i=iknl) for iknl in range(len(self.kernels)) ] @@ -353,14 +350,27 @@ class P2PMatrixBlockGenerator(SingleSrcTgtListP2PBase): def get_assumptions(self): return "nranges>=2" + def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array): + # FIXME + knl = self.get_kernel() + + if sources_is_obj_array: + knl = lp.tag_array_axes(knl, "sources", "sep,C") + if targets_is_obj_array: + knl = lp.tag_array_axes(knl, "targets", "sep,C") + + knl = lp.split_iname(knl, "irange", 128, outer_tag="g.0") + return knl + def __call__(self, queue, targets, sources, tgtindices, srcindices, tgtranges, srcranges, **kwargs): from pytools.obj_array import is_obj_array - knl = self.get_optimized_kernel( + knl = self.get_cached_optimized_kernel( targets_is_obj_array=( is_obj_array(targets) or isinstance(targets, (tuple, list))), sources_is_obj_array=( is_obj_array(sources) or isinstance(sources, (tuple, list)))) + print(knl) return knl(queue, targets=targets, sources=sources, tgtindices=tgtindices, srcindices=srcindices, diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 64ea5578..52db31f0 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -212,7 +212,6 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper): def get_optimized_kernel(self): # FIXME specialize/tune for GPU/CPU loopy_knl = self.get_kernel() - return loopy_knl # FIXME: how to tune for blocks? import pyopencl as cl @@ -335,7 +334,7 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): def get_domains(self): # FIXME: this doesn't work when separating j and k return [ - "{[i]: 0 <= i < nranges - 1}", + "{[irange]: 0 <= irange < nranges - 1}", "{[j, k]: 0 <= j < tgt_length and 0 <= k < src_length}", "{[idim]: 0 <= idim < dim}" ] @@ -343,12 +342,12 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): def get_loop_begin(self): return [ """ - for i - <> tgtstart = tgtranges[i] - <> tgtend = tgtranges[i + 1] + for irange + <> tgtstart = tgtranges[irange] + <> tgtend = tgtranges[irange + 1] <> tgt_length = tgtend - tgtstart - <> srcstart = srcranges[i] - <> srcend = srcranges[i + 1] + <> srcstart = srcranges[irange] + <> srcend = srcranges[irange + 1] <> src_length = srcend - srcstart for j, k <> itgt = tgtindices[tgtstart + j] @@ -377,7 +376,7 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): return [ """ result_KNLIDX[tgtstart + j, srcstart + k] = \ - knl_KNLIDX_scaling*pair_result_KNLIDX {inames=i} + knl_KNLIDX_scaling*pair_result_KNLIDX {inames=irange} """.replace("KNLIDX", str(iknl)) for iknl in range(len(self.expansions)) ] @@ -385,6 +384,14 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): def get_assumptions(self): return "nranges>=2" + @memoize_method + def get_optimized_kernel(self): + # FIXME + loopy_knl = self.get_kernel() + + loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") + return loopy_knl + def __call__(self, queue, targets, sources, centers, expansion_radii, tgtindices, srcindices, tgtranges, srcranges, **kwargs): knl = self.get_optimized_kernel() -- GitLab From bc92b89d2ffe4c67d88700abedcefc6e595ae8d2 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Thu, 22 Feb 2018 00:08:32 -0600 Subject: [PATCH 3/4] bump kernel version --- sumpy/p2p.py | 1 - sumpy/qbx.py | 5 ++++- sumpy/version.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index e7b47d1e..422a3c92 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -370,7 +370,6 @@ class P2PMatrixBlockGenerator(SingleSrcTgtListP2PBase): is_obj_array(targets) or isinstance(targets, (tuple, list))), sources_is_obj_array=( is_obj_array(sources) or isinstance(sources, (tuple, list)))) - print(knl) return knl(queue, targets=targets, sources=sources, tgtindices=tgtindices, srcindices=srcindices, diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 52db31f0..50d41464 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -1,6 +1,9 @@ from __future__ import division, absolute_import -__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" +__copyright__ = """ +Copyright (C) 2012 Andreas Kloeckner +Copyright (C) 2018 Alexandru Fikl +""" __license__ = """ Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/sumpy/version.py b/sumpy/version.py index 98f05a2d..7edeb71f 100644 --- a/sumpy/version.py +++ b/sumpy/version.py @@ -29,4 +29,4 @@ VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS # branch name, so as to avoid conflicts with the master branch. Make sure # to reset this to the next number up with "master" before merging into # master. -KERNEL_VERSION = ("master", 28) +KERNEL_VERSION = ("master", 29) -- GitLab From 2a91c9a85f239dfd937b8e239275377686097415 Mon Sep 17 00:00:00 2001 From: Alex Fikl Date: Tue, 27 Feb 2018 08:06:51 -0600 Subject: [PATCH 4/4] make nranges be equal to the number of blocks --- sumpy/p2p.py | 8 ++++---- sumpy/qbx.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 422a3c92..d8586cc2 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -291,8 +291,8 @@ class P2PMatrixBlockGenerator(SingleSrcTgtListP2PBase): + [ lp.GlobalArg("srcindices", None, shape="nsrcindices"), lp.GlobalArg("tgtindices", None, shape="ntgtindices"), - lp.GlobalArg("srcranges", None, shape="nranges"), - lp.GlobalArg("tgtranges", None, shape="nranges"), + lp.GlobalArg("srcranges", None, shape="nranges + 1"), + lp.GlobalArg("tgtranges", None, shape="nranges + 1"), lp.ValueArg("nsrcindices", np.int32), lp.ValueArg("ntgtindices", np.int32), lp.ValueArg("nranges", None) @@ -300,7 +300,7 @@ class P2PMatrixBlockGenerator(SingleSrcTgtListP2PBase): def get_domains(self): return [ - "{[irange]: 0 <= irange < nranges - 1}", + "{[irange]: 0 <= irange < nranges}", "{[j, k]: 0 <= j < tgt_length and 0 <= k < src_length}", "{[idim]: 0 <= idim < dim}" ] @@ -348,7 +348,7 @@ class P2PMatrixBlockGenerator(SingleSrcTgtListP2PBase): ] def get_assumptions(self): - return "nranges>=2" + return "nranges>=1" def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array): # FIXME diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 50d41464..0f617e62 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -327,8 +327,8 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): + [ lp.GlobalArg("srcindices", None, shape="nsrcindices"), lp.GlobalArg("tgtindices", None, shape="ntgtindices"), - lp.GlobalArg("srcranges", None, shape="nranges"), - lp.GlobalArg("tgtranges", None, shape="nranges"), + lp.GlobalArg("srcranges", None, shape="nranges + 1"), + lp.GlobalArg("tgtranges", None, shape="nranges + 1"), lp.ValueArg("nsrcindices", np.int32), lp.ValueArg("ntgtindices", np.int32), lp.ValueArg("nranges", None) @@ -337,7 +337,7 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): def get_domains(self): # FIXME: this doesn't work when separating j and k return [ - "{[irange]: 0 <= irange < nranges - 1}", + "{[irange]: 0 <= irange < nranges}", "{[j, k]: 0 <= j < tgt_length and 0 <= k < src_length}", "{[idim]: 0 <= idim < dim}" ] @@ -385,7 +385,7 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): ] def get_assumptions(self): - return "nranges>=2" + return "nranges>=1" @memoize_method def get_optimized_kernel(self): -- GitLab