diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 422a3c92330960aee0aed8c9cf2b5b16654a2bb8..d8586cc228e47d6aa9f12d7c7c436726ad28bc2e 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -291,8 +291,8 @@ class P2PMatrixBlockGenerator(SingleSrcTgtListP2PBase): + [ lp.GlobalArg("srcindices", None, shape="nsrcindices"), lp.GlobalArg("tgtindices", None, shape="ntgtindices"), - lp.GlobalArg("srcranges", None, shape="nranges"), - lp.GlobalArg("tgtranges", None, shape="nranges"), + lp.GlobalArg("srcranges", None, shape="nranges + 1"), + lp.GlobalArg("tgtranges", None, shape="nranges + 1"), lp.ValueArg("nsrcindices", np.int32), lp.ValueArg("ntgtindices", np.int32), lp.ValueArg("nranges", None) @@ -300,7 +300,7 @@ class P2PMatrixBlockGenerator(SingleSrcTgtListP2PBase): def get_domains(self): return [ - "{[irange]: 0 <= irange < nranges - 1}", + "{[irange]: 0 <= irange < nranges}", "{[j, k]: 0 <= j < tgt_length and 0 <= k < src_length}", "{[idim]: 0 <= idim < dim}" ] @@ -348,7 +348,7 @@ class P2PMatrixBlockGenerator(SingleSrcTgtListP2PBase): ] def get_assumptions(self): - return "nranges>=2" + return "nranges>=1" def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array): # FIXME diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 50d4146484c051041e1382d1c8b833aa67bbea26..0f617e62990c4aad8f187b03d22e2ac9c72b46fd 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -327,8 +327,8 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): + [ lp.GlobalArg("srcindices", None, shape="nsrcindices"), lp.GlobalArg("tgtindices", None, shape="ntgtindices"), - lp.GlobalArg("srcranges", None, shape="nranges"), - lp.GlobalArg("tgtranges", None, shape="nranges"), + lp.GlobalArg("srcranges", None, shape="nranges + 1"), + lp.GlobalArg("tgtranges", None, shape="nranges + 1"), lp.ValueArg("nsrcindices", np.int32), lp.ValueArg("ntgtindices", np.int32), lp.ValueArg("nranges", None) @@ -337,7 +337,7 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): def get_domains(self): # FIXME: this doesn't work when separating j and k return [ - "{[irange]: 0 <= irange < nranges - 1}", + "{[irange]: 0 <= irange < nranges}", "{[j, k]: 0 <= j < tgt_length and 0 <= k < src_length}", "{[idim]: 0 <= idim < dim}" ] @@ -385,7 +385,7 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): ] def get_assumptions(self): - return "nranges>=2" + return "nranges>=1" @memoize_method def get_optimized_kernel(self):