diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 648b2d4a4b32a6e54009069ae8cbf6bbbd670b71..82e767b7337058422f7a893e00b3d1620f20c139 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -155,7 +155,6 @@ class E2EFromCSR(E2EBase): <> tgt_ibox = target_boxes[itgt_box] <> tgt_center[idim] = centers[idim, tgt_ibox] \ - {id=fetch_tgt_center} <> isrc_start = src_box_starts[itgt_box] <> isrc_stop = src_box_starts[itgt_box+1] @@ -164,9 +163,9 @@ class E2EFromCSR(E2EBase): <> src_ibox = src_box_lists[isrc_box] \ {id=read_src_ibox} - <> src_center[idim] = centers[idim, src_ibox] \ - {id=fetch_src_center} - <> d[idim] = tgt_center[idim] - src_center[idim] + <> src_center[idim] = centers[idim, src_ibox] {dup=idim} + <> d[idim] = tgt_center[idim] - src_center[idim] \ + {dup=idim} """] + [""" <> src_coeff{coeffidx} = \ @@ -204,8 +203,7 @@ class E2EFromCSR(E2EBase): for expn in [self.src_expansion, self.tgt_expansion]: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) - loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "id:fetch_tgt_center", - tags={"idim": "unr"}) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) return loopy_knl @@ -261,7 +259,6 @@ class E2EFromChildren(E2EBase): <> tgt_ibox = target_boxes[itgt_box] <> tgt_center[idim] = centers[idim, tgt_ibox] \ - {id=fetch_tgt_center} for isrc_box <> src_ibox = box_child_ids[isrc_box,tgt_ibox] \ @@ -269,9 +266,9 @@ class E2EFromChildren(E2EBase): <> is_src_box_valid = src_ibox != 0 if is_src_box_valid - <> src_center[idim] = centers[idim, src_ibox] \ - {id=fetch_src_center} - <> d[idim] = tgt_center[idim] - src_center[idim] + <> src_center[idim] = centers[idim, src_ibox] {dup=idim} + <> d[idim] = tgt_center[idim] - src_center[idim] \ + {dup=idim} """] + [""" <> src_coeff{i} = \ @@ -311,9 +308,7 @@ class E2EFromChildren(E2EBase): for expn in [self.src_expansion, self.tgt_expansion]: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) - loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "id:fetch_tgt_center", - tags={"idim": "unr"}) - loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") return loopy_knl @@ -362,14 +357,12 @@ class E2EFromParent(E2EBase): <> tgt_ibox = target_boxes[itgt_box] <> tgt_center[idim] = centers[idim, tgt_ibox] \ - {id=fetch_tgt_center} <> src_ibox = box_parent_ids[tgt_ibox] \ {id=read_src_ibox} - <> src_center[idim] = centers[idim, src_ibox] \ - {id=fetch_src_center} - <> d[idim] = tgt_center[idim] - src_center[idim] + <> src_center[idim] = centers[idim, src_ibox] {dup=idim} + <> d[idim] = tgt_center[idim] - src_center[idim] {dup=idim} """] + [""" <> src_coeff{i} = \ @@ -404,9 +397,7 @@ class E2EFromParent(E2EBase): for expn in [self.src_expansion, self.tgt_expansion]: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) - loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "id:fetch_tgt_center", - tags={"idim": "unr"}) - loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") return loopy_knl diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 52c71c9455e6d510fc5114c1867595207275407f..13dae6bc3ebe46e5b49a62dcb25de77044a4686d 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -148,8 +148,7 @@ class E2PFromSingleBox(E2PBase): """.format(coeffidx=i) for i in range(ncoeffs)] + [""" for itgt - <> b[idim] = targets[idim, itgt] - center[idim] \ - {id=compute_b} + <> b[idim] = targets[idim, itgt] - center[idim] {dup=idim} """] + loopy_insns + [""" @@ -182,8 +181,7 @@ class E2PFromSingleBox(E2PBase): dim=self.dim, nresults=len(result_names)) - loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "id:compute_b", - tags={"idim": "unr"}) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) return loopy_knl @@ -234,7 +232,7 @@ class E2PFromCSR(E2PBase): <> itgt_end = itgt_start+box_target_counts_nonchild[tgt_ibox] for itgt - <> tgt[idim] = targets[idim,itgt] {id=fetch_tgt} + <> tgt[idim] = targets[idim,itgt] <> isrc_box_start = source_box_starts[itgt_box] <> isrc_box_end = source_box_starts[itgt_box+1] @@ -245,9 +243,8 @@ class E2PFromCSR(E2PBase): <> coeff{coeffidx} = expansions[src_ibox, {coeffidx}] """.format(coeffidx=i) for i in range(ncoeffs)] + [""" - <> center[idim] = centers[idim, src_ibox] \ - {id=fetch_center} - <> b[idim] = tgt[idim] - center[idim] + <> center[idim] = centers[idim, src_ibox] {dup=idim} + <> b[idim] = tgt[idim] - center[idim] {dup=idim} """] + loopy_insns + [""" end @@ -283,10 +280,7 @@ class E2PFromCSR(E2PBase): dim=self.dim, nresults=len(result_names)) - loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "id:fetch_tgt", - tags={"idim": "unr"}) - loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "id:fetch_center", - tags={"idim": "unr"}) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.set_loop_priority(loopy_knl, "itgt_box,itgt,isrc_box") loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) diff --git a/sumpy/p2e.py b/sumpy/p2e.py index f9a5dc532e5d161b0640fecdd15f3d5436d0246b..c0b3f50df5801d7b202dda65288f634a7f42f3de 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -126,8 +126,7 @@ class P2EFromSingleBox(P2EBase): <> center[idim] = centers[idim, src_ibox] {id=fetch_center} for isrc - <> a[idim] = center[idim] - sources[idim, isrc] \ - {id=compute_a} + <> a[idim] = center[idim] - sources[idim, isrc] {dup=idim} <> strength = strengths[isrc] """] + self.get_loopy_instructions() + [""" @@ -159,9 +158,7 @@ class P2EFromSingleBox(P2EBase): loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) - loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "id:fetch_center", - tags={"idim": "unr"}) - loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") return loopy_knl @@ -238,7 +235,7 @@ class P2EFromCSR(P2EBase): for isrc <> a[idim] = center[idim] - sources[idim, isrc] \ - {id=compute_a} + {dup=idim} <> strength = strengths[isrc] """] + self.get_loopy_instructions() + [""" @@ -259,9 +256,7 @@ class P2EFromCSR(P2EBase): loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) - loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "id:fetch_center", - tags={"idim": "unr"}) - loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") return loopy_knl diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 44ad14b22f59504056abecb3e68c03e8e7576fc3..4dab9e27a0f22fa696db980ad58e8a1ed995dd22 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -128,29 +128,26 @@ class P2P(P2PBase): "{[isrc,itgt,idim]: 0<=itgt d[idim] = targets[idim,itgt] - sources[idim,isrc] \ - {id=compute_d} - """ - ]+[ - lp.Assignment(id=None, - assignee="pair_result_%d" % i, expression=expr, - temp_var_type=lp.auto) - for i, expr in enumerate(exprs) - ] - + ["end"] - + [ - """ + + [""" + for itgt + for isrc + """] + loopy_insns + [""" + <> d[idim] = targets[idim,itgt] - sources[idim,isrc] \ + """]+[ + lp.Assignment(id=None, + assignee="pair_result_%d" % i, expression=expr, + temp_var_type=lp.auto) + for i, expr in enumerate(exprs) + ] + [""" + end + """] + [""" result[KNLIDX, itgt] = knl_KNLIDX_scaling \ * simul_reduce(sum, isrc, pair_result_KNLIDX) """.replace("KNLIDX", str(iknl)) - for iknl in range(len(exprs)) - ] - + ["end"], + for iknl in range(len(exprs))] + [ + ] + [""" + end + """], [ lp.GlobalArg("sources", None, shape=(self.dim, "nsources")), @@ -171,9 +168,7 @@ class P2P(P2PBase): nstrengths=self.strength_count, nresults=len(self.kernels)) - for where in ["compute_d"]: - loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "id:"+where, - tags=dict(idim="unr")) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in self.kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) @@ -254,7 +249,7 @@ class P2PFromCSR(P2PBase): for isrc <> d[idim] = \ targets[idim,itgt] - sources[idim,isrc] \ - {id=compute_d} + {dup=idim} """ ] + loopy_insns + [ lp.Assignment(id=None, @@ -299,9 +294,7 @@ class P2PFromCSR(P2PBase): nstrengths=self.strength_count, nkernels=len(self.kernels)) - loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "id:compute_d", - tags=dict(idim="unr")) - + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.tag_array_axes(loopy_knl, "strength", "sep,C") for knl in self.kernels: diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 89e4848e1b4a3dfd710045cf5b92c8ff1557acc7..73234b65cf5c6c28851596d959c2c73cfbd9ac64 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -95,8 +95,8 @@ class LayerPotentialBase(KernelComputation): def get_compute_a_and_b_vecs(self): return """ - <> a[idim] = center[idim,itgt] - src[idim,isrc] {id=compute_a} - <> b[idim] = tgt[idim,itgt] - center[idim,itgt] {id=compute_b} + <> a[idim] = center[idim,itgt] - src[idim,isrc] {dup=idim} + <> b[idim] = tgt[idim,itgt] - center[idim,itgt] {dup=idim} """ def get_src_tgt_arguments(self): @@ -179,9 +179,7 @@ class LayerPotentialBase(KernelComputation): loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) - for where in ["compute_a", "compute_b"]: - loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "id:"+where, - tags={"idim": "unr"}) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for expn in self.expansions: loopy_knl = expn.prepare_loopy_kernel(loopy_knl)