diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 2f8125b4232df353ceebd8a5a0cb7a5504e27203..b96dd5d2f6a566425444ed50e3954cf05f2c63fa 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -505,7 +505,7 @@ class P2PFromCSR(P2PBase): <> itgt_offset = itgt_offset_outer * nsplit + inner <> itgt = itgt_offset + itgt_start <> cond_itgt = itgt < itgt_end - <> acc[iknl] = 0 {id=init_acc, dup=iknl} + <> acc[iknl] = 0 {id=init_acc} if cond_itgt tgt_center[idim] = targets[idim, itgt] {id=prefetch_tgt,dup=idim} end @@ -531,7 +531,10 @@ class P2PFromCSR(P2PBase): """] + [""" <> is_self = (isrc == target_to_source[itgt]) """ if self.exclude_self else ""] - + [f"<> strength_{i} = local_isrc_strength[{i}, isrc - isrc_start]" for + + [f""" + <> strength_{i} = local_isrc_strength[{i}, isrc - isrc_start] \ + {{dep=prefetch_charge}} + """ for i in set(self.strength_usage)] + loopy_insns + [f""" @@ -547,7 +550,7 @@ class P2PFromCSR(P2PBase): + [f""" if cond_itgt result[{iknl}, itgt] = knl_{iknl}_scaling * acc[{iknl}] \ - {{dep=update_acc_{iknl}}} + {{id=write_csr,dep=update_acc_{iknl}}} end """ for iknl in range(len(self.target_kernels))] + [""" @@ -566,7 +569,7 @@ class P2PFromCSR(P2PBase): <> isrc_box_end = source_box_starts[itgt_box+1] for itgt - <> acc[iknl] = 0 {id=init_acc, dup=iknl} + <> acc[iknl] = 0 {id=init_acc} tgt_center[idim] = targets[idim, itgt] {id=prefetch_tgt,dup=idim} for isrc_box <> src_ibox = source_box_lists[isrc_box] {id=src_box_insn_0} @@ -593,7 +596,7 @@ class P2PFromCSR(P2PBase): """] + [f""" result[{iknl}, itgt] = knl_{iknl}_scaling * acc[{iknl}] \ - {{dep=update_acc_{iknl}}} + {{id_prefix=write_csr,dep=update_acc_{iknl}}} """ for iknl in range(len(self.target_kernels))] + [""" end @@ -606,7 +609,8 @@ class P2PFromCSR(P2PBase): arguments, assumptions="ntgt_boxes>=1", name=self.name, - silenced_warnings="write_race(write_csr*)", + silenced_warnings=["write_race(write_csr*)", "write_race(prefetch_src)", + "write_race(prefetch_charge)"], fixed_parameters=dict( dim=self.dim, nstrengths=self.strength_count, @@ -641,7 +645,7 @@ class P2PFromCSR(P2PBase): knl = lp.set_temporary_address_space(knl, ["local_isrc", "local_isrc_strength"], lp.AddressSpace.LOCAL) knl = lp.add_inames_for_unused_hw_axes(knl) - knl = lp.set_options(knl, write_code=True) + # knl = lp.set_options(knl, write_code=True) knl = self._allow_redundant_execution_of_knl_scaling(knl) knl = lp.set_options(knl,