diff --git a/pytential/qbx/geometry.py b/pytential/qbx/geometry.py index ca53df5a366e0abe51afc111abe13cde2ed9ace1..7cc8e8ea739f38e20aa154af2891e7caa676ad07 100644 --- a/pytential/qbx/geometry.py +++ b/pytential/qbx/geometry.py @@ -30,6 +30,7 @@ import pyopencl.array # noqa from pytools import memoize_method from boxtree.tools import DeviceDataRecord import loopy as lp +from loopy.version import MOST_RECENT_LANGUAGE_VERSION from cgen import Enum @@ -125,7 +126,8 @@ class QBXFMMGeometryCodeGetter(TreeCodeContainerMixin): """ targets[dim, i] = points[dim, i] """, - default_offset=lp.auto, name="copy_targets") + default_offset=lp.auto, name="copy_targets", + lang_version=MOST_RECENT_LANGUAGE_VERSION) knl = lp.fix_parameters(knl, ndims=self.ambient_dim) @@ -182,7 +184,8 @@ class QBXFMMGeometryCodeGetter(TreeCodeContainerMixin): "..." ], name="qbx_center_to_target_box_lookup", - silenced_warnings="write_race(tgt_write)") + silenced_warnings="write_race(tgt_write)", + lang_version=MOST_RECENT_LANGUAGE_VERSION) knl = lp.split_iname(knl, "ibox", 128, inner_tag="l.0", outer_tag="g.0") @@ -244,7 +247,8 @@ class QBXFMMGeometryCodeGetter(TreeCodeContainerMixin): lp.ValueArg("ntargets", np.int32), ], name="pick_used_centers", - silenced_warnings="write_race(center_is_used_write)") + silenced_warnings="write_race(center_is_used_write)", + lang_version=MOST_RECENT_LANGUAGE_VERSION) knl = lp.split_iname(knl, "i", 128, inner_tag="l.0", outer_tag="g.0") return knl diff --git a/pytential/qbx/interactions.py b/pytential/qbx/interactions.py index 6105472db63a1aad41256798880ecef91e852c49..470151f3018099a5ac336c09a4ccec52a43a2539 100644 --- a/pytential/qbx/interactions.py +++ b/pytential/qbx/interactions.py @@ -24,6 +24,7 @@ THE SOFTWARE. import numpy as np import loopy as lp +from loopy.version import MOST_RECENT_LANGUAGE_VERSION from pytools import memoize_method from six.moves import range @@ -32,7 +33,7 @@ from sumpy.e2e import E2EBase from sumpy.e2p import E2PBase -PYTENTIAL_KERNEL_VERSION = 5 +PYTENTIAL_KERNEL_VERSION = 7 # {{{ form qbx expansions from points @@ -105,15 +106,19 @@ class P2QBXLFromCSR(P2EBase): """] + [""" qbx_expansions[tgt_icenter, {i}] = \ simul_reduce(sum, (isrc_box, isrc), strength*coeff{i}) \ - {{id_prefix=write_expn}} - """.format(i=i) for i in range(ncoeffs)] + [""" + {{id_prefix=write_expn{nosync}}} + """.format(i=i, + nosync=",nosync=write_expn*" + if ncoeffs > 1 else "") + for i in range(ncoeffs)] + [""" end """], arguments, name=self.name, assumptions="ntgt_centers>=1", silenced_warnings="write_race(write_expn*)", - fixed_parameters=dict(dim=self.dim)) + fixed_parameters=dict(dim=self.dim), + lang_version=MOST_RECENT_LANGUAGE_VERSION) loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") @@ -186,8 +191,11 @@ class M2QBXL(E2EBase): """] + [""" qbx_expansions[icenter, {i}] = qbx_expansions[icenter, {i}] + \ simul_reduce(sum, isrc_box, coeff{i}) \ - {{id_prefix=write_expn}} - """.format(i=i) for i in range(ncoeff_tgt)] + [""" + {{id_prefix=write_expn{nosync}}} + """.format(i=i, + nosync=",nosync=write_expn*" + if ncoeff_tgt > 1 else "") + for i in range(ncoeff_tgt)] + [""" end """], @@ -209,7 +217,8 @@ class M2QBXL(E2EBase): ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, assumptions="ncenters>=1", silenced_warnings="write_race(write_expn*)", - fixed_parameters=dict(dim=self.dim)) + fixed_parameters=dict(dim=self.dim), + lang_version=MOST_RECENT_LANGUAGE_VERSION) for expn in [self.src_expansion, self.tgt_expansion]: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) @@ -288,8 +297,11 @@ class L2QBXL(E2EBase): ] + self.get_translation_loopy_insns() + [""" qbx_expansions[icenter, {i}] = \ qbx_expansions[icenter, {i}] + coeff{i} \ - {{id_prefix=write_expn}} - """.format(i=i) for i in range(ncoeff_tgt)] + [""" + {{id_prefix=write_expn{nosync}}} + """.format(i=i, + nosync=",nosync=write_expn*" + if ncoeff_tgt > 1 else "") + for i in range(ncoeff_tgt)] + [""" end end """], @@ -309,7 +321,8 @@ class L2QBXL(E2EBase): name=self.name, assumptions="ncenters>=1", silenced_warnings="write_race(write_expn*)", - fixed_parameters=dict(dim=self.dim, nchildren=2**self.dim)) + fixed_parameters=dict(dim=self.dim, nchildren=2**self.dim), + lang_version=MOST_RECENT_LANGUAGE_VERSION) for expn in [self.src_expansion, self.tgt_expansion]: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) @@ -383,8 +396,11 @@ class QBXL2P(E2PBase): ] + loopy_insns + [""" result[{i},center_itgt] = kernel_scaling * result_{i}_p \ - {{id_prefix=write_result}} - """.format(i=i) for i in range(len(result_names))] + [""" + {{id_prefix=write_result{nosync}}} + """.format(i=i, + nosync=",nosync=write_result*" + if len(result_names) > 1 else "") + for i in range(len(result_names))] + [""" end end """], @@ -405,7 +421,8 @@ class QBXL2P(E2PBase): name=self.name, assumptions="nglobal_qbx_centers>=1", silenced_warnings="write_race(write_result*)", - fixed_parameters=dict(dim=self.dim, nresults=len(result_names))) + fixed_parameters=dict(dim=self.dim, nresults=len(result_names)), + lang_version=MOST_RECENT_LANGUAGE_VERSION) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) diff --git a/pytential/qbx/refinement.py b/pytential/qbx/refinement.py index 5d90a320015ecd2a249c00634ffcf221cac261ef..08539c6899e696dd8af6778bdefdd41de33d87a8 100644 --- a/pytential/qbx/refinement.py +++ b/pytential/qbx/refinement.py @@ -28,6 +28,7 @@ THE SOFTWARE. import loopy as lp +from loopy.version import MOST_RECENT_LANGUAGE_VERSION import numpy as np import pyopencl as cl @@ -255,7 +256,8 @@ class RefinerCodeContainer(TreeCodeContainerMixin): """, options="return_dict", silenced_warnings="write_race(write_refine_flags_updated)", - name="refine_kernel_length_scale_to_panel_size_ratio") + name="refine_kernel_length_scale_to_panel_size_ratio", + lang_version=MOST_RECENT_LANGUAGE_VERSION) knl = lp.split_iname(knl, "panel", 128, inner_tag="l.0", outer_tag="g.0") return knl diff --git a/pytential/qbx/utils.py b/pytential/qbx/utils.py index ecb939de841d7b567b735d52ff26b327a72d50b1..6673b450bb7ed3cb2cdf043e95963c0930fab89a 100644 --- a/pytential/qbx/utils.py +++ b/pytential/qbx/utils.py @@ -32,6 +32,7 @@ from boxtree.tree import Tree import pyopencl as cl import pyopencl.array # noqa from pytools import memoize, memoize_method +from loopy.version import MOST_RECENT_LANGUAGE_VERSION import logging logger = logging.getLogger(__name__) @@ -84,7 +85,8 @@ def get_interleaver_kernel(dtype): lp.GlobalArg("dst", shape=(var("dstlen"),), dtype=dtype), "..." ], - assumptions="2*srclen = dstlen") + assumptions="2*srclen = dstlen", + lang_version=MOST_RECENT_LANGUAGE_VERSION) knl = lp.split_iname(knl, "i", 128, inner_tag="l.0", outer_tag="g.0") return knl @@ -217,7 +219,8 @@ def panel_sizes(discr, last_dim_length): knl = lp.make_kernel( "{[i,j,k]: 0<=i