From 521454e34200be72404142cb73f355bbe5c7024a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 4 Dec 2019 12:02:57 -0600 Subject: [PATCH 1/3] Use atomic accesses for potentially concurrent writes (closes #140) --- pytential/qbx/refinement.py | 14 +++++++++----- pytential/qbx/target_assoc.py | 6 +++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pytential/qbx/refinement.py b/pytential/qbx/refinement.py index 829e706c..5e1afca2 100644 --- a/pytential/qbx/refinement.py +++ b/pytential/qbx/refinement.py @@ -144,8 +144,8 @@ EXPANSION_DISK_UNDISTURBED_BY_SOURCES_CHECKER = AreaQueryElementwiseTemplate( if (is_close) { - panel_refine_flags[center_panel] = 1; - *found_panel_to_refine = 1; + atomic_or(&panel_refine_flags[center_panel], 1); + atomic_or(found_panel_to_refine, 1); break; } } @@ -203,8 +203,8 @@ SUFFICIENT_SOURCE_QUADRATURE_RESOLUTION_CHECKER = AreaQueryElementwiseTemplate( if (is_close) { - panel_refine_flags[my_panel] = 1; - *found_panel_to_refine = 1; + atomic_or(&panel_refine_flags[my_panel], 1); + atomic_or(found_panel_to_refine, 1); break; } } @@ -252,10 +252,14 @@ class RefinerCodeContainer(TreeCodeContainerMixin): <> over_threshold = element_property[ielement] > threshold if over_threshold refine_flags[ielement] = 1 - refine_flags_updated = 1 {id=write_refine_flags_updated} + refine_flags_updated = 1 {id=write_refine_flags_updated, atomic} end end """, + [ + lp.GlobalArg("refine_flags_updated", shape=(), for_atomic=True), + "..." + ], options="return_dict", silenced_warnings="write_race(write_refine_flags_updated)", name="refine_kernel_length_scale_to_quad_resolution_ratio", diff --git a/pytential/qbx/target_assoc.py b/pytential/qbx/target_assoc.py index d8f32f54..27658c8a 100644 --- a/pytential/qbx/target_assoc.py +++ b/pytential/qbx/target_assoc.py @@ -208,7 +208,7 @@ QBX_TARGET_MARKER = AreaQueryElementwiseTemplate( <= tunnel_radius_by_source[source]) { target_status[i] = MARKED_QBX_CENTER_PENDING; - *found_target_close_to_panel = 1; + atomic_or(found_target_close_to_panel, 1); } } """, @@ -401,8 +401,8 @@ QBX_FAILED_TARGET_ASSOCIATION_REFINER = AreaQueryElementwiseTemplate( { particle_id_t panel = bsearch( panel_to_source_starts, npanels + 1, source); - refine_flags[panel] = 1; - *found_panel_to_refine = 1; + atomic_or(&refine_flags[panel], 1); + atomic_or(found_panel_to_refine, 1); } } """, -- GitLab From 60d4e8289f528ede21edf2ef09a94387cc39fa0d Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 4 Dec 2019 12:19:44 -0600 Subject: [PATCH 2/3] Rewrite loopy kernel to avoid atomic ops --- pytential/qbx/refinement.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/pytential/qbx/refinement.py b/pytential/qbx/refinement.py index 5e1afca2..ae99bbf3 100644 --- a/pytential/qbx/refinement.py +++ b/pytential/qbx/refinement.py @@ -252,16 +252,10 @@ class RefinerCodeContainer(TreeCodeContainerMixin): <> over_threshold = element_property[ielement] > threshold if over_threshold refine_flags[ielement] = 1 - refine_flags_updated = 1 {id=write_refine_flags_updated, atomic} end end """, - [ - lp.GlobalArg("refine_flags_updated", shape=(), for_atomic=True), - "..." - ], options="return_dict", - silenced_warnings="write_race(write_refine_flags_updated)", name="refine_kernel_length_scale_to_quad_resolution_ratio", lang_version=MOST_RECENT_LANGUAGE_VERSION) @@ -404,25 +398,25 @@ class RefinerWrangler(TreeWranglerBase): debug, wait_for=None): knl = self.code_container.element_prop_threshold_checker() - if debug: - npanels_to_refine_prev = cl.array.sum(refine_flags).get() + npanels_to_refine_prev = cl.array.sum(refine_flags).get() evt, out = knl(self.queue, element_property=element_property, refine_flags=refine_flags, - refine_flags_updated=np.array(0), threshold=np.array(threshold), wait_for=wait_for) cl.wait_for_events([evt]) + npanels_to_refine = cl.array.sum(refine_flags).get() + updated = npanels_to_refine > npanels_to_refine_prev + if debug: - npanels_to_refine = cl.array.sum(refine_flags).get() - if npanels_to_refine > npanels_to_refine_prev: - logger.debug("refiner: found {} panel(s) to refine".format( - npanels_to_refine - npanels_to_refine_prev)) + logger.debug( + "refiner: found %d panel(s) to refine", + npanels_to_refine - npanels_to_refine_prev) - return (out["refine_flags_updated"].get() == 1).all() + return updated # }}} -- GitLab From 16cfd6ced544caa0bb019378a17baf0992700455 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 4 Dec 2019 12:22:03 -0600 Subject: [PATCH 3/3] Revert "Rewrite loopy kernel to avoid atomic ops" This reverts commit 60d4e8289f528ede21edf2ef09a94387cc39fa0d. --- pytential/qbx/refinement.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/pytential/qbx/refinement.py b/pytential/qbx/refinement.py index ae99bbf3..5e1afca2 100644 --- a/pytential/qbx/refinement.py +++ b/pytential/qbx/refinement.py @@ -252,10 +252,16 @@ class RefinerCodeContainer(TreeCodeContainerMixin): <> over_threshold = element_property[ielement] > threshold if over_threshold refine_flags[ielement] = 1 + refine_flags_updated = 1 {id=write_refine_flags_updated, atomic} end end """, + [ + lp.GlobalArg("refine_flags_updated", shape=(), for_atomic=True), + "..." + ], options="return_dict", + silenced_warnings="write_race(write_refine_flags_updated)", name="refine_kernel_length_scale_to_quad_resolution_ratio", lang_version=MOST_RECENT_LANGUAGE_VERSION) @@ -398,25 +404,25 @@ class RefinerWrangler(TreeWranglerBase): debug, wait_for=None): knl = self.code_container.element_prop_threshold_checker() - npanels_to_refine_prev = cl.array.sum(refine_flags).get() + if debug: + npanels_to_refine_prev = cl.array.sum(refine_flags).get() evt, out = knl(self.queue, element_property=element_property, refine_flags=refine_flags, + refine_flags_updated=np.array(0), threshold=np.array(threshold), wait_for=wait_for) cl.wait_for_events([evt]) - npanels_to_refine = cl.array.sum(refine_flags).get() - updated = npanels_to_refine > npanels_to_refine_prev - if debug: - logger.debug( - "refiner: found %d panel(s) to refine", - npanels_to_refine - npanels_to_refine_prev) + npanels_to_refine = cl.array.sum(refine_flags).get() + if npanels_to_refine > npanels_to_refine_prev: + logger.debug("refiner: found {} panel(s) to refine".format( + npanels_to_refine - npanels_to_refine_prev)) - return updated + return (out["refine_flags_updated"].get() == 1).all() # }}} -- GitLab