From c5c78f2e743041b724eadddf5590cafd0e23395e Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 14 Sep 2018 13:42:28 -0500 Subject: [PATCH 1/5] Use VectorArg with offsets by default. Bump pyopencl version requirement to 2018.2 --- boxtree/area_query.py | 4 ++-- boxtree/tools.py | 11 +++++++++-- boxtree/traversal.py | 2 +- boxtree/tree.py | 3 ++- boxtree/tree_build_kernels.py | 6 +++--- setup.py | 2 +- 6 files changed, 18 insertions(+), 10 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index 42f0ef9..42ec46b 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -661,7 +661,7 @@ class AreaQueryBuilder(object): debug=False, root_extent_stretch_factor=TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - from pyopencl.tools import VectorArg, ScalarArg + from boxtree.tools import VectorArg, ScalarArg arg_decls = [ VectorArg(coord_dtype, "box_centers"), ScalarArg(coord_dtype, "root_extent"), @@ -1064,7 +1064,7 @@ class PeerListFinder(object): targets_have_extent=False, sources_have_extent=False) - from pyopencl.tools import VectorArg, ScalarArg + from boxtree.tools import VectorArg, ScalarArg arg_decls = [ VectorArg(coord_dtype, "box_centers"), ScalarArg(coord_dtype, "root_extent"), diff --git a/boxtree/tools.py b/boxtree/tools.py index 03678ae..9402d9b 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -27,11 +27,18 @@ import numpy as np from pytools import Record, memoize_method import pyopencl as cl import pyopencl.array # noqa -from pyopencl.tools import dtype_to_c_struct +from pyopencl.tools import dtype_to_c_struct, VectorArg as _VectorArg +from pyopencl.tools import ScalarArg # noqa from mako.template import Template from pytools.obj_array import make_obj_array from boxtree.fmm import TimingFuture, TimingResult +from functools import partial + + +# Use offsets in VectorArg by default. 
+VectorArg = partial(_VectorArg, with_offset=True) + AXIS_NAMES = ("x", "y", "z", "w") @@ -378,7 +385,7 @@ class GappyCopyAndMapKernel: @memoize_method def _get_kernel(self, dtype, src_index_dtype, dst_index_dtype, have_src_indices, have_dst_indices, map_values): - from pyopencl.tools import VectorArg + from boxtree.tools import VectorArg args = [ VectorArg(dtype, "input_ary", with_offset=True), diff --git a/boxtree/traversal.py b/boxtree/traversal.py index f7e7045..6cf694c 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1793,7 +1793,7 @@ class FMMTraversalBuilder: from_sep_smaller_crit=from_sep_smaller_crit, ) from pyopencl.algorithm import ListOfListsBuilder - from pyopencl.tools import VectorArg, ScalarArg + from boxtree.tools import VectorArg, ScalarArg result = {} diff --git a/boxtree/tree.py b/boxtree/tree.py index fa8c438..fa2d328 100644 --- a/boxtree/tree.py +++ b/boxtree/tree.py @@ -731,7 +731,8 @@ class ParticleListFilter(object): @memoize_method def get_filter_target_lists_in_user_order_kernel(self, particle_id_dtype, user_order_flags_dtype): - from pyopencl.tools import VectorArg, dtype_to_ctype + from boxtree.tools import VectorArg + from pyopencl.tools import dtype_to_ctype from pyopencl.algorithm import ListOfListsBuilder from mako.template import Template diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index a854834..f6fa964 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -892,7 +892,7 @@ LEVEL_RESTRICT_TPL = Template( def build_level_restrict_kernel(context, preamble_with_dtype_decls, dimensions, axis_names, box_id_dtype, coord_dtype, box_level_dtype, max_levels): - from pyopencl.tools import VectorArg, ScalarArg + from boxtree.tools import VectorArg, ScalarArg arguments = ( [ @@ -1369,7 +1369,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, + str(MORTON_NR_SCAN_PREAMBLE_TPL.render(**codegen_args)) ) - from pyopencl.tools import VectorArg, ScalarArg + 
from boxtree.tools import VectorArg, ScalarArg common_arguments = ( [ # box-local morton bin counts for each particle at the current level @@ -1582,7 +1582,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, # FIXME: Turn me into a scan template - from pyopencl.tools import VectorArg + from boxtree.tools import VectorArg find_prune_indices_kernel = GenericScanKernel( context, box_id_dtype, arguments=[ diff --git a/setup.py b/setup.py index 32cca6a..4d1d325 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ def main(): packages=["boxtree"], install_requires=[ "pytools>=2018.4", - "pyopencl>=2013.1", + "pyopencl>=2018.2", "Mako>=0.7.3", "pytest>=2.3", "cgen>=2013.1.2", -- GitLab From a2a6da89d9c7e5ad0e997182b2a7eb41b17f580e Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 13 Nov 2018 22:58:40 -0600 Subject: [PATCH 2/5] More fixes --- boxtree/area_query.py | 22 +++++++------- boxtree/traversal.py | 58 ++++++++++++++++++----------------- boxtree/tree.py | 7 +++-- test/test_fmm.py | 16 +++++----- test/test_traversal.py | 8 ++--- test/test_tree.py | 68 +++++++++++++++++++++--------------------- 6 files changed, 91 insertions(+), 88 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index 42ec46b..084ac12 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -663,11 +663,11 @@ class AreaQueryBuilder(object): from boxtree.tools import VectorArg, ScalarArg arg_decls = [ - VectorArg(coord_dtype, "box_centers"), + VectorArg(coord_dtype, "box_centers", with_offset=False), ScalarArg(coord_dtype, "root_extent"), VectorArg(np.uint8, "box_levels"), ScalarArg(box_id_dtype, "aligned_nboxes"), - VectorArg(box_id_dtype, "box_child_ids"), + VectorArg(box_id_dtype, "box_child_ids", with_offset=False), VectorArg(box_flags_enum.dtype, "box_flags"), VectorArg(peer_list_idx_dtype, "peer_list_starts"), VectorArg(box_id_dtype, "peer_lists"), @@ -746,12 +746,12 @@ class AreaQueryBuilder(object): result, evt = area_query_kernel( queue, 
len(ball_radii), tree.box_centers.data, tree.root_extent, - tree.box_levels.data, tree.aligned_nboxes, - tree.box_child_ids.data, tree.box_flags.data, - peer_lists.peer_list_starts.data, - peer_lists.peer_lists.data, ball_radii.data, + tree.box_levels, tree.aligned_nboxes, + tree.box_child_ids.data, tree.box_flags, + peer_lists.peer_list_starts, + peer_lists.peer_lists, ball_radii, *(tuple(tree.bounding_box[0]) + - tuple(bc.data for bc in ball_centers)), + tuple(bc for bc in ball_centers)), wait_for=wait_for) aq_plog.done() @@ -1066,11 +1066,11 @@ class PeerListFinder(object): from boxtree.tools import VectorArg, ScalarArg arg_decls = [ - VectorArg(coord_dtype, "box_centers"), + VectorArg(coord_dtype, "box_centers", with_offset=False), ScalarArg(coord_dtype, "root_extent"), VectorArg(np.uint8, "box_levels"), ScalarArg(box_id_dtype, "aligned_nboxes"), - VectorArg(box_id_dtype, "box_child_ids"), + VectorArg(box_id_dtype, "box_child_ids", with_offset=False), VectorArg(box_flags_enum.dtype, "box_flags"), ] @@ -1114,8 +1114,8 @@ class PeerListFinder(object): result, evt = peer_list_finder_kernel( queue, tree.nboxes, tree.box_centers.data, tree.root_extent, - tree.box_levels.data, tree.aligned_nboxes, - tree.box_child_ids.data, tree.box_flags.data, + tree.box_levels, tree.aligned_nboxes, + tree.box_child_ids.data, tree.box_flags, wait_for=wait_for) pl_plog.done() diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 6cf694c..c024d47 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1851,11 +1851,11 @@ class FMMTraversalBuilder: # {{{ build list N builders base_args = [ - VectorArg(coord_dtype, "box_centers"), + VectorArg(coord_dtype, "box_centers", with_offset=False), ScalarArg(coord_dtype, "root_extent"), VectorArg(np.uint8, "box_levels"), ScalarArg(box_id_dtype, "aligned_nboxes"), - VectorArg(box_id_dtype, "box_child_ids"), + VectorArg(box_id_dtype, "box_child_ids", with_offset=False), VectorArg(box_flags_enum.dtype, "box_flags"), ] @@ -1869,7 
+1869,7 @@ class FMMTraversalBuilder: ("from_sep_siblings", FROM_SEP_SIBLINGS_TEMPLATE, [ VectorArg(box_id_dtype, "target_or_target_parent_boxes"), - VectorArg(box_id_dtype, "box_parent_ids"), + VectorArg(box_id_dtype, "box_parent_ids", with_offset=False), VectorArg(box_id_dtype, "same_level_non_well_sep_boxes_starts"), VectorArg(box_id_dtype, @@ -1883,8 +1883,10 @@ class FMMTraversalBuilder: "same_level_non_well_sep_boxes_starts"), VectorArg(box_id_dtype, "same_level_non_well_sep_boxes_lists"), - VectorArg(coord_dtype, "box_target_bounding_box_min"), - VectorArg(coord_dtype, "box_target_bounding_box_max"), + VectorArg(coord_dtype, "box_target_bounding_box_min", + with_offset=False), + VectorArg(coord_dtype, "box_target_bounding_box_max", + with_offset=False), VectorArg(particle_id_dtype, "box_source_counts_cumul"), ScalarArg(particle_id_dtype, "from_sep_smaller_min_nsources_cumul"), @@ -1897,7 +1899,7 @@ class FMMTraversalBuilder: [ ScalarArg(coord_dtype, "stick_out_factor"), VectorArg(box_id_dtype, "target_or_target_parent_boxes"), - VectorArg(box_id_dtype, "box_parent_ids"), + VectorArg(box_id_dtype, "box_parent_ids", with_offset=False), VectorArg(box_id_dtype, "same_level_non_well_sep_boxes_starts"), VectorArg(box_id_dtype, @@ -1982,7 +1984,7 @@ class FMMTraversalBuilder: fin_debug("building list of source boxes, their parents, and target boxes") result, evt = knl_info.sources_parents_and_targets_builder( - queue, tree.nboxes, tree.box_flags.data, wait_for=wait_for) + queue, tree.nboxes, tree.box_flags, wait_for=wait_for) wait_for = [evt] source_parent_boxes = result["source_parent_boxes"].lists @@ -2137,8 +2139,8 @@ class FMMTraversalBuilder: result, evt = knl_info.same_level_non_well_sep_boxes_builder( queue, tree.nboxes, - tree.box_centers.data, tree.root_extent, tree.box_levels.data, - tree.aligned_nboxes, tree.box_child_ids.data, tree.box_flags.data, + tree.box_centers.data, tree.root_extent, tree.box_levels, + tree.aligned_nboxes, tree.box_child_ids.data, 
tree.box_flags, wait_for=wait_for) wait_for = [evt] same_level_non_well_sep_boxes = result["same_level_non_well_sep_boxes"] @@ -2151,9 +2153,9 @@ class FMMTraversalBuilder: result, evt = knl_info.neighbor_source_boxes_builder( queue, len(target_boxes), - tree.box_centers.data, tree.root_extent, tree.box_levels.data, - tree.aligned_nboxes, tree.box_child_ids.data, tree.box_flags.data, - target_boxes.data, wait_for=wait_for) + tree.box_centers.data, tree.root_extent, tree.box_levels, + tree.aligned_nboxes, tree.box_child_ids.data, tree.box_flags, + target_boxes, wait_for=wait_for) wait_for = [evt] neighbor_source_boxes = result["neighbor_source_boxes"] @@ -2166,11 +2168,11 @@ class FMMTraversalBuilder: result, evt = knl_info.from_sep_siblings_builder( queue, len(target_or_target_parent_boxes), - tree.box_centers.data, tree.root_extent, tree.box_levels.data, - tree.aligned_nboxes, tree.box_child_ids.data, tree.box_flags.data, - target_or_target_parent_boxes.data, tree.box_parent_ids.data, - same_level_non_well_sep_boxes.starts.data, - same_level_non_well_sep_boxes.lists.data, + tree.box_centers.data, tree.root_extent, tree.box_levels, + tree.aligned_nboxes, tree.box_child_ids.data, tree.box_flags, + target_or_target_parent_boxes, tree.box_parent_ids.data, + same_level_non_well_sep_boxes.starts, + same_level_non_well_sep_boxes.lists, wait_for=wait_for) wait_for = [evt] from_sep_siblings = result["from_sep_siblings"] @@ -2185,14 +2187,14 @@ class FMMTraversalBuilder: from_sep_smaller_base_args = ( queue, len(target_boxes), - tree.box_centers.data, tree.root_extent, tree.box_levels.data, - tree.aligned_nboxes, tree.box_child_ids.data, tree.box_flags.data, - tree.stick_out_factor, target_boxes.data, - same_level_non_well_sep_boxes.starts.data, - same_level_non_well_sep_boxes.lists.data, + tree.box_centers.data, tree.root_extent, tree.box_levels, + tree.aligned_nboxes, tree.box_child_ids.data, tree.box_flags, + tree.stick_out_factor, target_boxes, + 
same_level_non_well_sep_boxes.starts, + same_level_non_well_sep_boxes.lists, box_target_bounding_box_min.data, box_target_bounding_box_max.data, - tree.box_source_counts_cumul.data, + tree.box_source_counts_cumul, _from_sep_smaller_min_nsources_cumul, ) @@ -2240,12 +2242,12 @@ class FMMTraversalBuilder: result, evt = knl_info.from_sep_bigger_builder( queue, len(target_or_target_parent_boxes), - tree.box_centers.data, tree.root_extent, tree.box_levels.data, - tree.aligned_nboxes, tree.box_child_ids.data, tree.box_flags.data, - tree.stick_out_factor, target_or_target_parent_boxes.data, + tree.box_centers.data, tree.root_extent, tree.box_levels, + tree.aligned_nboxes, tree.box_child_ids.data, tree.box_flags, + tree.stick_out_factor, target_or_target_parent_boxes, tree.box_parent_ids.data, - same_level_non_well_sep_boxes.starts.data, - same_level_non_well_sep_boxes.lists.data, + same_level_non_well_sep_boxes.starts, + same_level_non_well_sep_boxes.lists, wait_for=wait_for) wait_for = [evt] diff --git a/boxtree/tree.py b/boxtree/tree.py index fa2d328..254e53f 100644 --- a/boxtree/tree.py +++ b/boxtree/tree.py @@ -787,9 +787,10 @@ class ParticleListFilter(object): tree.particle_id_dtype, user_order_flags.dtype) result, evt = kernel(queue, tree.nboxes, - user_order_flags.data, - user_target_ids.data, - tree.box_target_starts.data, tree.box_target_counts_nonchild.data) + user_order_flags, + user_target_ids, + tree.box_target_starts, + tree.box_target_counts_nonchild) return FilteredTargetListsInUserOrder( nfiltered_targets=result["filt_tgt_list"].count, diff --git a/test/test_fmm.py b/test/test_fmm.py index 8041721..e6c42c7 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -106,7 +106,7 @@ class ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder( (3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "l2", "static_l2"), # noqa: E501 ]) -def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req, +def test_fmm_completeness(ctx_factory, dims, 
nsources_req, ntargets_req, who_has_extent, source_gen, target_gen, filter_kind, well_sep_is_n_away, extent_norm, from_sep_smaller_crit): """Tests whether the built FMM traversal structures and driver completely @@ -118,7 +118,7 @@ def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req, logging.basicConfig(level=logging.INFO) - ctx = ctx_getter() + ctx = ctx_factory() queue = cl.CommandQueue(ctx) dtype = np.float64 @@ -336,13 +336,13 @@ def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req, @pytest.mark.parametrize("dims", [2, 3]) @pytest.mark.parametrize("use_dipoles", [True, False]) @pytest.mark.parametrize("helmholtz_k", [0, 2]) -def test_pyfmmlib_fmm(ctx_getter, dims, use_dipoles, helmholtz_k): +def test_pyfmmlib_fmm(ctx_factory, dims, use_dipoles, helmholtz_k): logging.basicConfig(level=logging.INFO) from pytest import importorskip importorskip("pyfmmlib") - ctx = ctx_getter() + ctx = ctx_factory() queue = cl.CommandQueue(ctx) nsources = 3000 @@ -506,8 +506,8 @@ def test_pyfmmlib_fmm(ctx_getter, dims, use_dipoles, helmholtz_k): # {{{ test particle count thresholding in traversal generation @pytest.mark.parametrize("enable_extents", [True, False]) -def test_interaction_list_particle_count_thresholding(ctx_getter, enable_extents): - ctx = ctx_getter() +def test_interaction_list_particle_count_thresholding(ctx_factory, enable_extents): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) logging.basicConfig(level=logging.INFO) @@ -564,8 +564,8 @@ def test_interaction_list_particle_count_thresholding(ctx_getter, enable_extents # {{{ test fmm with float32 dtype @pytest.mark.parametrize("enable_extents", [True, False]) -def test_fmm_float32(ctx_getter, enable_extents): - ctx = ctx_getter() +def test_fmm_float32(ctx_factory, enable_extents): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) from pyopencl.characterize import has_struct_arg_count_bug diff --git a/test/test_traversal.py b/test/test_traversal.py index 538739d..32645a2 
100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -48,10 +48,10 @@ logger = logging.getLogger(__name__) (3, True), (3, False), ]) -def test_tree_connectivity(ctx_getter, dims, sources_are_targets): +def test_tree_connectivity(ctx_factory, dims, sources_are_targets): logging.basicConfig(level=logging.INFO) - ctx = ctx_getter() + ctx = ctx_factory() queue = cl.CommandQueue(ctx) dtype = np.float64 @@ -292,8 +292,8 @@ def test_tree_connectivity(ctx_getter, dims, sources_are_targets): # {{{ visualization helper (not a test) -def plot_traversal(ctx_getter, do_plot=False, well_sep_is_n_away=1): - ctx = ctx_getter() +def plot_traversal(ctx_factory, do_plot=False, well_sep_is_n_away=1): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) #for dims in [2, 3]: diff --git a/test/test_tree.py b/test/test_tree.py index d6fe9f8..3153644 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -43,10 +43,10 @@ logger = logging.getLogger(__name__) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("dims", [2, 3]) @pytest.mark.parametrize("nparticles", [9, 4096, 10**5]) -def test_bounding_box(ctx_getter, dtype, dims, nparticles): +def test_bounding_box(ctx_factory, dtype, dims, nparticles): logging.basicConfig(level=logging.INFO) - ctx = ctx_getter() + ctx = ctx_factory() queue = cl.CommandQueue(ctx) from boxtree.tools import AXIS_NAMES @@ -215,8 +215,8 @@ def particle_tree_test_decorator(f): @particle_tree_test_decorator -def test_single_box_particle_tree(ctx_getter, dtype, dims, do_plot=False): - ctx = ctx_getter() +def test_single_box_particle_tree(ctx_factory, dtype, dims, do_plot=False): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) from boxtree import TreeBuilder @@ -227,8 +227,8 @@ def test_single_box_particle_tree(ctx_getter, dtype, dims, do_plot=False): @particle_tree_test_decorator -def test_two_level_particle_tree(ctx_getter, dtype, dims, do_plot=False): - ctx = ctx_getter() +def 
test_two_level_particle_tree(ctx_factory, dtype, dims, do_plot=False): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) from boxtree import TreeBuilder @@ -239,8 +239,8 @@ def test_two_level_particle_tree(ctx_getter, dtype, dims, do_plot=False): @particle_tree_test_decorator -def test_unpruned_particle_tree(ctx_getter, dtype, dims, do_plot=False): - ctx = ctx_getter() +def test_unpruned_particle_tree(ctx_factory, dtype, dims, do_plot=False): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) from boxtree import TreeBuilder @@ -252,8 +252,8 @@ def test_unpruned_particle_tree(ctx_getter, dtype, dims, do_plot=False): @particle_tree_test_decorator -def test_particle_tree_with_reallocations(ctx_getter, dtype, dims, do_plot=False): - ctx = ctx_getter() +def test_particle_tree_with_reallocations(ctx_factory, dtype, dims, do_plot=False): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) from boxtree import TreeBuilder @@ -265,8 +265,8 @@ def test_particle_tree_with_reallocations(ctx_getter, dtype, dims, do_plot=False @particle_tree_test_decorator def test_particle_tree_with_many_empty_leaves( - ctx_getter, dtype, dims, do_plot=False): - ctx = ctx_getter() + ctx_factory, dtype, dims, do_plot=False): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) from boxtree import TreeBuilder @@ -277,8 +277,8 @@ def test_particle_tree_with_many_empty_leaves( @particle_tree_test_decorator -def test_vanilla_particle_tree(ctx_getter, dtype, dims, do_plot=False): - ctx = ctx_getter() +def test_vanilla_particle_tree(ctx_factory, dtype, dims, do_plot=False): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) from boxtree import TreeBuilder @@ -289,9 +289,9 @@ def test_vanilla_particle_tree(ctx_getter, dtype, dims, do_plot=False): @particle_tree_test_decorator -def test_explicit_refine_weights_particle_tree(ctx_getter, dtype, dims, +def test_explicit_refine_weights_particle_tree(ctx_factory, dtype, dims, do_plot=False): - ctx = ctx_getter() + ctx = ctx_factory() queue = 
cl.CommandQueue(ctx) from boxtree import TreeBuilder @@ -309,8 +309,8 @@ def test_explicit_refine_weights_particle_tree(ctx_getter, dtype, dims, @particle_tree_test_decorator -def test_non_adaptive_particle_tree(ctx_getter, dtype, dims, do_plot=False): - ctx = ctx_getter() +def test_non_adaptive_particle_tree(ctx_factory, dtype, dims, do_plot=False): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) from boxtree import TreeBuilder @@ -326,10 +326,10 @@ def test_non_adaptive_particle_tree(ctx_getter, dtype, dims, do_plot=False): @pytest.mark.opencl @pytest.mark.parametrize("dims", [2, 3]) -def test_source_target_tree(ctx_getter, dims, do_plot=False): +def test_source_target_tree(ctx_factory, dims, do_plot=False): logging.basicConfig(level=logging.INFO) - ctx = ctx_getter() + ctx = ctx_factory() queue = cl.CommandQueue(ctx) nsources = 2 * 10**5 @@ -439,10 +439,10 @@ def test_source_target_tree(ctx_getter, dims, do_plot=False): @pytest.mark.opencl @pytest.mark.parametrize("dims", [2, 3]) @pytest.mark.parametrize("extent_norm", ["linf", "l2"]) -def test_extent_tree(ctx_getter, dims, extent_norm, do_plot=False): +def test_extent_tree(ctx_factory, dims, extent_norm, do_plot=False): logging.basicConfig(level=logging.INFO) - ctx = ctx_getter() + ctx = ctx_factory() queue = cl.CommandQueue(ctx) nsources = 100000 @@ -662,10 +662,10 @@ def test_extent_tree(ctx_getter, dims, extent_norm, do_plot=False): @pytest.mark.opencl @pytest.mark.geo_lookup @pytest.mark.parametrize("dims", [2, 3]) -def test_leaves_to_balls_query(ctx_getter, dims, do_plot=False): +def test_leaves_to_balls_query(ctx_factory, dims, do_plot=False): logging.basicConfig(level=logging.INFO) - ctx = ctx_getter() + ctx = ctx_factory() queue = cl.CommandQueue(ctx) nparticles = 10**5 @@ -766,8 +766,8 @@ def run_area_query_test(ctx, queue, tree, ball_centers, ball_radii): @pytest.mark.opencl @pytest.mark.area_query @pytest.mark.parametrize("dims", [2, 3]) -def test_area_query(ctx_getter, dims, do_plot=False): - ctx 
= ctx_getter() +def test_area_query(ctx_factory, dims, do_plot=False): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) nparticles = 10**5 @@ -795,12 +795,12 @@ def test_area_query(ctx_getter, dims, do_plot=False): @pytest.mark.opencl @pytest.mark.area_query @pytest.mark.parametrize("dims", [2, 3]) -def test_area_query_balls_outside_bbox(ctx_getter, dims, do_plot=False): +def test_area_query_balls_outside_bbox(ctx_factory, dims, do_plot=False): """ The input to the area query includes balls whose centers are not within the tree bounding box. """ - ctx = ctx_getter() + ctx = ctx_factory() queue = cl.CommandQueue(ctx) nparticles = 10**4 @@ -835,8 +835,8 @@ def test_area_query_balls_outside_bbox(ctx_getter, dims, do_plot=False): @pytest.mark.opencl @pytest.mark.area_query @pytest.mark.parametrize("dims", [2, 3]) -def test_area_query_elwise(ctx_getter, dims, do_plot=False): - ctx = ctx_getter() +def test_area_query_elwise(ctx_factory, dims, do_plot=False): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) nparticles = 10**5 @@ -904,8 +904,8 @@ def test_area_query_elwise(ctx_getter, dims, do_plot=False): @pytest.mark.parametrize("lookbehind", [0, 1]) @pytest.mark.parametrize("skip_prune", [True, False]) @pytest.mark.parametrize("dims", [2, 3]) -def test_level_restriction(ctx_getter, dims, skip_prune, lookbehind, do_plot=False): - ctx = ctx_getter() +def test_level_restriction(ctx_factory, dims, skip_prune, lookbehind, do_plot=False): + ctx = ctx_factory() queue = cl.CommandQueue(ctx) nparticles = 10**5 @@ -985,10 +985,10 @@ def test_level_restriction(ctx_getter, dims, skip_prune, lookbehind, do_plot=Fal @pytest.mark.geo_lookup @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("dims", [2, 3]) -def test_space_invader_query(ctx_getter, dims, dtype, do_plot=False): +def test_space_invader_query(ctx_factory, dims, dtype, do_plot=False): logging.basicConfig(level=logging.INFO) - ctx = ctx_getter() + ctx = ctx_factory() queue = 
cl.CommandQueue(ctx) dtype = np.dtype(dtype) -- GitLab From 18d5b8dcae5ecd8f7cd12d90b81e0c94a9970f0c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 13 Nov 2018 23:07:36 -0600 Subject: [PATCH 3/5] Get rid of instances of with_offset=True --- boxtree/tools.py | 10 +++++----- boxtree/tree_build_kernels.py | 13 ++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/boxtree/tools.py b/boxtree/tools.py index 9402d9b..7a8c24b 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -388,18 +388,18 @@ class GappyCopyAndMapKernel: from boxtree.tools import VectorArg args = [ - VectorArg(dtype, "input_ary", with_offset=True), - VectorArg(dtype, "output_ary", with_offset=True), + VectorArg(dtype, "input_ary"), + VectorArg(dtype, "output_ary"), ] if have_src_indices: - args.append(VectorArg(src_index_dtype, "from_indices", with_offset=True)) + args.append(VectorArg(src_index_dtype, "from_indices")) if have_dst_indices: - args.append(VectorArg(dst_index_dtype, "to_indices", with_offset=True)) + args.append(VectorArg(dst_index_dtype, "to_indices")) if map_values: - args.append(VectorArg(dtype, "value_map", with_offset=True)) + args.append(VectorArg(dtype, "value_map")) from pyopencl.tools import dtype_to_ctype src = GAPPY_COPY_TPL.render( diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index d302478..3bab74b 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -1496,8 +1496,8 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, context, common_arguments + [ - VectorArg(np.int32, "box_has_children", with_offset=True), - VectorArg(np.int32, "box_force_split", with_offset=True), + VectorArg(np.int32, "box_has_children"), + VectorArg(np.int32, "box_force_split"), ScalarArg(coord_dtype, "root_extent"), ] + [VectorArg(box_id_dtype, "box_child_ids_mnr_{mnr}".format(mnr=mnr)) @@ -1531,11 +1531,10 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, context, common_arguments + 
[ - VectorArg(np.int32, "box_has_children", with_offset=True), - VectorArg(np.int32, "box_force_split", with_offset=True), - VectorArg(particle_id_dtype, "new_user_srcntgt_ids", - with_offset=True), - VectorArg(box_id_dtype, "new_srcntgt_box_ids", with_offset=True), + VectorArg(np.int32, "box_has_children"), + VectorArg(np.int32, "box_force_split"), + VectorArg(particle_id_dtype, "new_user_srcntgt_ids"), + VectorArg(box_id_dtype, "new_srcntgt_box_ids"), ], str(particle_renumberer_kernel_source), name="renumber_particles", preamble=( -- GitLab From 8704f412ca5112ceb16f954f6a63e7d127353341 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 14 Nov 2018 04:29:40 -0500 Subject: [PATCH 4/5] flake8 fix --- boxtree/traversal.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 17ef571..2d60f68 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1869,7 +1869,8 @@ class FMMTraversalBuilder: ("from_sep_siblings", FROM_SEP_SIBLINGS_TEMPLATE, [ VectorArg(box_id_dtype, "target_or_target_parent_boxes"), - VectorArg(box_id_dtype, "box_parent_ids", with_offset=False), + VectorArg(box_id_dtype, "box_parent_ids", + with_offset=False), VectorArg(box_id_dtype, "same_level_non_well_sep_boxes_starts"), VectorArg(box_id_dtype, @@ -1899,7 +1900,8 @@ class FMMTraversalBuilder: [ ScalarArg(coord_dtype, "stick_out_factor"), VectorArg(box_id_dtype, "target_or_target_parent_boxes"), - VectorArg(box_id_dtype, "box_parent_ids", with_offset=False), + VectorArg(box_id_dtype, "box_parent_ids", + with_offset=False), VectorArg(box_id_dtype, "same_level_non_well_sep_boxes_starts"), VectorArg(box_id_dtype, -- GitLab From 7de32c5334c0456f9a1dbf336db63d87c3fefe12 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 29 Nov 2018 01:30:47 -0600 Subject: [PATCH 5/5] Un-deprecate, directly test loopy kernels --- boxtree/tools.py | 83 +++++++++++++++++++++++++--------------------- test/test_tools.py | 22 ++++++++++++ 2 files 
changed, 67 insertions(+), 38 deletions(-) diff --git a/boxtree/tools.py b/boxtree/tools.py index 7a8c24b..56180ac 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -32,6 +32,9 @@ from pyopencl.tools import ScalarArg # noqa from mako.template import Template from pytools.obj_array import make_obj_array from boxtree.fmm import TimingFuture, TimingResult +import loopy as lp + +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa from functools import partial @@ -107,16 +110,16 @@ def make_normal_particle_array(queue, nparticles, dims, dtype, seed=15): def make_surface_particle_array(queue, nparticles, dims, dtype, seed=15): - import loopy as lp - if dims == 2: def get_2d_knl(dtype): knl = lp.make_kernel( "{[i]: 0<=i<n}", """ - <> phi = 2*M_PI/n * i - x[i] = 0.5* (3*cos(phi) + 2*sin(3*phi)) - y[i] = 0.5* (1*sin(phi) + 1.5*sin(2*phi)) + for i + <> phi = 2*M_PI/n * i + x[i] = 0.5* (3*cos(phi) + 2*sin(3*phi)) + y[i] = 0.5* (1*sin(phi) + 1.5*sin(2*phi)) + end """, [ lp.GlobalArg("x,y", dtype, shape=lp.auto), @@ -140,11 +143,13 @@ def make_surface_particle_array(queue, nparticles, dims, dtype, seed=15): knl = lp.make_kernel( "{[i,j]: 0<=i,j<n}", """ - <> phi = 2*M_PI/n * i - <> theta = 2*M_PI/n * j - x[i,j] = 5*cos(phi) * (3 + cos(theta)) - y[i,j] = 5*sin(phi) * (3 + cos(theta)) - z[i,j] = 5*sin(theta) + for i,j + <> phi = 2*M_PI/n * i + <> theta = 2*M_PI/n * j + x[i,j] = 5*cos(phi) * (3 + cos(theta)) + y[i,j] = 5*sin(phi) * (3 + cos(theta)) + z[i,j] = 5*sin(theta) + end """, [ lp.GlobalArg("x,y,z,", dtype, shape=lp.auto), @@ -166,8 +171,6 @@ def make_surface_particle_array(queue, nparticles, dims, dtype, seed=15): def make_uniform_particle_array(queue, nparticles, dims, dtype, seed=15): - import loopy as lp - if dims == 2: n = int(nparticles**0.5) @@ -175,13 +178,15 @@ def make_uniform_particle_array(queue, nparticles, dims, dtype, seed=15): knl = lp.make_kernel( "{[i,j]: 0<=i,j<n}", """ - <> xx = 4*i/(n-1) - <> yy = 4*j/(n-1) - <float64> angle = 0.3 - <> s = sin(angle) - <> c = cos(angle) - x[i,j] =
c*xx + s*yy - 2 - y[i,j] = -s*xx + c*yy - 2 + for i,j + <> xx = 4*i/(n-1) + <> yy = 4*j/(n-1) + <float64> angle = 0.3 + <> s = sin(angle) + <> c = cos(angle) + x[i,j] = c*xx + s*yy - 2 + y[i,j] = -s*xx + c*yy - 2 + end """, [ lp.GlobalArg("x,y", dtype, shape=lp.auto), @@ -205,25 +210,27 @@ def make_uniform_particle_array(queue, nparticles, dims, dtype, seed=15): knl = lp.make_kernel( "{[i,j,k]: 0<=i,j,k<n}", """ - <> xx = i/(n-1) - <> yy = j/(n-1) - <> zz = k/(n-1) - - <float64> phi = 0.3 - <> s1 = sin(phi) - <> c1 = cos(phi) - - <> xxx = c1*xx + s1*yy - <> yyy = -s1*xx + c1*yy - <> zzz = zz - - <float64> theta = 0.7 - <> s2 = sin(theta) - <> c2 = cos(theta) - - x[i,j,k] = 4 * (c2*xxx + s2*zzz) - 2 - y[i,j,k] = 4 * yyy - 2 - z[i,j,k] = 4 * (-s2*xxx + c2*zzz) - 2 + for i,j,k + <> xx = i/(n-1) + <> yy = j/(n-1) + <> zz = k/(n-1) + + <float64> phi = 0.3 + <> s1 = sin(phi) + <> c1 = cos(phi) + + <> xxx = c1*xx + s1*yy + <> yyy = -s1*xx + c1*yy + <> zzz = zz + + <float64> theta = 0.7 + <> s2 = sin(theta) + <> c2 = cos(theta) + + x[i,j,k] = 4 * (c2*xxx + s2*zzz) - 2 + y[i,j,k] = 4 * yyy - 2 + z[i,j,k] = 4 * (-s2*xxx + c2*zzz) - 2 + end """, [ lp.GlobalArg("x,y,z", dtype, shape=lp.auto), diff --git a/test/test_tools.py b/test/test_tools.py index 0a2f23e..2da427e 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -26,6 +26,17 @@ THE SOFTWARE.
""" +import pytest +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + + +from boxtree.tools import ( # noqa: F401 + make_normal_particle_array as p_normal, + make_surface_particle_array as p_surface, + make_uniform_particle_array as p_uniform) + + def test_device_record(): from boxtree.tools import DeviceDataRecord @@ -52,6 +63,17 @@ def test_device_record(): assert np.array_equal(record_host.obj_array[i], record.obj_array[i]) +@pytest.mark.parametrize("array_factory", (p_normal, p_surface, p_uniform)) +@pytest.mark.parametrize("dim", (2, 3)) +@pytest.mark.parametrize("dtype", (np.float32, np.float64)) +def test_particle_array(ctx_factory, array_factory, dim, dtype): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + particles = array_factory(queue, 1000, dim, dtype) + assert len(particles) == dim + assert all(len(particles[0]) == len(axis) for axis in particles) + + # You can test individual routines by typing # $ python test_tools.py 'test_routine' -- GitLab