From 02addfd6fc2b929a5e1eea9db6fcf6816ae963e0 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 7 Jul 2015 13:37:24 -0500 Subject: [PATCH 01/20] First setup steps for refine_weights --- boxtree/tree_build.py | 55 +++++++++++++++++++++++++++-------- boxtree/tree_build_kernels.py | 40 ++++++++++++++++++------- 2 files changed, 72 insertions(+), 23 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 0842064..39bb70c 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -1,7 +1,4 @@ -from __future__ import division -from __future__ import absolute_import -from six.moves import range -from six.moves import zip +from __future__ import division, absolute_import __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -25,6 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +from six.moves import range, zip import numpy as np from pytools import memoize_method @@ -72,9 +70,10 @@ class TreeBuilder(object): # {{{ run control - def __call__(self, queue, particles, max_particles_in_box, + def __call__(self, queue, particles, max_particles_in_box=None, allocator=None, debug=False, targets=None, source_radii=None, target_radii=None, stick_out_factor=0.25, + refine_weights=None, wait_for=None, non_adaptive=False, **kwargs): """ @@ -85,12 +84,14 @@ class TreeBuilder(object): Must have the same (inner) dtype as *particles*. :arg source_radii: If not *None*, a :class:`pyopencl.array.Array` of the same dtype as *particles*. - If this is given, *targets* must also be given, i.e. sources and targets must be separate. See :ref:`extent`. :arg target_radii: Like *source_radii*, but for targets. :arg stick_out_factor: See :attr:`Tree.stick_out_factor` and :ref:`extent`. + :arg refine_weights: If not *None*, a :class:`pyopencl.array.Array` of the + type :class:`numpy.float32`. A box will be split if it has a cumulative + refine_weight greater than 1. :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event` instances for whose completion this command waits before starting exeuction. @@ -160,7 +161,7 @@ class TreeBuilder(object): def zeros(shape, dtype): result = (cl.array.empty(queue, shape, dtype, allocator=allocator) - .fill(0, wait_for=wait_for)) + .fill(0)) event, = result.events return result, event @@ -240,6 +241,35 @@ class TreeBuilder(object): # }}} + # {{{ process refine_weights + + from boxtree.tree_build_kernels import refine_weight_dtype + + if max_particles_in_box is not None and refine_weights is not None: + raise ValueError("may only specify one of max_particles_in_box and " + "refine_weights") + elif max_particles_in_box is None or refine_weights is None: + raise ValueError("must specify at least one of max_particles_in_box and " + "refine_weights") + elif max_particles_in_box is None: + refine_weights = ( + cl.array.empty( + queue, nsrcntgts, refine_weight_dtype, allocator=allocator) + .fill(refine_weight_dtype.type(1/max_particles_in_box))) + refine_weights, evt = cl.a + event, = refine_weights.events + prep_events.append(event) + elif refine_weights is not None: + if refine_weights.dtype != refine_weight_dtype: + raise TypeError("refine_weights must have dtype '%s'" + % refine_weight_dtype) + + total_refine_weight = cl.array.sum(refine_weights).get() + + del max_particles_in_box + + # }}} + # {{{ find and process bounding box bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) @@ -298,7 +328,8 @@ class TreeBuilder(object): # to test the reallocation code. nboxes_guess = kwargs.get("nboxes_guess") if nboxes_guess is None: - nboxes_guess = div_ceil(nsrcntgts, max_particles_in_box) * 2**dimensions + nboxes_guess = ( + (1 + int(total_refine_weight / 2)) * 2**dimensions) # per-box morton bin counts box_morton_bin_counts = empty(nboxes_guess, @@ -356,7 +387,7 @@ class TreeBuilder(object): from time import time start_time = time() - if nsrcntgts > max_particles_in_box: + if total_refine_weight > 1: level = 1 else: level = 0 @@ -384,10 +415,11 @@ class TreeBuilder(object): common_args = ((morton_bin_counts, morton_nrs, box_start_flags, srcntgt_box_ids, split_box_ids, box_morton_bin_counts, + refine_weights, box_srcntgt_starts, box_srcntgt_counts_cumul, box_parent_ids, box_morton_nrs, nboxes_dev, - level, max_particles_in_box, bbox, + level, bbox, user_srcntgt_ids) + tuple(srcntgts) + ((srcntgt_radii,) if srcntgts_have_extent else ()) @@ -408,8 +440,8 @@ class TreeBuilder(object): srcntgt_box_ids, box_srcntgt_starts, box_srcntgt_counts_cumul, - max_particles_in_box, box_morton_bin_counts, + refine_weights, box_levels, level, @@ -876,7 +908,6 @@ class TreeBuilder(object): box_srcntgt_counts_cumul, box_source_counts_cumul, box_target_counts_cumul, - max_particles_in_box, box_levels, nlevels, # output if srcntgts_have_extent, input+output otherwise diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index da84488..11ce547 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -98,14 +98,20 @@ class _KernelInfo(Record): # {{{ data types +refine_weight_dtype = np.dtype(np.float32) + + @memoize def make_morton_bin_count_type(device, dimensions, particle_id_dtype, srcntgts_have_extent): fields = [] + fields.append(("refine_weight", refine_weight_dtype)) + # Non-child srcntgts are sorted *before* all the child srcntgts. if srcntgts_have_extent: fields.append(("nonchild_srcntgts", particle_id_dtype)) + fields.append(("nonchild_refine_weight", refine_weight_dtype)) from boxtree.tools import padded_bin for mnr in range(2**dimensions): @@ -135,6 +141,7 @@ def make_morton_bin_count_type(device, dimensions, particle_id_dtype, TYPE_DECL_PREAMBLE_TPL = Template(r"""//CL// typedef ${dtype_to_ctype(morton_bin_count_dtype)} morton_counts_t; typedef morton_counts_t scan_t; + typedef ${dtype_to_ctype(refine_weight_dtype)} refine_weight_t; typedef ${dtype_to_ctype(bbox_dtype)} bbox_t; typedef ${dtype_to_ctype(coord_dtype)} coord_t; typedef ${dtype_to_ctype(coord_vec_dtype)} coord_vec_t; @@ -189,9 +196,13 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// scan_t scan_t_neutral() { scan_t result; + result.refine_weight = 0; + %if srcntgts_have_extent: result.nonchild_srcntgts = 0; + result.nonchild_refine_weight = 0; %endif + %for mnr in range(2**dimensions): result.pcnt${padded_bin(mnr, dimensions)} = 0; %endfor @@ -205,8 +216,11 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// { if (!across_seg_boundary) { + b.refine_weight += a.refine_weight; + %if srcntgts_have_extent: b.nonchild_srcntgts += a.nonchild_srcntgts; + b.nonchild_refine_weight += a.nonchild_refine_weight; %endif %for mnr in range(2**dimensions): @@ -228,6 +242,7 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// bbox_t const *bbox, global morton_nr_t *morton_nrs, // output/side effect global particle_id_t *user_srcntgt_ids + global refine_weight_t *refine_weights, %for ax in axis_names: , global const coord_t *${ax} %endfor @@ -321,6 +336,7 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// %if srcntgts_have_extent: result.nonchild_srcntgts = (level_morton_number == -1); %endif + result.refine_weight = refine_weights[user_srcntgt_id]; %for mnr in range(2**dimensions): <% field = "pcnt"+padded_bin(mnr, dimensions) %> result.${field} = (level_morton_number == ${mnr}); @@ -377,8 +393,8 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( box_id_t *srcntgt_box_ids, particle_id_t *box_srcntgt_starts, particle_id_t *box_srcntgt_counts_cumul, - particle_id_t max_particles_in_box, morton_counts_t *box_morton_bin_counts, + refine_weight_t *refine_weights, box_level_t *box_levels, box_level_t level, @@ -396,7 +412,6 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( __global particle_id_t *box_srcntgt_starts, __global particle_id_t *box_srcntgt_counts_cumul, __global morton_counts_t *box_morton_bin_counts, - particle_id_t max_particles_in_box, __global box_level_t *box_levels, box_level_t level ) @@ -408,10 +423,10 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( result += *nboxes; %if srcntgts_have_extent: - const particle_id_t nonchild_srcntgts_in_box = - box_morton_bin_counts[box_id].nonchild_srcntgts; + const particle_id_t nonchild_refine_weight = + box_morton_bin_counts[box_id].nonchild_refine_weight; %else: - const particle_id_t nonchild_srcntgts_in_box = 0; + const particle_id_t nonchild_refine_weight = 0; %endif particle_id_t first_particle_in_my_box = @@ -434,11 +449,11 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( && %if adaptive: /* box overfull? */ - box_srcntgt_counts_cumul[box_id] - nonchild_srcntgts_in_box - > max_particles_in_box + XXXXbox_srcntgt_counts_cumul[box_id] - nonchild_refine_weight + > 1 %else: /* box non-empty? */ - box_srcntgt_counts_cumul[box_id] - nonchild_srcntgts_in_box + XXXXbox_srcntgt_counts_cumul[box_id] - nonchild_refine_weight > 0 %endif ) @@ -452,7 +467,7 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( input_expr="""count_new_boxes_needed( i, srcntgt_box_ids[i], nboxes, box_srcntgt_starts, box_srcntgt_counts_cumul, box_morton_bin_counts, - max_particles_in_box, box_levels, level + box_levels, level )""", scan_expr="a + b", neutral="0", @@ -882,7 +897,6 @@ BOX_INFO_KERNEL_TPL = ElementwiseTemplate( particle_id_t *box_srcntgt_counts_cumul, particle_id_t *box_source_counts_cumul, particle_id_t *box_target_counts_cumul, - particle_id_t max_particles_in_box, box_level_t *box_levels, box_level_t nlevels, @@ -1105,6 +1119,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, coord_dtype=coord_dtype, coord_vec_dtype=coord_vec_dtype, bbox_dtype=bbox_dtype, + refine_weight_dtype=refine_weight_dtype, particle_id_dtype=particle_id_dtype, morton_bin_count_dtype=morton_bin_count_dtype, morton_nr_dtype=morton_nr_dtype, @@ -1169,6 +1184,9 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, VectorArg(morton_bin_count_dtype, "box_morton_bin_counts"), # [nsrcntgts] + VectorArg(refine_weight_dtype, "refine_weights"), + # [nsrcntgts] + # particle# at which each box starts VectorArg(particle_id_dtype, "box_srcntgt_starts"), # [nboxes] @@ -1186,7 +1204,6 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, VectorArg(box_id_dtype, "nboxes"), # [1] ScalarArg(np.int32, "level"), - ScalarArg(particle_id_dtype, "max_particles_in_box"), ScalarArg(bbox_dtype, "bbox"), VectorArg(particle_id_dtype, "user_srcntgt_ids"), # [nsrcntgts] @@ -1208,6 +1225,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, % ", ".join([ "i", "level", "&bbox", "morton_nrs", "user_srcntgt_ids", + "refine_weights", ] + ["%s" % ax for ax in axis_names] + (["srcntgt_radii"] if srcntgts_have_extent else []))), -- GitLab From f01bb0f36eedc1a186f8d4cae532d5a0de6cc133 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 5 Aug 2016 22:50:24 -0500 Subject: [PATCH 02/20] Treebuild with refine weights: initial cut. --- boxtree/tree_build.py | 67 +++++++++++++------ boxtree/tree_build_kernels.py | 118 ++++++++++++++++------------------ test/test_tree.py | 81 ++++++++++++++++++----- 3 files changed, 170 insertions(+), 96 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 39bb70c..da5526c 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -73,7 +73,7 @@ class TreeBuilder(object): def __call__(self, queue, particles, max_particles_in_box=None, allocator=None, debug=False, targets=None, source_radii=None, target_radii=None, stick_out_factor=0.25, - refine_weights=None, + refine_weights=None, max_leaf_refine_weight=None, wait_for=None, non_adaptive=False, **kwargs): """ @@ -245,28 +245,39 @@ class TreeBuilder(object): from boxtree.tree_build_kernels import refine_weight_dtype - if max_particles_in_box is not None and refine_weights is not None: + specified_max_particles_in_box = max_particles_in_box is not None + specified_refine_weights = refine_weights is not None and \ + max_leaf_refine_weight is not None + + if specified_max_particles_in_box and specified_refine_weights: raise ValueError("may only specify one of max_particles_in_box and " - "refine_weights") - elif max_particles_in_box is None or refine_weights is None: - raise ValueError("must specify at least one of max_particles_in_box and " - "refine_weights") - elif max_particles_in_box is None: + "refine_weights/max_leaf_refine_weight") + elif not specified_max_particles_in_box and not specified_refine_weights: + raise ValueError("must specify either max_particles_in_box or " + "refine_weights/max_leaf_refine_weight") + elif specified_max_particles_in_box: refine_weights = ( - cl.array.empty( - queue, nsrcntgts, refine_weight_dtype, allocator=allocator) - .fill(refine_weight_dtype.type(1/max_particles_in_box))) - refine_weights, evt = cl.a + cl.array.empty( + queue, nsrcntgts, refine_weight_dtype, allocator=allocator) + .fill(1)) event, = refine_weights.events prep_events.append(event) - elif refine_weights is not None: + max_leaf_refine_weight = max_particles_in_box + elif specified_refine_weights: if refine_weights.dtype != refine_weight_dtype: raise TypeError("refine_weights must have dtype '%s'" % refine_weight_dtype) + if max_leaf_refine_weight < cl.array.max(refine_weights).get(): + raise ValueError("woops") + if max_leaf_refine_weight <= 0: + raise ValueError("max_leaf_refine_weight must be positive") + total_refine_weight = cl.array.sum(refine_weights).get() del max_particles_in_box + del specified_max_particles_in_box + del specified_refine_weights # }}} @@ -324,12 +335,15 @@ class TreeBuilder(object): # you *must* also write reallocation code down below for the case when # nboxes_guess was too low. + nboxes_guess = kwargs.get("nboxes_guess") # Outside nboxes_guess feeding is solely for debugging purposes, # to test the reallocation code. - nboxes_guess = kwargs.get("nboxes_guess") if nboxes_guess is None: - nboxes_guess = ( - (1 + int(total_refine_weight / 2)) * 2**dimensions) + nboxes_guess = 2**dimensions * int( + (max_leaf_refine_weight + total_refine_weight - 1) + / max_leaf_refine_weight) + + assert nboxes_guess > 0 # per-box morton bin counts box_morton_bin_counts = empty(nboxes_guess, @@ -357,9 +371,13 @@ class TreeBuilder(object): prep_events.append(evt) # Initalize box 0 to contain all particles - evt = box_srcntgt_counts_cumul[0].fill( + box_srcntgt_counts_cumul[0].fill( nsrcntgts, queue=queue, wait_for=[evt]) + # box -> whether the box has a child + box_has_children, evt = zeros(nboxes_guess, dtype=np.dtype(np.int32)) + prep_events.append(evt) + # set parent of root box to itself evt = cl.enqueue_copy( queue, box_parent_ids.data, np.zeros((), dtype=box_parent_ids.dtype)) @@ -387,7 +405,7 @@ class TreeBuilder(object): from time import time start_time = time() - if total_refine_weight > 1: + if total_refine_weight > max_leaf_refine_weight: level = 1 else: level = 0 @@ -416,6 +434,7 @@ class TreeBuilder(object): box_start_flags, srcntgt_box_ids, split_box_ids, box_morton_bin_counts, refine_weights, + max_leaf_refine_weight, box_srcntgt_starts, box_srcntgt_counts_cumul, box_parent_ids, box_morton_nrs, nboxes_dev, @@ -442,6 +461,7 @@ class TreeBuilder(object): box_srcntgt_counts_cumul, box_morton_bin_counts, refine_weights, + max_leaf_refine_weight, box_levels, level, @@ -449,6 +469,7 @@ class TreeBuilder(object): nboxes_dev, # output: + box_has_children, split_box_ids, queue=queue, size=nsrcntgts, wait_for=wait_for) wait_for = [evt] @@ -489,6 +510,8 @@ class TreeBuilder(object): box_srcntgt_counts_cumul, evt = \ my_realloc_zeros(box_srcntgt_counts_cumul) resize_events.append(evt) + box_has_children, evt = my_realloc_zeros(box_has_children) + resize_events.append(evt) del my_realloc del my_realloc_zeros @@ -535,8 +558,8 @@ class TreeBuilder(object): split_and_sort_args = ( common_args + (new_user_srcntgt_ids, have_oversize_split_box, - new_srcntgt_box_ids, box_levels)) - + new_srcntgt_box_ids, box_levels, + box_has_children)) fin_debug("split and sort") evt = knl_info.split_and_sort_kernel(*split_and_sort_args, @@ -661,6 +684,9 @@ class TreeBuilder(object): box_srcntgt_counts_nonchild) prune_events.append(evt) + box_has_children, evt = prune_empty(box_has_children) + prune_events.append(evt) + # Remap level_start_box_nrs to new box IDs. # FIXME: It would be better to do this on the device. level_start_box_nrs = list( @@ -908,7 +934,7 @@ class TreeBuilder(object): box_srcntgt_counts_cumul, box_source_counts_cumul, box_target_counts_cumul, - box_levels, nlevels, + box_has_children, box_levels, nlevels, # output if srcntgts_have_extent, input+output otherwise box_source_counts_nonchild, box_target_counts_nonchild, @@ -939,6 +965,7 @@ class TreeBuilder(object): sources_are_targets=sources_are_targets, sources_have_extent=sources_have_extent, targets_have_extent=targets_have_extent, + box_has_children=box_has_children, particle_id_dtype=knl_info.particle_id_dtype, box_id_dtype=knl_info.box_id_dtype, diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index 11ce547..d33e5e0 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -98,7 +98,7 @@ class _KernelInfo(Record): # {{{ data types -refine_weight_dtype = np.dtype(np.float32) +refine_weight_dtype = np.dtype(np.int32) @memoize @@ -106,16 +106,16 @@ def make_morton_bin_count_type(device, dimensions, particle_id_dtype, srcntgts_have_extent): fields = [] - fields.append(("refine_weight", refine_weight_dtype)) - # Non-child srcntgts are sorted *before* all the child srcntgts. if srcntgts_have_extent: fields.append(("nonchild_srcntgts", particle_id_dtype)) - fields.append(("nonchild_refine_weight", refine_weight_dtype)) from boxtree.tools import padded_bin for mnr in range(2**dimensions): fields.append(("pcnt%s" % padded_bin(mnr, dimensions), particle_id_dtype)) + # Morton bin weight totals + for mnr in range(2**dimensions): + fields.append(("pwt%s" % padded_bin(mnr, dimensions), refine_weight_dtype)) dtype = np.dtype(fields) @@ -196,16 +196,17 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// scan_t scan_t_neutral() { scan_t result; - result.refine_weight = 0; %if srcntgts_have_extent: result.nonchild_srcntgts = 0; - result.nonchild_refine_weight = 0; %endif %for mnr in range(2**dimensions): result.pcnt${padded_bin(mnr, dimensions)} = 0; %endfor + %for mnr in range(2**dimensions): + result.pwt${padded_bin(mnr, dimensions)} = 0; + %endfor return result; } @@ -216,17 +217,20 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// { if (!across_seg_boundary) { - b.refine_weight += a.refine_weight; - %if srcntgts_have_extent: b.nonchild_srcntgts += a.nonchild_srcntgts; - b.nonchild_refine_weight += a.nonchild_refine_weight; %endif %for mnr in range(2**dimensions): <% field = "pcnt"+padded_bin(mnr, dimensions) %> b.${field} = a.${field} + b.${field}; %endfor + %for mnr in range(2**dimensions): + <% field = "pwt"+padded_bin(mnr, dimensions) %> + // XXX: This add_sat() seems to be miscompiled on POCL. + // Replace with + to make it work. + b.${field} = add_sat(a.${field}, b.${field}); + %endfor } return b; @@ -241,8 +245,8 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// const int level, bbox_t const *bbox, global morton_nr_t *morton_nrs, // output/side effect - global particle_id_t *user_srcntgt_ids - global refine_weight_t *refine_weights, + global particle_id_t *user_srcntgt_ids, + global refine_weight_t *refine_weights %for ax in axis_names: , global const coord_t *${ax} %endfor @@ -336,11 +340,15 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// %if srcntgts_have_extent: result.nonchild_srcntgts = (level_morton_number == -1); %endif - result.refine_weight = refine_weights[user_srcntgt_id]; %for mnr in range(2**dimensions): <% field = "pcnt"+padded_bin(mnr, dimensions) %> result.${field} = (level_morton_number == ${mnr}); %endfor + %for mnr in range(2**dimensions): + <% field = "pwt"+padded_bin(mnr, dimensions) %> + result.${field} = (level_morton_number == ${mnr}) ? + refine_weights[user_srcntgt_id] : 0; + %endfor morton_nrs[i] = level_morton_number; return result; @@ -395,6 +403,7 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( particle_id_t *box_srcntgt_counts_cumul, morton_counts_t *box_morton_bin_counts, refine_weight_t *refine_weights, + refine_weight_t max_leaf_refine_weight, box_level_t *box_levels, box_level_t level, @@ -402,17 +411,20 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( box_id_t *nboxes, /* output */ + int *box_has_children, box_id_t *split_box_ids, """, preamble=r"""//CL:mako// scan_t count_new_boxes_needed( particle_id_t i, box_id_t box_id, + refine_weight_t max_leaf_refine_weight, __global box_id_t *nboxes, __global particle_id_t *box_srcntgt_starts, __global particle_id_t *box_srcntgt_counts_cumul, __global morton_counts_t *box_morton_bin_counts, __global box_level_t *box_levels, + __global int *box_has_children, // output/side effect box_level_t level ) { @@ -422,15 +434,22 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( if (i == 0) result += *nboxes; + particle_id_t first_particle_in_my_box = + box_srcntgt_starts[box_id]; + %if srcntgts_have_extent: - const particle_id_t nonchild_refine_weight = - box_morton_bin_counts[box_id].nonchild_refine_weight; + const particle_id_t nonchild_srcntgts_in_box = + box_morton_bin_counts[box_id].nonchild_srcntgts; %else: - const particle_id_t nonchild_refine_weight = 0; + const particle_id_t nonchild_srcntgts_in_box = 0; %endif - particle_id_t first_particle_in_my_box = - box_srcntgt_starts[box_id]; + // Get box refine weight. + refine_weight_t box_refine_weight = 0; + %for mnr in range(2**dimensions): + box_refine_weight = add_sat(box_refine_weight, + box_morton_bin_counts[box_id].pwt${padded_bin(mnr, dimensions)}); + %endfor // Add 2**d to make enough room for a split of the current box // This will be the split_box_id for *all* particles in this box, @@ -442,32 +461,34 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( // If srcntgts have extent, then prior-level boxes // will keep asking for more boxes to be allocated. // Prevent that. - && box_levels[box_id] + 1 == level %endif && %if adaptive: /* box overfull? */ - XXXXbox_srcntgt_counts_cumul[box_id] - nonchild_refine_weight - > 1 + box_refine_weight + > max_leaf_refine_weight %else: /* box non-empty? */ - XXXXbox_srcntgt_counts_cumul[box_id] - nonchild_refine_weight + /* Note: Refine weights are allowed to be 0, + so check # of particles directly. */ + box_srcntgt_counts_cumul[box_id] - nonchild_srcntgts_in_box > 0 %endif ) { result += ${2**dimensions}; + box_has_children[box_id] = 1; } return result; } """, input_expr="""count_new_boxes_needed( - i, srcntgt_box_ids[i], nboxes, + i, srcntgt_box_ids[i], max_leaf_refine_weight, nboxes, box_srcntgt_starts, box_srcntgt_counts_cumul, box_morton_bin_counts, - box_levels, level + box_levels, box_has_children, level )""", scan_expr="a + b", neutral="0", @@ -520,26 +541,7 @@ SPLIT_AND_SORT_KERNEL_TPL = Template(r"""//CL// dbg_printf(("postproc %d:\n", i)); dbg_printf((" my box id: %d\n", ibox)); - particle_id_t box_srcntgt_count = box_srcntgt_counts_cumul[ibox]; - - %if srcntgts_have_extent: - const particle_id_t nonchild_srcntgt_count = - box_morton_bin_counts[ibox].nonchild_srcntgts; - - %else: - const particle_id_t nonchild_srcntgt_count = 0; - %endif - - %if adaptive: - bool do_split_box = - box_srcntgt_count - nonchild_srcntgt_count - > max_particles_in_box; - %else: - bool do_split_box = - box_srcntgt_count - nonchild_srcntgt_count - > 0; - %endif - + bool do_split_box = box_has_children[ibox]; %if srcntgts_have_extent: ## Only do split-box processing for srcntgts that were touched ## on the immediately preceding level. @@ -642,9 +644,11 @@ SPLIT_AND_SORT_KERNEL_TPL = Template(r"""//CL// box_srcntgt_counts_cumul[new_box_id] = new_count; box_levels[new_box_id] = level; - // For a non-adaptive run, max_particles_in_box drives the - // level loop. - if (new_count > max_particles_in_box) + refine_weight_t new_weight = + my_box_morton_bin_counts.pwt${padded_bin(mnr, dimensions)}; + + // This drives the level loop. + if (new_weight > max_leaf_refine_weight) { *have_oversize_split_box = 1; } @@ -897,6 +901,7 @@ BOX_INFO_KERNEL_TPL = ElementwiseTemplate( particle_id_t *box_srcntgt_counts_cumul, particle_id_t *box_source_counts_cumul, particle_id_t *box_target_counts_cumul, + int *box_has_children, box_level_t *box_levels, box_level_t nlevels, @@ -966,24 +971,10 @@ BOX_INFO_KERNEL_TPL = ElementwiseTemplate( PYOPENCL_ELWISE_CONTINUE; } - else if ( - %if adaptive: - particle_count - nonchild_srcntgt_count > max_particles_in_box - %else: - particle_count - nonchild_srcntgt_count > 0 - %endif - && box_levels[box_id] + 1 < nlevels) + else if (box_has_children[box_id]) { // This box has children, it is not a leaf. - // That second condition there covers a weird corner case. It's - // obviously true--a last-level box won't have children. But why - // is it necessary? It turns out that nonchild_srcntgt_count is not - // available (i.e. zero) for boxes on the last level. So these boxes - // look like they got split if they have enough non-child srcntgts, - // to the first part of the 'if' condition. But in fact they weren't, - // because of their non-child srcntgts. - my_box_flags |= BOX_HAS_CHILDREN; %if sources_are_targets: @@ -1187,6 +1178,8 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, VectorArg(refine_weight_dtype, "refine_weights"), # [nsrcntgts] + ScalarArg(refine_weight_dtype, "max_leaf_refine_weight"), + # particle# at which each box starts VectorArg(particle_id_dtype, "box_srcntgt_starts"), # [nboxes] @@ -1249,11 +1242,13 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, ("box_id_t", box_id_dtype), ("morton_counts_t", morton_bin_count_dtype), ("box_level_t", box_level_dtype), + ("refine_weight_t", refine_weight_dtype), ), var_values=( ("dimensions", dimensions), ("srcntgts_have_extent", srcntgts_have_extent), ("adaptive", adaptive), + ("padded_bin", padded_bin), ), more_preamble=generic_preamble) @@ -1282,6 +1277,7 @@ def get_tree_build_kernel_info(context, dimensions, coord_dtype, VectorArg(np.int32, "have_oversize_split_box", with_offset=True), VectorArg(box_id_dtype, "new_srcntgt_box_ids", with_offset=True), VectorArg(box_level_dtype, "box_levels", with_offset=True), + VectorArg(np.int32, "box_has_children", with_offset=True), ], str(split_and_sort_kernel_source), name="split_and_sort", preamble=( diff --git a/test/test_tree.py b/test/test_tree.py index b3d1841..580ce88 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -84,7 +84,8 @@ def test_bounding_box(ctx_getter, dtype, dims, nparticles): # {{{ test basic (no source/target distinction) tree build def run_build_test(builder, queue, dims, dtype, nparticles, do_plot, - max_particles_in_box=30, **kwargs): + max_particles_in_box=None, max_leaf_refine_weight=None, + refine_weights=None, **kwargs): dtype = np.dtype(dtype) if dtype == np.float32: @@ -100,9 +101,14 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot, pytest.xfail("2D float doesn't work on POCL") logger.info(75*"-") - logger.info("%dD %s - %d particles - max %d per box - %s" % ( + if max_particles_in_box is not None: + logger.info("%dD %s - %d particles - max %d per box - %s" % ( dims, dtype.type.__name__, nparticles, max_particles_in_box, " - ".join("%s: %s" % (k, v) for k, v in six.iteritems(kwargs)))) + else: + logger.info("%dD %s - %d particles - max leaf weight %d - %s" % ( + dims, dtype.type.__name__, nparticles, max_leaf_refine_weight, + " - ".join("%s: %s" % (k, v) for k, v in six.iteritems(kwargs)))) logger.info(75*"-") particles = make_normal_particle_array(queue, nparticles, dims, dtype) @@ -114,8 +120,10 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot, queue.finish() tree, _ = builder(queue, particles, - max_particles_in_box=max_particles_in_box, debug=True, - **kwargs) + max_particles_in_box=max_particles_in_box, + refine_weights=refine_weights, + max_leaf_refine_weight=max_leaf_refine_weight, + debug=True, **kwargs) tree = tree.get(queue=queue) sorted_particles = np.array(list(tree.sources)) @@ -124,6 +132,9 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot, assert (sorted_particles == unsorted_particles[:, tree.user_source_ids]).all() + if refine_weights is not None: + refine_weights_reordered = refine_weights.get()[tree.user_source_ids] + all_good_so_far = True if do_plot: @@ -136,7 +147,6 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot, scaled_tol = tol*tree.root_extent for ibox in range(tree.nboxes): - # Empty boxes exist in non-pruned trees--which themselves are undocumented. # These boxes will fail these tests. if not (tree.box_flags[ibox] & bfe.HAS_OWN_SRCNTGTS): @@ -177,6 +187,23 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot, if not all_good_here: print("BAD BOX", ibox) + if not (tree.box_flags[ibox] & bfe.HAS_CHILDREN): + # Check that leaf particle density is as promised. + nparticles_in_box = tree.box_source_counts_cumul[ibox] + if max_particles_in_box is not None: + if nparticles_in_box > max_particles_in_box: + print("too many particles ({0} > {1}); box {2}".format( + nparticles_in_box, max_particles_in_box, ibox)) + all_good_here = False + else: + assert refine_weights is not None + box_weight = np.sum( + refine_weights_reordered[start:start+nparticles_in_box]) + if box_weight > max_leaf_refine_weight: + print("refine weight exceeded ({0} > {1}); box {2}".format( + box_weight, max_leaf_refine_weight, ibox)) + all_good_here = False + all_good_so_far = all_good_so_far and all_good_here if do_plot: @@ -191,10 +218,6 @@ def particle_tree_test_decorator(f): f = pytest.mark.parametrize("dtype", [np.float64, np.float32])(f) f = pytest.mark.parametrize("dims", [2, 3])(f) - def wrapper(*args, **kwargs): - logging.basicConfig(level=logging.INFO) - f(*args, **kwargs) - return f @@ -207,7 +230,7 @@ def test_single_boxparticle_tree(ctx_getter, dtype, dims, do_plot=False): builder = TreeBuilder(ctx) run_build_test(builder, queue, dims, - dtype, 4, do_plot=do_plot) + dtype, 4, max_particles_in_box=30, do_plot=do_plot) @particle_tree_test_decorator @@ -219,7 +242,7 @@ def test_two_level_particle_tree(ctx_getter, dtype, dims, do_plot=False): builder = TreeBuilder(ctx) run_build_test(builder, queue, dims, - dtype, 50, do_plot=do_plot) + dtype, 50, max_particles_in_box=30, do_plot=do_plot) @particle_tree_test_decorator @@ -232,7 +255,7 @@ def test_unpruned_particle_tree(ctx_getter, dtype, dims, do_plot=False): # test unpruned tree build run_build_test(builder, queue, dims, dtype, 10**5, - do_plot=do_plot, skip_prune=True) + do_plot=do_plot, max_particles_in_box=30, skip_prune=True) @particle_tree_test_decorator @@ -244,7 +267,7 @@ def test_particle_tree_with_reallocations(ctx_getter, dtype, dims, do_plot=False builder = TreeBuilder(ctx) run_build_test(builder, queue, dims, dtype, 10**5, - do_plot=do_plot, nboxes_guess=5) + max_particles_in_box=30, do_plot=do_plot, nboxes_guess=5) @particle_tree_test_decorator @@ -257,7 +280,7 @@ def test_particle_tree_with_many_empty_leaves( builder = TreeBuilder(ctx) run_build_test(builder, queue, dims, dtype, 10**5, - do_plot=do_plot, max_particles_in_box=5) + max_particles_in_box=5, do_plot=do_plot) @particle_tree_test_decorator @@ -269,6 +292,34 @@ def test_vanilla_particle_tree(ctx_getter, dtype, dims, do_plot=False): builder = TreeBuilder(ctx) run_build_test(builder, queue, dims, dtype, 10**5, + max_particles_in_box=30, do_plot=do_plot) + + +@particle_tree_test_decorator +def test_explicit_refine_weights_particle_tree(ctx_getter, dtype, dims, + do_plot=False): + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + from boxtree import TreeBuilder + builder = TreeBuilder(ctx) + + nparticles = 10**5 + + if (dims == 3 + and queue.device.platform.name == "Portable Computing Language"): + pytest.xfail("suspected compiler issue with POCL") + + from pyopencl.clrandom import PhiloxGenerator + import random + random.seed(10) + rng = PhiloxGenerator(ctx) + refine_weights = cl.array.empty(queue, nparticles, np.int32) + evt = rng.fill_uniform(refine_weights, a=1, b=10) + cl.wait_for_events([evt]) + + run_build_test(builder, queue, dims, dtype, nparticles, + refine_weights=refine_weights, max_leaf_refine_weight=100, do_plot=do_plot) @@ -281,7 +332,7 @@ def test_non_adaptive_particle_tree(ctx_getter, dtype, dims, do_plot=False): builder = TreeBuilder(ctx) run_build_test(builder, queue, dims, dtype, 10**4, - do_plot=do_plot, non_adaptive=True) + max_particles_in_box=30, do_plot=do_plot, non_adaptive=True) # }}} -- GitLab From 5b8e8964111de300ab0d749ad32940f0bd2c37cb Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 5 Aug 2016 23:09:37 -0500 Subject: [PATCH 03/20] Elaborate on the tree build documentation. --- boxtree/tree_build_kernels.py | 37 +++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index d33e5e0..f57ac16 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -73,21 +73,34 @@ logger = logging.getLogger(__name__) # HOW DOES THE PRIMARY SCAN WORK? # ------------------------------- # -# This code sorts particles into an nD-tree of boxes. It does this by doing a -# (parallel) scan over particles and a (local, i.e. independent for each particle) -# postprocessing step for each level. +# This code sorts particles into an nD-tree of boxes. It does this by doing two +# succesive (parallel) scans over particles and a (local, i.e. independent for +# each particle) postprocessing step for each level. # -# The following information is being pushed around by the scan, which -# proceeds over particles: +# The following information is being pushed around by the scans, which +# proceed over particles: # -# - a cumulative count ("counts") of particles in each subbox ("morton_nr") at -# the current level, should the current box need to be subdivided. +# - a cumulative count ("pcnt") and weight ("pwt") of particles in each subbox +# ("morton_nr") at the current level, should the current box need to be +# subdivided. # -# - the "split_box_id". The very first entry here gets intialized to -# the number of boxes present at the previous level. If a box knows it needs to -# be subdivided, its first particle asks for 2**d new boxes. This gets scanned -# over by summing globally (unsegmented-ly). The splits are then realized in -# the post-processing step. +# - the "split_box_id". This is the box number that the particle gets pushed +# into. The very first entry here gets initialized to the number of boxes +# present at the previous level. +# +# Using this data, the stages of the algorithm proceeds as follow: +# +# 1. Count the number of particles in each subbox. This stage uses a segmented +# (per-box) scan to fill "pcnt" and "pwt". +# +# 2. Using a global (non-segmented) scan over the particles, make a decision +# whether to refine each box and compute the total number of new boxes +# needed. This stage also computes the split_box_id for each particle. If a +# box knows it needs to be subdivided, its first particle asks for 2**d new +# boxes. +# +# 3. Realize the splitting determined in #2. This stage proceeds in an +# element-wise fashion over the boxes at the current level. # # ----------------------------------------------------------------------------- -- GitLab From 348416d85d4f3a88cad52f5483c9db4723b921dd Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 5 Aug 2016 23:54:05 -0500 Subject: [PATCH 04/20] Add an add_sat() workaround. --- boxtree/tree_build_kernels.py | 14 +++++++++++--- test/test_tree.py | 4 ---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index f57ac16..e6174d7 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -225,6 +225,12 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// // }}} + inline int my_add_sat(int a, int b) + { + long result = (long) a + b; + return (result > INT_MAX) ? INT_MAX : result; + } + // {{{ scan 'add' operation scan_t scan_t_add(scan_t a, scan_t b, bool across_seg_boundary) { @@ -240,9 +246,11 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// %endfor %for mnr in range(2**dimensions): <% field = "pwt"+padded_bin(mnr, dimensions) %> - // XXX: This add_sat() seems to be miscompiled on POCL. - // Replace with + to make it work. - b.${field} = add_sat(a.${field}, b.${field}); + // XXX: The use of add_sat() seems to be causing trouble + // with multiple compilers. + // 1. POCL will miscompile and not work/crash. + // 2. Intel will seemingly go into an infinite loop. + b.${field} = my_add_sat(a.${field}, b.${field}); %endfor } diff --git a/test/test_tree.py b/test/test_tree.py index 580ce88..8ebdc1c 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -306,10 +306,6 @@ def test_explicit_refine_weights_particle_tree(ctx_getter, dtype, dims, nparticles = 10**5 - if (dims == 3 - and queue.device.platform.name == "Portable Computing Language"): - pytest.xfail("suspected compiler issue with POCL") - from pyopencl.clrandom import PhiloxGenerator import random random.seed(10) -- GitLab From de1cc4d1259f114f5a595f40568cd2fea1846af0 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 6 Aug 2016 00:35:01 -0500 Subject: [PATCH 05/20] Replace the other add_sat() with my_add_sat(). --- boxtree/tree_build_kernels.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index e6174d7..f97439c 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -436,6 +436,12 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( box_id_t *split_box_ids, """, preamble=r"""//CL:mako// + inline int my_add_sat(int a, int b) + { + long result = (long) a + b; + return (result > INT_MAX) ? INT_MAX : result; + } + scan_t count_new_boxes_needed( particle_id_t i, box_id_t box_id, @@ -468,7 +474,7 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( // Get box refine weight. refine_weight_t box_refine_weight = 0; %for mnr in range(2**dimensions): - box_refine_weight = add_sat(box_refine_weight, + box_refine_weight = my_add_sat(box_refine_weight, box_morton_bin_counts[box_id].pwt${padded_bin(mnr, dimensions)}); %endfor -- GitLab From 3d66aebd7754a2cc8ec4a8c3cbf683957525c348 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 6 Aug 2016 00:45:56 -0500 Subject: [PATCH 06/20] (Experiment) Replace my_add_sat() with a+b, just too see what happens. --- boxtree/tree_build_kernels.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index f97439c..eae2a8d 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -227,8 +227,7 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// inline int my_add_sat(int a, int b) { - long result = (long) a + b; - return (result > INT_MAX) ? INT_MAX : result; + return a + b; } // {{{ scan 'add' operation @@ -438,8 +437,7 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( preamble=r"""//CL:mako// inline int my_add_sat(int a, int b) { - long result = (long) a + b; - return (result > INT_MAX) ? INT_MAX : result; + return a + b; } scan_t count_new_boxes_needed( -- GitLab From a80d70a517b6cc521465f6cedcaa5616ad683b20 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 6 Aug 2016 02:37:17 -0500 Subject: [PATCH 07/20] Don't add box_has_children as an attribute to the tree. --- boxtree/tree_build.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index da5526c..fad99be 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -947,6 +947,8 @@ class TreeBuilder(object): # }}} + del box_has_children + # {{{ build output extra_tree_attrs = {} @@ -965,7 +967,6 @@ class TreeBuilder(object): sources_are_targets=sources_are_targets, sources_have_extent=sources_have_extent, targets_have_extent=targets_have_extent, - box_has_children=box_has_children, particle_id_dtype=knl_info.particle_id_dtype, box_id_dtype=knl_info.box_id_dtype, -- GitLab From 6f54142b10987cd9eabd0283d53bb17b78f8c284 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 6 Aug 2016 16:33:47 -0500 Subject: [PATCH 08/20] Fix error message; allow negative refine weights. --- boxtree/tree_build.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index fad99be..797e5e1 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -269,9 +269,10 @@ class TreeBuilder(object): % refine_weight_dtype) if max_leaf_refine_weight < cl.array.max(refine_weights).get(): - raise ValueError("woops") - if max_leaf_refine_weight <= 0: - raise ValueError("max_leaf_refine_weight must be positive") + raise ValueError( + "entries of refine_weights cannot exceed max_leaf_refine_weight") + if max_leaf_refine_weight < 0: + raise ValueError("max_leaf_refine_weight must be nonnegative") total_refine_weight = cl.array.sum(refine_weights).get() -- GitLab From bd6ad88a5ede2e7b43cbfde87c41895dcc1221e0 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 6 Aug 2016 16:34:07 -0500 Subject: [PATCH 09/20] Revert "(Experiment) Replace my_add_sat() with a+b, just too see what happens." This reverts commit 3d66aebd7754a2cc8ec4a8c3cbf683957525c348. --- boxtree/tree_build_kernels.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index eae2a8d..f97439c 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -227,7 +227,8 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// inline int my_add_sat(int a, int b) { - return a + b; + long result = (long) a + b; + return (result > INT_MAX) ? INT_MAX : result; } // {{{ scan 'add' operation @@ -437,7 +438,8 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( preamble=r"""//CL:mako// inline int my_add_sat(int a, int b) { - return a + b; + long result = (long) a + b; + return (result > INT_MAX) ? INT_MAX : result; } scan_t count_new_boxes_needed( -- GitLab From afa79b8a0c1bdc4a9e55cb0058d906a3c7cbc6ee Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 6 Aug 2016 16:37:57 -0500 Subject: [PATCH 10/20] Fix refine weight validation one more time. --- boxtree/tree_build.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 797e5e1..6d61a2f 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -271,8 +271,10 @@ class TreeBuilder(object): if max_leaf_refine_weight < cl.array.max(refine_weights).get(): raise ValueError( "entries of refine_weights cannot exceed max_leaf_refine_weight") - if max_leaf_refine_weight < 0: - raise ValueError("max_leaf_refine_weight must be nonnegative") + if 0 > cl.array.min(refine_weights).get(): + raise ValueError("all entries of refine_weights must be nonnegative") + if max_leaf_refine_weight <= 0: + raise ValueError("max_leaf_refine_weight must be positive") total_refine_weight = cl.array.sum(refine_weights).get() -- GitLab From a49ca013445d83595e35480d728bab1a74de09ee Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 6 Aug 2016 16:43:16 -0500 Subject: [PATCH 11/20] Fix doc typos. --- boxtree/tree_build_kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index f97439c..ce28843 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -88,13 +88,13 @@ logger = logging.getLogger(__name__) # into. The very first entry here gets initialized to the number of boxes # present at the previous level. # -# Using this data, the stages of the algorithm proceeds as follow: +# Using this data, the stages of the algorithm proceed as follow: # # 1. Count the number of particles in each subbox. This stage uses a segmented # (per-box) scan to fill "pcnt" and "pwt". # # 2. Using a global (non-segmented) scan over the particles, make a decision -# whether to refine each box and compute the total number of new boxes +# whether to refine each box, and compute the total number of new boxes # needed. This stage also computes the split_box_id for each particle. If a # box knows it needs to be subdivided, its first particle asks for 2**d new # boxes. -- GitLab From 04b7c4d1a5117fda951f8c7dac21e2c548644e58 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 6 Aug 2016 16:44:05 -0500 Subject: [PATCH 12/20] Fix one more typo. --- boxtree/tree_build_kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index ce28843..e86a161 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -88,7 +88,7 @@ logger = logging.getLogger(__name__) # into. The very first entry here gets initialized to the number of boxes # present at the previous level. # -# Using this data, the stages of the algorithm proceed as follow: +# Using this data, the stages of the algorithm proceed as follows: # # 1. Count the number of particles in each subbox. This stage uses a segmented # (per-box) scan to fill "pcnt" and "pwt". -- GitLab From cc38becfd28a698f47725c542a932531ee06c264 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 6 Aug 2016 16:48:02 -0500 Subject: [PATCH 13/20] Tweak nboxes_guess code. --- boxtree/tree_build.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 6d61a2f..2bd561d 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -338,13 +338,13 @@ class TreeBuilder(object): # you *must* also write reallocation code down below for the case when # nboxes_guess was too low. - nboxes_guess = kwargs.get("nboxes_guess") # Outside nboxes_guess feeding is solely for debugging purposes, # to test the reallocation code. + nboxes_guess = kwargs.get("nboxes_guess") if nboxes_guess is None: - nboxes_guess = 2**dimensions * int( + nboxes_guess = 2**dimensions * ( (max_leaf_refine_weight + total_refine_weight - 1) - / max_leaf_refine_weight) + // max_leaf_refine_weight) assert nboxes_guess > 0 -- GitLab From 743e8933601a9e2cb657910704c98367a8774b6c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 8 Aug 2016 14:04:44 -0500 Subject: [PATCH 14/20] Be careful about overflow. --- boxtree/tree_build.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 2bd561d..3491241 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -276,7 +276,8 @@ class TreeBuilder(object): if max_leaf_refine_weight <= 0: raise ValueError("max_leaf_refine_weight must be positive") - total_refine_weight = cl.array.sum(refine_weights).get() + total_refine_weight = cl.array.sum( + refine_weights, dtype=np.dtype(np.int64)).get() del max_particles_in_box del specified_max_particles_in_box -- GitLab From 99e0b9d6b68d8ac22716fe8adde290ddc1414ba3 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 8 Aug 2016 14:04:57 -0500 Subject: [PATCH 15/20] Elaborate on the bug in the comments, remove second my_add_sat(). --- boxtree/tree_build_kernels.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/boxtree/tree_build_kernels.py b/boxtree/tree_build_kernels.py index e86a161..b24c3b7 100644 --- a/boxtree/tree_build_kernels.py +++ b/boxtree/tree_build_kernels.py @@ -247,9 +247,11 @@ SCAN_PREAMBLE_TPL = Template(r"""//CL// %for mnr in range(2**dimensions): <% field = "pwt"+padded_bin(mnr, dimensions) %> // XXX: The use of add_sat() seems to be causing trouble - // with multiple compilers. - // 1. POCL will miscompile and not work/crash. - // 2. Intel will seemingly go into an infinite loop. + // with multiple compilers. For d=3: + // 1. POCL will miscompile and either give wrong + // results or crash. + // 2. Intel will use a large amount of memory. + // Versions tested: POCL 0.13, Intel OpenCL 16.1 b.${field} = my_add_sat(a.${field}, b.${field}); %endfor } @@ -436,12 +438,6 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( box_id_t *split_box_ids, """, preamble=r"""//CL:mako// - inline int my_add_sat(int a, int b) - { - long result = (long) a + b; - return (result > INT_MAX) ? INT_MAX : result; - } - scan_t count_new_boxes_needed( particle_id_t i, box_id_t box_id, @@ -474,7 +470,7 @@ SPLIT_BOX_ID_SCAN_TPL = ScanTemplate( // Get box refine weight. refine_weight_t box_refine_weight = 0; %for mnr in range(2**dimensions): - box_refine_weight = my_add_sat(box_refine_weight, + box_refine_weight = add_sat(box_refine_weight, box_morton_bin_counts[box_id].pwt${padded_bin(mnr, dimensions)}); %endfor -- GitLab From 1f7855dde1a6db4a39405ff80eb5c7749c84ae2a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 8 Aug 2016 14:56:53 -0500 Subject: [PATCH 16/20] Bump Python version for Intel CL builds. --- .gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6997187..614c2e0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,12 +1,12 @@ -Python 2.7 Intel CPU: +Python 3.5 Intel CPU: script: - - py_version=2.7 + - py_version=3.5 - export PYOPENCL_TEST=int:pu - EXTRA_INSTALL="numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: - - python2.7 + - python3.5 - intel-cl-cpu except: - tags -- GitLab From 1428cc71a1a9e8b50d606daf3d1627d68cf7a992 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 9 Aug 2016 18:49:39 -0500 Subject: [PATCH 17/20] Fix duplication. --- boxtree/tree_build.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 5dfe883..7ba2cc0 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -24,8 +24,6 @@ THE SOFTWARE. from six.moves import range, zip -from six.moves import range, zip - import numpy as np from pytools import memoize_method import pyopencl as cl -- GitLab From 3f58e302d4f61bfb43dad6d6cf161b2a7f863c38 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 9 Aug 2016 18:51:12 -0500 Subject: [PATCH 18/20] Whitespace fix. --- boxtree/tree_build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 7ba2cc0..226ef26 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -22,6 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ + from six.moves import range, zip import numpy as np -- GitLab From ef63a2ef3da92d4b976349c246e77ee20a56b5b4 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 9 Aug 2016 18:58:31 -0500 Subject: [PATCH 19/20] Documentation. --- boxtree/tree_build.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 226ef26..e0bcb1c 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -85,14 +85,21 @@ class TreeBuilder(object): Must have the same (inner) dtype as *particles*. :arg source_radii: If not *None*, a :class:`pyopencl.array.Array` of the same dtype as *particles*. + If this is given, *targets* must also be given, i.e. sources and targets must be separate. See :ref:`extent`. :arg target_radii: Like *source_radii*, but for targets. :arg stick_out_factor: See :attr:`Tree.stick_out_factor` and :ref:`extent`. :arg refine_weights: If not *None*, a :class:`pyopencl.array.Array` of the - type :class:`numpy.float32`. A box will be split if it has a cumulative - refine_weight greater than 1. + type :class:`numpy.int32`. A box will be split if it has a cumulative + refine_weight greater than *max_leaf_refine_weight*. If this is given, + *max_leaf_refine_weight* must also be given. + :arg max_leaf_refine_weight: If not *None*, specifies the maximum weight + of a leaf box. + :arg max_particles_in_box: If not *None*, specifies the maximum number + of particles in a leaf box. If this is given, both + *refine_weights* and *max_leaf_refine_weight* must be *None*. :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event` instances for whose completion this command waits before starting execution. @@ -162,7 +169,7 @@ class TreeBuilder(object): def zeros(shape, dtype): result = (cl.array.empty(queue, shape, dtype, allocator=allocator) - .fill(0)) + .fill(0, wait_for=wait_for)) event, = result.events return result, event -- GitLab From 930c5d83c0c7c520d4700056ac7d34869f88f72a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 9 Aug 2016 19:06:45 -0500 Subject: [PATCH 20/20] Documentation tweak. --- boxtree/tree_build.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index e0bcb1c..f84c9ed 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -94,7 +94,8 @@ class TreeBuilder(object): :arg refine_weights: If not *None*, a :class:`pyopencl.array.Array` of the type :class:`numpy.int32`. A box will be split if it has a cumulative refine_weight greater than *max_leaf_refine_weight*. If this is given, - *max_leaf_refine_weight* must also be given. + *max_leaf_refine_weight* must also be given and *max_particles_in_box* + must be *None*. :arg max_leaf_refine_weight: If not *None*, specifies the maximum weight of a leaf box. :arg max_particles_in_box: If not *None*, specifies the maximum number -- GitLab