From 6c40f8f8b2a72c55f56c2166372bdd6ab206cc2d Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 29 May 2019 17:14:37 -0500 Subject: [PATCH 01/57] Start to teach the traversal builder to compute rotation angles for List 2. --- boxtree/traversal.py | 24 ++++++++++++++++ test/test_traversal.py | 63 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 2d60f68..4d61141 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1534,6 +1534,17 @@ class FMMTraversalInfo(DeviceDataRecord): ``box_id_t [*]`` + To facilitate rotation-based translations ("point and shoot"), the following + lists record the angle between box translation pairs and the z-axis. + + .. attributes:: from_sep_siblings_rotation_classes + + ``int32 [*]`` + + .. attributes:: from_sep_siblings_rotation_class_to_angle + + ``double [nfrom_sep_siblings_rotation_classes]`` + .. ------------------------------------------------------------------------ .. rubric:: Separated Smaller Boxes ("List 3") .. ------------------------------------------------------------------------ @@ -1695,6 +1706,10 @@ class FMMTraversalInfo(DeviceDataRecord): def ntarget_or_target_parent_boxes(self): return len(self.target_or_target_parent_boxes) + @property + def nfrom_sep_siblings_rotation_classes(self): + return len(self.from_sep_siblings_rotation_class_to_angle) + # }}} @@ -2179,6 +2194,11 @@ class FMMTraversalBuilder: wait_for = [evt] from_sep_siblings = result["from_sep_siblings"] + from_sep_siblings_rotation_classes = ( + cl.array.zeros(queue, len(from_sep_siblings.lists), np.int32)) + from_sep_siblings_rotation_class_to_angle = ( + cl.array.zeros(queue, 1, np.float64)) + # }}} with_extent = tree.sources_have_extent or tree.targets_have_extent @@ -2340,6 +2360,10 @@ class FMMTraversalBuilder: from_sep_siblings_starts=from_sep_siblings.starts, from_sep_siblings_lists=from_sep_siblings.lists, + from_sep_siblings_rotation_classes=( + from_sep_siblings_rotation_classes), + from_sep_siblings_rotation_class_to_angle=( + from_sep_siblings_rotation_class_to_angle), from_sep_smaller_by_level=from_sep_smaller_by_level, target_boxes_sep_smaller_by_source_level=( diff --git a/test/test_traversal.py b/test/test_traversal.py index 32645a2..e89bb99 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -346,6 +346,69 @@ def plot_traversal(ctx_factory, do_plot=False, well_sep_is_n_away=1): # }}} +# {{{ test_from_sep_siblings_rotation_classes + +@pytest.mark.parametrize("well_sep_is_n_away", (1, 2)) +def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + dims = 3 + nparticles = 10**4 + dtype = np.float64 + + # {{{ build tree + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(queue.context, seed=15) + + from pytools.obj_array import make_obj_array + particles = make_obj_array([ + rng.normal(queue, nparticles, dtype=dtype) + for i in range(dims)]) + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + queue.finish() + tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True) + + # }}} + + # {{{ build traversal + + from boxtree.traversal import FMMTraversalBuilder + tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away) + trav, _ = tg(queue, tree) + + tree = tree.get(queue=queue) + trav = trav.get(queue=queue) + + centers = tree.box_centers.T + + # }}} + + # For each entry of from_sep_siblings, compute the source-target translation + # direction as a vector, and check that the from_sep_siblings rotation class + # in the traversal corresponds to the angle with the z-axis of the + # translation direction. + + for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes): + start, end = trav.from_sep_siblings_starts[itgt_box:itgt_box+2] + seps = trav.from_sep_siblings_lists[start:end] + rot_classes = trav.from_sep_siblings_rotation_classes[start:end] + + center = centers[tgt_ibox] + for box, rot_class in zip(seps, rot_classes): + translation_vec = centers[box] - center + cos_theta = translation_vec[2] / la.norm(translation_vec) + assert np.isclose( + cos_theta, + np.cos(trav.from_sep_siblings_rotation_class_to_angle[rot_class])) + +# }}} + + # You can test individual routines by typing # $ python test_traversal.py 'test_routine(cl.create_some_context)' -- GitLab From 44d7e5fcb4f5e1bbc84e3efaf9f2fa28c1dd1f2f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 29 May 2019 23:33:55 -0500 Subject: [PATCH 02/57] Add a list renumberer tool. --- boxtree/tools.py | 122 ++++++++++++++++++++++++++++++++++++++++----- test/test_tools.py | 15 ++++++ 2 files changed, 125 insertions(+), 12 deletions(-) diff --git a/boxtree/tools.py b/boxtree/tools.py index 56180ac..18a800a 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -580,25 +580,38 @@ from mako.template import Template BINARY_SEARCH_TEMPLATE = Template(""" -inline size_t bsearch(__global ${idx_t} *starts, size_t len, ${idx_t} val) +/* + * Returns the smallest value of i such that val <= arr[i]. + * If no such value exists, returns (size_t) -1. + */ +inline size_t bsearch( + __global const ${elem_t} *arr, + size_t len, + const ${elem_t} val) { - size_t l_idx = 0, r_idx = len - 1, my_idx; - for (;;) + if (arr[len - 1] < val) { - my_idx = (l_idx + r_idx) / 2; + return -1; + } + + size_t l = 0, r = len, i; + + while (1) + { + i = l + (r - l) / 2; - if (starts[my_idx] <= val && val < starts[my_idx + 1]) + if (val <= arr[i] && (i == 0 || arr[i - 1] < val)) { - return my_idx; + return i; } - if (starts[my_idx] > val) + if (arr[i] < val) { - r_idx = my_idx - 1; + l = i; } else { - l_idx = my_idx + 1; + r = i; } } } @@ -607,12 +620,97 @@ inline size_t bsearch(__global ${idx_t} *starts, size_t len, ${idx_t} val) class InlineBinarySearch(object): - def __init__(self, idx_t): - self.idx_t = idx_t + def __init__(self, elem_type_name): + self.render_vars = {"elem_t": elem_type_name} @memoize_method def __str__(self): - return BINARY_SEARCH_TEMPLATE.render(idx_t=self.idx_t) + return BINARY_SEARCH_TEMPLATE.render(**self.render_vars) + +# }}} + + +# {{{ list renumberer + +LIST_RENUMBERER_TEMPLATE = ElementwiseTemplate( + arguments=r""" + elem_t *orig, + elem_t *sorted, + renumbered_t *renumbered_orig, + unsigned int len, + """, + operation=r"""//CL// + renumbered_orig[i] = bsearch(sorted, len, orig[i]); + """, + name="list_renumberer") + + +class ListRenumberer(object): + """Renumber a list (with repetition) of N values to the range [0,1,...,N). + + Useful e.g. for compacting lists into a dense range. + + Returns a tuple (*renumbered*, *new_to_old*, *evt*), where *renumbered* is + the renumbered list and *new_to_old* maps renumbered values to new ones. + + Example:: + + >>> arr = cl.array.to_device(queue, np.array([1, 6, 6, 4, 3])) + >>> lr = ListRenumberer(ctx, np.int) + >>> renumbered, new_to_old, evt = lr(arr) + >>> renumbered + array([0, 3, 3, 2, 1]) + >>> new_to_old + array([1, 3, 4, 6]) + + """ + + def __init__(self, context, from_element_dtype, to_element_dtype=None): + from pyopencl.algorithm import RadixSort, unique + + if to_element_dtype is None: + to_element_dtype = from_element_dtype + + self.sort = RadixSort( + context, + [VectorArg(from_element_dtype, "arr")], + "arr[i]", + ["arr"]) + + self.uniq = unique + + self.list_renumberer = ( + LIST_RENUMBERER_TEMPLATE.build( + context, + type_aliases=( + ("elem_t", from_element_dtype), + ("renumbered_t", to_element_dtype), + ), + var_values=(), + more_preamble=str(InlineBinarySearch("elem_t")))) + + self.to_element_dtype = to_element_dtype + + def __call__(self, items, queue=None, wait_for=None): + if queue is None: + queue = items.queue + + (sorted_items,), evt = self.sort(items, queue=queue, wait_for=wait_for) + + new_to_old, uniq_count, evt = self.uniq(sorted_items, wait_for=[evt]) + uniq_count = uniq_count.get() + + renumbered_items = cl.array.empty( + queue, len(items), dtype=self.to_element_dtype) + + new_to_old = new_to_old[:uniq_count] + + evt = self.list_renumberer( + items, new_to_old, renumbered_items, len(new_to_old), + queue=queue, + wait_for=[evt] + renumbered_items.events + new_to_old.events) + + return renumbered_items, new_to_old, evt # }}} diff --git a/test/test_tools.py b/test/test_tools.py index 2da427e..530bb8b 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -63,6 +63,21 @@ def test_device_record(): assert np.array_equal(record_host.obj_array[i], record.obj_array[i]) +def test_list_renumberer(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + from boxtree.tools import ListRenumberer + + arr = cl.array.to_device(queue, np.array([9, 4, 1, 4, 5, 6, 9])) + lr = ListRenumberer(ctx, np.int) + + (renumbered_arr, new_to_old), _ = lr(arr) + + assert (renumbered_arr.get() == np.array([4, 1, 0, 1, 2, 3, 4])).all() + assert (new_to_old.get() == np.array([1, 4, 5, 6, 9])).all() + + @pytest.mark.parametrize("array_factory", (p_normal, p_surface, p_uniform)) @pytest.mark.parametrize("dim", (2, 3)) @pytest.mark.parametrize("dtype", (np.float32, np.float64)) -- GitLab From 9e6d1abca1850020db415a30f965327a24c475c0 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 29 May 2019 23:35:15 -0500 Subject: [PATCH 03/57] Fix test --- test/test_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_tools.py b/test/test_tools.py index 530bb8b..0cbccd6 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -72,7 +72,7 @@ def test_list_renumberer(ctx_factory): arr = cl.array.to_device(queue, np.array([9, 4, 1, 4, 5, 6, 9])) lr = ListRenumberer(ctx, np.int) - (renumbered_arr, new_to_old), _ = lr(arr) + renumbered_arr, new_to_old, _ = lr(arr) assert (renumbered_arr.get() == np.array([4, 1, 0, 1, 2, 3, 4])).all() assert (new_to_old.get() == np.array([1, 4, 5, 6, 9])).all() -- GitLab From 2dfe64f4bfa30cff24b80339c4757ca1ce77d916 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 May 2019 00:49:28 -0500 Subject: [PATCH 04/57] Get rotation class finder to work. --- boxtree/tools.py | 3 +- boxtree/traversal.py | 193 +++++++++++++++++++++++++++++++++++++++-- test/test_tools.py | 12 +-- test/test_traversal.py | 12 +-- 4 files changed, 199 insertions(+), 21 deletions(-) diff --git a/boxtree/tools.py b/boxtree/tools.py index 18a800a..014728c 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -675,7 +675,8 @@ class ListRenumberer(object): context, [VectorArg(from_element_dtype, "arr")], "arr[i]", - ["arr"]) + ["arr"], + key_dtype=from_element_dtype) self.uniq = unique diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 4d61141..4e22fce 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -171,6 +171,8 @@ typedef ${dtype_to_ctype(box_id_dtype)} box_id_t; <% vec_types_dict = dict(vec_types) %> typedef ${dtype_to_ctype(coord_dtype)} coord_t; typedef ${dtype_to_ctype(vec_types_dict[coord_dtype, dimensions])} coord_vec_t; +typedef ${dtype_to_ctype( + vec_types_dict[np.dtype(np.int32), dimensions])} int_coord_vec_t; #define COORD_T_MACH_EPS ((coord_t) ${ repr(np.finfo(coord_dtype).eps) }) @@ -580,6 +582,24 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) # {{{ from well-separated siblings ("list 2") FROM_SEP_SIBLINGS_TEMPLATE = r"""//CL// +/* + * Return an integer vector indicating the translation direction as a multiple + * of the box radius. + */ +inline int_coord_vec_t get_translation_vector( + coord_t root_extent, + int level, + coord_vec_t source_center, + coord_vec_t target_center) +{ + int_coord_vec_t result = (int_coord_vec_t) 0; + %for i in range(dimensions): + result.s${i} = rint( + (target_center.s${i} - source_center.s${i}) + / LEVEL_TO_RAD(level)); + %endfor + return result; +} void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) { @@ -619,7 +639,10 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) if (sep) { + int_coord_vec_t vec = get_translation_vector( + root_extent, level, center, sib_center); APPEND_from_sep_siblings(sib_box_id); + APPEND_translation_vectors(vec); } } } @@ -1344,6 +1367,141 @@ class _ListMerger(object): # }}} +# {{{ rotation classes finder + +ROTATION_ANGLE_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// + inline int gcd(int a, int b) + { + while (b) + { + int tmp = a; + a = b; + b = tmp % b; + } + return a; + } + + /* + * Normalizes a integer vector by dividing it by its (absolute) GCD. + */ + inline int_coord_vec_t gcd_normalize(int_coord_vec_t vec) + { + int vec_gcd = abs(vec.s0); + + %for i in range(1, dimensions): + vec_gcd = gcd(vec_gcd, abs(vec.s${i})); + %endfor + + return vec / vec_gcd; + } + """, + strict_undefined=True) + + +ROTATION_ANGLE_FINDER_TEMPLATE = ElementwiseTemplate( + arguments=r"""//CL:mako// + /* input: */ + int_coord_vec_t *translation_vectors, + + /* output: */ + coord_t *rotation_angles, + """, + + operation=r"""//CL:mako// + /* + * Normalize the translation vector (by dividing by its GCD). + * + * We need this, because generally in in floating point arithmetic, + * if k is a scalar and v is a vector, we can't assume + * + * kv[-1] / sqrt(|kv|^2) == v[-1] / sqrt(|v|^2) + */ + int_coord_vec_t vec = gcd_normalize(translation_vectors[i]); + + coord_t num = vec.s${dimensions-1}; + coord_t denom = 0; + + %for i in range(dimensions): + denom += vec.s${i} * vec.s${i}; + %endfor + + denom = sqrt(denom); + + rotation_angles[i] = acos(num / denom); + """) + + +class _RotationClassesFinder(object): + + def __init__( + self, context, well_sep_is_n_away, dimensions, + int_coord_vec_dtype, coord_dtype): + + if coord_dtype.itemsize == 4: + self.equiv_int_dtype_for_coord_dtype = np.int32 + elif coord_dtype.itemsize == 8: + self.equiv_int_dtype_for_coord_dtype = np.int64 + else: + raise ValueError("unexpected coord_dtype") + + preamble = ROTATION_ANGLE_FINDER_PREAMBLE_TEMPLATE.render( + dimensions=dimensions) + + self.rotation_angle_finder = ( + ROTATION_ANGLE_FINDER_TEMPLATE.build( + context, + type_aliases=( + ("int_coord_vec_t", int_coord_vec_dtype), + ("coord_t", coord_dtype), + ), + var_values=( + ("dimensions", dimensions), + ), + more_preamble=preamble)) + + from boxtree.tools import ListRenumberer + + self.list_renumberer = ListRenumberer( + context, + from_element_dtype=self.equiv_int_dtype_for_coord_dtype, + to_element_dtype=np.int32) + + self.well_sep_is_n_away = well_sep_is_n_away + self.dimensions = dimensions + self.coord_dtype = coord_dtype + + def __call__(self, queue, translation_vectors, wait_for=None): + nvectors = len(translation_vectors) + + rotation_angles = cl.array.empty(queue, nvectors, dtype=self.coord_dtype) + + evt = self.rotation_angle_finder( + translation_vectors, rotation_angles, + queue=queue, wait_for=wait_for) + + rotation_classes, rotation_class_to_angle, evt = ( + self.list_renumberer( + rotation_angles.view(self.equiv_int_dtype_for_coord_dtype), + queue=queue, + wait_for=[evt])) + + # Sanity check. There should be no more than 2^(d-1) * (2n+1)^d distinct + # rotation classes, since that is an upper bound on the number of + # distinct list 2 boxes. + d = self.dimensions + n = self.well_sep_is_n_away + rotation_class_to_angle = rotation_class_to_angle.view(self.coord_dtype) + assert len(rotation_class_to_angle) <= 2 ** (d - 1) * (2 * n + 1) ** d + + result = {} + result["rotation_classes"] = rotation_classes + result["rotation_class_to_angle"] = rotation_class_to_angle + + return result, evt + +# }}} + + # {{{ traversal info (output) class FMMTraversalInfo(DeviceDataRecord): @@ -1874,6 +2032,9 @@ class FMMTraversalBuilder: VectorArg(box_flags_enum.dtype, "box_flags"), ] + int_coord_vec_dtype = cl.cltypes.vec_types[np.dtype(np.int32), dimensions] + + # extra_lists is for list_name, template, extra_args, extra_lists, eliminate_empty_list in [ ("same_level_non_well_sep_boxes", SAME_LEVEL_NON_WELL_SEP_BOXES_TEMPLATE, [], [], []), @@ -1890,7 +2051,7 @@ class FMMTraversalBuilder: "same_level_non_well_sep_boxes_starts"), VectorArg(box_id_dtype, "same_level_non_well_sep_boxes_lists"), - ], [], []), + ], [("translation_vectors", int_coord_vec_dtype)], []), ("from_sep_smaller", FROM_SEP_SMALLER_TEMPLATE, [ ScalarArg(coord_dtype, "stick_out_factor"), @@ -1908,7 +2069,7 @@ class FMMTraversalBuilder: "from_sep_smaller_min_nsources_cumul"), ScalarArg(box_id_dtype, "from_sep_smaller_source_level"), ], - ["from_sep_close_smaller"] + [("from_sep_close_smaller", box_id_dtype)] if sources_have_extent or targets_have_extent else [], ["from_sep_smaller"]), ("from_sep_bigger", FROM_SEP_BIGGER_TEMPLATE, @@ -1922,10 +2083,11 @@ class FMMTraversalBuilder: VectorArg(box_id_dtype, "same_level_non_well_sep_boxes_lists"), ], - ["from_sep_close_bigger"] + [("from_sep_close_bigger", box_id_dtype)] if sources_have_extent or targets_have_extent else [], []), ]: + src = Template( TRAVERSAL_PREAMBLE_TEMPLATE + HELPER_FUNCTION_TEMPLATE @@ -1933,9 +2095,7 @@ class FMMTraversalBuilder: strict_undefined=True).render(**render_vars) result[list_name+"_builder"] = ListOfListsBuilder(self.context, - [(list_name, box_id_dtype)] - + [(extra_list_name, box_id_dtype) - for extra_list_name in extra_lists], + [(list_name, box_id_dtype)] + extra_lists, str(src), arg_decls=base_args + extra_args, debug=debug, name_prefix=list_name, @@ -1944,6 +2104,18 @@ class FMMTraversalBuilder: # }}} + # {{{ rotation classes finder + + result["from_sep_siblings_rotation_classes_finder"] = ( + _RotationClassesFinder( + self.context, + self.well_sep_is_n_away, + dimensions, + int_coord_vec_dtype, + coord_dtype)) + + # }}} + return _KernelInfo(**result) # }}} @@ -2194,10 +2366,13 @@ class FMMTraversalBuilder: wait_for = [evt] from_sep_siblings = result["from_sep_siblings"] - from_sep_siblings_rotation_classes = ( - cl.array.zeros(queue, len(from_sep_siblings.lists), np.int32)) + result, evt = knl_info.from_sep_siblings_rotation_classes_finder( + queue, result["translation_vectors"].lists, wait_for=wait_for) + wait_for = [evt] + + from_sep_siblings_rotation_classes = result["rotation_classes"] from_sep_siblings_rotation_class_to_angle = ( - cl.array.zeros(queue, 1, np.float64)) + result["rotation_class_to_angle"]) # }}} diff --git a/test/test_tools.py b/test/test_tools.py index 0cbccd6..4f0c6a7 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -69,13 +69,15 @@ def test_list_renumberer(ctx_factory): from boxtree.tools import ListRenumberer - arr = cl.array.to_device(queue, np.array([9, 4, 1, 4, 5, 6, 9])) - lr = ListRenumberer(ctx, np.int) - - renumbered_arr, new_to_old, _ = lr(arr) + arr = cl.array.to_device( + queue, + np.array([9., 4., 1., 4., 5., 6., 9.], dtype=np.float64)) + + lr = ListRenumberer(ctx, np.int64, np.int) + renumbered_arr, new_to_old, _ = lr(arr.view(np.int64)) assert (renumbered_arr.get() == np.array([4, 1, 0, 1, 2, 3, 4])).all() - assert (new_to_old.get() == np.array([1, 4, 5, 6, 9])).all() + assert (new_to_old.view(np.float).get() == np.array([1., 4., 5., 6., 9.])).all() @pytest.mark.parametrize("array_factory", (p_normal, p_surface, p_uniform)) diff --git a/test/test_traversal.py b/test/test_traversal.py index e89bb99..64b5f19 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -399,12 +399,12 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): rot_classes = trav.from_sep_siblings_rotation_classes[start:end] center = centers[tgt_ibox] - for box, rot_class in zip(seps, rot_classes): - translation_vec = centers[box] - center - cos_theta = translation_vec[2] / la.norm(translation_vec) - assert np.isclose( - cos_theta, - np.cos(trav.from_sep_siblings_rotation_class_to_angle[rot_class])) + translation_vecs = centers[seps] - center + cos_theta = ( + translation_vecs[:, dims - 1] + / la.norm(translation_vecs, axis=1)) + rot_angles = trav.from_sep_siblings_rotation_class_to_angle[rot_classes] + assert np.allclose(cos_theta, np.cos(rot_angles)) # }}} -- GitLab From 8cf5948b9ab223cbecbc0b20142f3d2d364873f8 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 May 2019 00:56:01 -0500 Subject: [PATCH 05/57] Improve comment --- boxtree/traversal.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 4e22fce..38db0f0 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1409,12 +1409,15 @@ ROTATION_ANGLE_FINDER_TEMPLATE = ElementwiseTemplate( operation=r"""//CL:mako// /* - * Normalize the translation vector (by dividing by its GCD). + * Normalize the translation vector (by dividing by its absolute GCD). * * We need this, because generally in in floating point arithmetic, - * if k is a scalar and v is a vector, we can't assume + * if k is a positive scalar and v is a vector, we can't assume * - * kv[-1] / sqrt(|kv|^2) == v[-1] / sqrt(|v|^2) + * kv[-1] / sqrt(|kv|^2) == v[-1] / sqrt(|v|^2). + * + * Normalizing ensures vectors that are positive integer multiples of each + * other get classified into the same equivalence class of rotations. */ int_coord_vec_t vec = gcd_normalize(translation_vectors[i]); -- GitLab From 676e21535503832699bcfddd0efac3496dc72c61 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 May 2019 00:58:27 -0500 Subject: [PATCH 06/57] Remove broken comment --- boxtree/traversal.py | 1 - 1 file changed, 1 deletion(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 38db0f0..57e2a3d 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -2037,7 +2037,6 @@ class FMMTraversalBuilder: int_coord_vec_dtype = cl.cltypes.vec_types[np.dtype(np.int32), dimensions] - # extra_lists is for list_name, template, extra_args, extra_lists, eliminate_empty_list in [ ("same_level_non_well_sep_boxes", SAME_LEVEL_NON_WELL_SEP_BOXES_TEMPLATE, [], [], []), -- GitLab From 0a9070ed80b4688214f231be3ffb2efbb21d6cb9 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 May 2019 01:47:07 -0500 Subject: [PATCH 07/57] Don't call acos() on device --- boxtree/tools.py | 1 + boxtree/traversal.py | 44 +++++++++++++++++++++++++++--------------- test/test_tools.py | 4 ++-- test/test_traversal.py | 2 +- 4 files changed, 32 insertions(+), 19 deletions(-) diff --git a/boxtree/tools.py b/boxtree/tools.py index 014728c..d75c729 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -699,6 +699,7 @@ class ListRenumberer(object): (sorted_items,), evt = self.sort(items, queue=queue, wait_for=wait_for) new_to_old, uniq_count, evt = self.uniq(sorted_items, wait_for=[evt]) + uniq_count = uniq_count.get() renumbered_items = cl.array.empty( diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 57e2a3d..6067bc5 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1369,7 +1369,7 @@ class _ListMerger(object): # {{{ rotation classes finder -ROTATION_ANGLE_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// +ROTATION_COSINE_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// inline int gcd(int a, int b) { while (b) @@ -1398,13 +1398,13 @@ ROTATION_ANGLE_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// strict_undefined=True) -ROTATION_ANGLE_FINDER_TEMPLATE = ElementwiseTemplate( +ROTATION_COSINE_FINDER_TEMPLATE = ElementwiseTemplate( arguments=r"""//CL:mako// /* input: */ int_coord_vec_t *translation_vectors, /* output: */ - coord_t *rotation_angles, + coord_t *rotation_cosines, """, operation=r"""//CL:mako// @@ -1430,7 +1430,7 @@ ROTATION_ANGLE_FINDER_TEMPLATE = ElementwiseTemplate( denom = sqrt(denom); - rotation_angles[i] = acos(num / denom); + rotation_cosines[i] = num / denom; """) @@ -1441,17 +1441,17 @@ class _RotationClassesFinder(object): int_coord_vec_dtype, coord_dtype): if coord_dtype.itemsize == 4: - self.equiv_int_dtype_for_coord_dtype = np.int32 + self.equiv_int_dtype_for_coord_dtype = np.uint32 elif coord_dtype.itemsize == 8: - self.equiv_int_dtype_for_coord_dtype = np.int64 + self.equiv_int_dtype_for_coord_dtype = np.uint64 else: raise ValueError("unexpected coord_dtype") - preamble = ROTATION_ANGLE_FINDER_PREAMBLE_TEMPLATE.render( + preamble = ROTATION_COSINE_FINDER_PREAMBLE_TEMPLATE.render( dimensions=dimensions) - self.rotation_angle_finder = ( - ROTATION_ANGLE_FINDER_TEMPLATE.build( + self.rotation_cosine_finder = ( + ROTATION_COSINE_FINDER_TEMPLATE.build( context, type_aliases=( ("int_coord_vec_t", int_coord_vec_dtype), @@ -1476,24 +1476,36 @@ class _RotationClassesFinder(object): def __call__(self, queue, translation_vectors, wait_for=None): nvectors = len(translation_vectors) - rotation_angles = cl.array.empty(queue, nvectors, dtype=self.coord_dtype) - - evt = self.rotation_angle_finder( - translation_vectors, rotation_angles, + # This computes the cosine of the rotation angles using the formula + # + # cos(theta) = a . b / (|a| * |b|). + # + # acos() is not supported universally in double precision on GPUs, so we + # don't invert the cosines to get the rotation angles on the device. + rotation_cosines = cl.array.empty(queue, nvectors, dtype=self.coord_dtype) + + evt = self.rotation_cosine_finder( + translation_vectors, + rotation_cosines, queue=queue, wait_for=wait_for) - rotation_classes, rotation_class_to_angle, evt = ( + rotation_classes, rotation_class_to_cosine, evt = ( self.list_renumberer( - rotation_angles.view(self.equiv_int_dtype_for_coord_dtype), + rotation_cosines.view(self.equiv_int_dtype_for_coord_dtype), queue=queue, wait_for=[evt])) + # Compute the rotation angles. + rotation_class_to_cosine = ( + rotation_class_to_cosine.view(self.coord_dtype).get()) + rotation_class_to_angle = np.arccos(rotation_class_to_cosine) + rotation_class_to_angle = cl.array.to_device(queue, rotation_class_to_angle) + # Sanity check. There should be no more than 2^(d-1) * (2n+1)^d distinct # rotation classes, since that is an upper bound on the number of # distinct list 2 boxes. d = self.dimensions n = self.well_sep_is_n_away - rotation_class_to_angle = rotation_class_to_angle.view(self.coord_dtype) assert len(rotation_class_to_angle) <= 2 ** (d - 1) * (2 * n + 1) ** d result = {} diff --git a/test/test_tools.py b/test/test_tools.py index 4f0c6a7..1807764 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -73,9 +73,9 @@ def test_list_renumberer(ctx_factory): queue, np.array([9., 4., 1., 4., 5., 6., 9.], dtype=np.float64)) - lr = ListRenumberer(ctx, np.int64, np.int) + lr = ListRenumberer(ctx, np.uint64, np.int) - renumbered_arr, new_to_old, _ = lr(arr.view(np.int64)) + renumbered_arr, new_to_old, _ = lr(arr.view(np.uint64)) assert (renumbered_arr.get() == np.array([4, 1, 0, 1, 2, 3, 4])).all() assert (new_to_old.view(np.float).get() == np.array([1., 4., 5., 6., 9.])).all() diff --git a/test/test_traversal.py b/test/test_traversal.py index 64b5f19..ae815e7 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -404,7 +404,7 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): translation_vecs[:, dims - 1] / la.norm(translation_vecs, axis=1)) rot_angles = trav.from_sep_siblings_rotation_class_to_angle[rot_classes] - assert np.allclose(cos_theta, np.cos(rot_angles)) + assert np.allclose(np.arccos(cos_theta), rot_angles, atol=1e-3) # }}} -- GitLab From e284833cbdd0c6c2bc9ffff9b5f49e3d898e6cb8 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 May 2019 01:49:56 -0500 Subject: [PATCH 08/57] Bump atol --- test/test_traversal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_traversal.py b/test/test_traversal.py index ae815e7..fa31fe7 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -404,7 +404,7 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): translation_vecs[:, dims - 1] / la.norm(translation_vecs, axis=1)) rot_angles = trav.from_sep_siblings_rotation_class_to_angle[rot_classes] - assert np.allclose(np.arccos(cos_theta), rot_angles, atol=1e-3) + assert np.allclose(np.arccos(cos_theta), rot_angles, atol=1e-13) # }}} -- GitLab From 2d2aaefdcadbf69b41505828dc858ac080980a1c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 May 2019 01:50:16 -0500 Subject: [PATCH 09/57] Bump rtol --- test/test_traversal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_traversal.py b/test/test_traversal.py index fa31fe7..9a167b6 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -404,7 +404,7 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): translation_vecs[:, dims - 1] / la.norm(translation_vecs, axis=1)) rot_angles = trav.from_sep_siblings_rotation_class_to_angle[rot_classes] - assert np.allclose(np.arccos(cos_theta), rot_angles, atol=1e-13) + assert np.allclose(np.arccos(cos_theta), rot_angles, atol=1e-13, rtol=1e-13) # }}} -- GitLab From fbe755559d65c505fa6a6fd0d3582f38eba07e60 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 May 2019 18:01:24 -0500 Subject: [PATCH 10/57] WIP --- boxtree/fmm.py | 2 +- boxtree/pyfmmlib_integration.py | 78 ++++++++++++++++++++++++++++++--- 2 files changed, 74 insertions(+), 6 deletions(-) diff --git a/boxtree/fmm.py b/boxtree/fmm.py index 818de7a..f61d513 100644 --- a/boxtree/fmm.py +++ b/boxtree/fmm.py @@ -116,7 +116,7 @@ def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None): traversal.from_sep_siblings_starts, traversal.from_sep_siblings_lists, mpole_exps) - + recorder.add("multipole_to_local", timing_future) # local_exps represents both Gamma and Delta in [1] diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index a32d1e6..555b09d 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -38,6 +38,12 @@ __doc__ = """Integrates :mod:`boxtree` with """ +# Only use rotation matrix translations when the total size of the precomputation +# is below a certain amount of bytes. + +_ROTMAT_MEM_CUTOFF = 10**8 + + class FMMLibExpansionWrangler(object): """Implements the :class:`boxtree.fmm.ExpansionWranglerInterface` by using pyfmmlib. @@ -50,7 +56,9 @@ class FMMLibExpansionWrangler(object): # {{{ constructor def __init__(self, tree, helmholtz_k, fmm_level_to_nterms=None, ifgrad=False, - dipole_vec=None, dipoles_already_reordered=False, nterms=None): + dipole_vec=None, dipoles_already_reordered=False, nterms=None, + from_sep_siblings_rotation_angles=None, + from_sep_siblings_rotation_classes=None): """ :arg fmm_level_to_nterms: a callable that, upon being passed the tree and the tree level as an integer, returns the value of *nterms* for the @@ -106,6 +114,13 @@ class FMMLibExpansionWrangler(object): self.dipole_vec = None self.dp_suffix = "" + self.supports_rotmat_precomputation = ( + from_sep_siblings_rotation_classes is not None + and from_sep_siblings_rotation_angles is not None) + + self.rotation_classes = from_sep_siblings_rotation_classes + self.rotation_angles = from_sep_siblings_rotation_angles + # }}} def level_to_rscale(self, level): @@ -162,7 +177,7 @@ class FMMLibExpansionWrangler(object): def get_vec_routine(self, name): return self.get_routine(name, "_vec") - def get_translation_routine(self, name, vec_suffix="_vec"): + def get_translation_routine(self, name, vec_suffix="_vec"): suffix = "" if self.dim == 3: suffix = "quadu" @@ -578,8 +593,6 @@ class FMMLibExpansionWrangler(object): tree = self.tree local_exps = self.local_expansion_zeros() - mploc = self.get_translation_routine("%ddmploc", vec_suffix="_imany") - for lev in range(self.tree.nlevels): lstart, lstop = level_start_target_or_target_parent_box_nrs[lev:lev+2] if lstart == lstop: @@ -587,6 +600,60 @@ class FMMLibExpansionWrangler(object): starts_on_lvl = starts[lstart:lstop+1] + mploc = self.get_translation_routine("%ddmploc", vec_suffix="_imany") + + kwargs = {} + + # {{{ precompute rotation matrices, if applicable + + if self.supports_rotmat_precomputation: + order = self.level_nterms[lev] + + forward_rotation_angles = self.rotation_angles + + # Rotation matrix precomputation can use a large amount of memory. + # Only use it when the estimated size is below _ROTMAT_MEM_CUTOFF. + rotmat_mem_estimate = (8 + * (order + 1)**2 + * (2*order + 1) + * len(forward_rotation_angles)) + + if rotmat_mem_estimate < _ROTMAT_MEM_CUTOFF: + mploc = self.get_translation_routine( + "%ddmploc", vec_suffix="2_trunc_imany") + + print("start precomputation, order", order) + + # Compute forward and backward rotation matrices for this level. + import pyfmmlib + rotmat_builder = getattr(pyfmmlib, "rotviarecur3p_init_vec") + + c = np.ascontiguousarray + + ier, rotmatf = rotmat_builder(order, forward_rotation_angles) + assert (0 == ier).all() + + ier, rotmatb = rotmat_builder(order, -forward_rotation_angles) + assert (0 == ier).all() + + kwargs["ldm"] = order + + kwargs["rotmatf"] = c(rotmatf) + kwargs["rotmatf_offsets"] = 0 * self.rotation_classes + 50 + kwargs["rotmatf_starts"] = starts_on_lvl + + kwargs["rotmatb"] = c(rotmatb) + kwargs["rotmatb_offsets"] = 0 * self.rotation_classes + 50 + kwargs["rotmatb_starts"] = starts_on_lvl + + #assert all(0 <= cl < len(forward_rotation_angles) for cl in self.rotation_classes) + #assert all(0 <= i < len(self.rotation_classes) for i in starts_on_lvl) + + print("done precomputation, order", order) + print("rotmat size", rotmatb.size) + + # }}} + source_level_start_ibox, source_mpoles_view = \ self.multipole_expansions_view(mpole_exps, lev) target_level_start_ibox, target_local_exps_view = \ @@ -610,7 +677,6 @@ class FMMLibExpansionWrangler(object): rscale1 = np.ones(nsrc_boxes) * rscale rscale1_offsets = np.arange(nsrc_boxes) - kwargs = {} if self.dim == 3 and self.eqn_letter == "h": kwargs["radius"] = ( tree.root_extent * 2**(-lev) @@ -627,6 +693,8 @@ class FMMLibExpansionWrangler(object): (ntgt_boxes,) + self.expansion_shape(self.level_nterms[lev]), dtype=self.dtype) + print("expn2 order", np.isfortran(expn2)) + kwargs.update(self.kernel_kwargs) expn2 = mploc( -- GitLab From 5fcfffd095e6d49cfbf47675b6fdfc899c1e1d85 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 May 2019 18:05:42 -0500 Subject: [PATCH 11/57] WIP: Add timing code --- test/fmmlib-timing.py | 236 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 test/fmmlib-timing.py diff --git a/test/fmmlib-timing.py b/test/fmmlib-timing.py new file mode 100644 index 0000000..5d15b4b --- /dev/null +++ b/test/fmmlib-timing.py @@ -0,0 +1,236 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2013 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from six.moves import range + +import numpy as np +import numpy.linalg as la +import pyopencl as cl + +import pytest +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + +from boxtree.tools import ( # noqa: F401 + make_normal_particle_array as p_normal, + make_surface_particle_array as p_surface, + make_uniform_particle_array as p_uniform, + particle_array_to_host, + ConstantOneExpansionWrangler) + +import logging +logger = logging.getLogger(__name__) + + +# {{{ test fmmlib integration + +@pytest.mark.parametrize("dims", [2, 3]) +@pytest.mark.parametrize("use_dipoles", [True, False]) +@pytest.mark.parametrize("helmholtz_k", [0, 2]) +def test_pyfmmlib_fmm(ctx_factory, dims, use_dipoles, helmholtz_k): + logging.basicConfig(level=logging.INFO) + + from pytest import importorskip + importorskip("pyfmmlib") + + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + nsources = 3000 + ntargets = 1000 + dtype = np.float64 + + sources = p_normal(queue, nsources, dims, dtype, seed=15) + targets = ( + p_normal(queue, ntargets, dims, dtype, seed=18) + + np.array([2, 0, 0])[:dims]) + + sources_host = particle_array_to_host(sources) + targets_host = particle_array_to_host(targets) + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + tree, _ = tb(queue, sources, targets=targets, + max_particles_in_box=30, debug=True) + + from boxtree.traversal import FMMTraversalBuilder + tbuild = FMMTraversalBuilder(ctx) + trav, _ = tbuild(queue, tree, debug=True) + + trav = trav.get(queue=queue) + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(queue.context, seed=20) + + weights = rng.uniform(queue, nsources, dtype=np.float64).get() + #weights = np.ones(nsources) + + if use_dipoles: + np.random.seed(13) + dipole_vec = np.random.randn(dims, nsources) + else: + dipole_vec = None + + if dims == 2 and helmholtz_k == 0: + base_nterms = 20 + else: + base_nterms = 10 + + def fmm_level_to_nterms(tree, lev): + result = base_nterms + + if lev < 3 and helmholtz_k: + # exercise order-varies-by-level capability + result += 5 + + if use_dipoles: + result += 1 + + return result + + from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler + wrangler = FMMLibExpansionWrangler( + trav.tree, helmholtz_k, + fmm_level_to_nterms=fmm_level_to_nterms, + dipole_vec=dipole_vec, + from_sep_siblings_rotation_classes=trav.from_sep_siblings_rotation_classes, + from_sep_siblings_rotation_angles=trav.from_sep_siblings_rotation_class_to_angle) + + from boxtree.fmm import drive_fmm + + timing_data = {} + pot = drive_fmm(trav, wrangler, weights, timing_data=timing_data) + print("M2L timing", timing_data["multipole_to_local"]["wall_elapsed"]) + assert timing_data + + # {{{ ref fmmlib computation + + logger.info("computing direct (reference) result") + + import pyfmmlib + fmmlib_routine = getattr( + pyfmmlib, + "%spot%s%ddall%s_vec" % ( + wrangler.eqn_letter, + "fld" if dims == 3 else "grad", + dims, + "_dp" if use_dipoles else "")) + + kwargs = {} + if dims == 3: + kwargs["iffld"] = False + else: + kwargs["ifgrad"] = False + kwargs["ifhess"] = False + + if use_dipoles: + if helmholtz_k == 0 and dims == 2: + kwargs["dipstr"] = ( + -weights # pylint:disable=invalid-unary-operand-type + * (dipole_vec[0] + 1j * dipole_vec[1])) + else: + kwargs["dipstr"] = weights + kwargs["dipvec"] = dipole_vec + else: + kwargs["charge"] = weights + if helmholtz_k: + kwargs["zk"] = helmholtz_k + + ref_pot = wrangler.finalize_potentials( + fmmlib_routine( + sources=sources_host.T, targets=targets_host.T, + **kwargs)[0] + ) + + rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf) + logger.info("relative l2 error vs fmmlib direct: %g" % rel_err) + assert rel_err < 1e-5, rel_err + + # }}} + + # {{{ check against sumpy + + try: + import sumpy # noqa + except ImportError: + have_sumpy = False + from warnings import warn + warn("sumpy unavailable: cannot compute independent reference " + "values for pyfmmlib") + else: + have_sumpy = True + + if have_sumpy: + from sumpy.kernel import ( # pylint:disable=import-error + LaplaceKernel, HelmholtzKernel, DirectionalSourceDerivative) + from sumpy.p2p import P2P # pylint:disable=import-error + + sumpy_extra_kwargs = {} + if helmholtz_k: + knl = HelmholtzKernel(dims) + sumpy_extra_kwargs["k"] = helmholtz_k + else: + knl = LaplaceKernel(dims) + + if use_dipoles: + knl = DirectionalSourceDerivative(knl) + sumpy_extra_kwargs["src_derivative_dir"] = dipole_vec + + p2p = P2P(ctx, + [knl], + exclude_self=False) + + evt, (sumpy_ref_pot,) = p2p( + queue, targets, sources, [weights], + out_host=True, **sumpy_extra_kwargs) + + sumpy_rel_err = ( + la.norm(pot - sumpy_ref_pot, np.inf) + / la.norm(sumpy_ref_pot, np.inf)) + + logger.info("relative l2 error vs sumpy direct: %g" % sumpy_rel_err) + assert sumpy_rel_err < 1e-5, sumpy_rel_err + + # }}} + +# }}} + + +# You can test individual routines by typing +# $ python test_fmm.py 'test_routine(cl.create_some_context)' + +if __name__ == "__main__": + test_pyfmmlib_fmm(cl._csc, 3, True, 0) + + """ + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + """ + +# vim: fdm=marker -- GitLab From 47dacca07d82eeb346403fdf68f7eec6695ff4b0 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 30 May 2019 23:24:43 -0500 Subject: [PATCH 12/57] WIP --- boxtree/pyfmmlib_integration.py | 29 +++++++++++++---------------- boxtree/traversal.py | 2 +- test/fmmlib-timing.py | 29 ++++++++++++++++++----------- test/test_traversal.py | 9 +++++---- 4 files changed, 37 insertions(+), 32 deletions(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 555b09d..6f6556c 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -124,6 +124,7 @@ class FMMLibExpansionWrangler(object): # }}} def level_to_rscale(self, level): + return 1 result = self.tree.root_extent * 2 ** -level * self.rscale_factor if abs(result) > 1: result = 1 @@ -593,6 +594,10 @@ class FMMLibExpansionWrangler(object): tree = self.tree local_exps = self.local_expansion_zeros() + #mpole_exps = 0 * mpole_exps + + use_rotmat = True + for lev in range(self.tree.nlevels): lstart, lstop = level_start_target_or_target_parent_box_nrs[lev:lev+2] if lstart == lstop: @@ -619,17 +624,15 @@ class FMMLibExpansionWrangler(object): * len(forward_rotation_angles)) if rotmat_mem_estimate < _ROTMAT_MEM_CUTOFF: + assert len(self.rotation_classes) == len(lists) + mploc = self.get_translation_routine( "%ddmploc", vec_suffix="2_trunc_imany") - print("start precomputation, order", order) - # Compute forward and backward rotation matrices for this level. import pyfmmlib rotmat_builder = getattr(pyfmmlib, "rotviarecur3p_init_vec") - c = np.ascontiguousarray - ier, rotmatf = rotmat_builder(order, forward_rotation_angles) assert (0 == ier).all() @@ -637,21 +640,17 @@ class FMMLibExpansionWrangler(object): assert (0 == ier).all() kwargs["ldm"] = order + kwargs["nterms"] = order + kwargs["nterms1"] = order - kwargs["rotmatf"] = c(rotmatf) - kwargs["rotmatf_offsets"] = 0 * self.rotation_classes + 50 + kwargs["rotmatf"] = rotmatf + kwargs["rotmatf_offsets"] = self.rotation_classes kwargs["rotmatf_starts"] = starts_on_lvl - kwargs["rotmatb"] = c(rotmatb) - kwargs["rotmatb_offsets"] = 0 * self.rotation_classes + 50 + kwargs["rotmatb"] = rotmatb + kwargs["rotmatb_offsets"] = self.rotation_classes kwargs["rotmatb_starts"] = starts_on_lvl - #assert all(0 <= cl < len(forward_rotation_angles) for cl in self.rotation_classes) - #assert all(0 <= i < len(self.rotation_classes) for i in starts_on_lvl) - - print("done precomputation, order", order) - print("rotmat size", rotmatb.size) - # }}} source_level_start_ibox, source_mpoles_view = \ @@ -693,8 +692,6 @@ class FMMLibExpansionWrangler(object): (ntgt_boxes,) + self.expansion_shape(self.level_nterms[lev]), dtype=self.dtype) - print("expn2 order", np.isfortran(expn2)) - kwargs.update(self.kernel_kwargs) expn2 = mploc( diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 6067bc5..f8fe017 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -640,7 +640,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) if (sep) { int_coord_vec_t vec = get_translation_vector( - root_extent, level, center, sib_center); + root_extent, level, sib_center, center); APPEND_from_sep_siblings(sib_box_id); APPEND_translation_vectors(vec); } diff --git a/test/fmmlib-timing.py b/test/fmmlib-timing.py index 5d15b4b..8319ff0 100644 --- a/test/fmmlib-timing.py +++ b/test/fmmlib-timing.py @@ -57,8 +57,8 @@ def test_pyfmmlib_fmm(ctx_factory, dims, use_dipoles, helmholtz_k): ctx = ctx_factory() queue = cl.CommandQueue(ctx) - nsources = 3000 - ntargets = 1000 + nsources = 30000 + ntargets = 10000 dtype = np.float64 sources = p_normal(queue, nsources, dims, dtype, seed=15) @@ -96,7 +96,7 @@ def test_pyfmmlib_fmm(ctx_factory, dims, use_dipoles, helmholtz_k): if dims == 2 and helmholtz_k == 0: base_nterms = 20 else: - base_nterms = 10 + base_nterms = 15 def fmm_level_to_nterms(tree, lev): result = base_nterms @@ -110,19 +110,26 @@ def test_pyfmmlib_fmm(ctx_factory, dims, use_dipoles, helmholtz_k): return result - from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler - wrangler = FMMLibExpansionWrangler( - trav.tree, helmholtz_k, - fmm_level_to_nterms=fmm_level_to_nterms, - dipole_vec=dipole_vec, - from_sep_siblings_rotation_classes=trav.from_sep_siblings_rotation_classes, - from_sep_siblings_rotation_angles=trav.from_sep_siblings_rotation_class_to_angle) + if 1: + from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler + wrangler = FMMLibExpansionWrangler( + trav.tree, helmholtz_k, + fmm_level_to_nterms=fmm_level_to_nterms, + dipole_vec=dipole_vec) + else: + from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler + wrangler = FMMLibExpansionWrangler( + trav.tree, helmholtz_k, + fmm_level_to_nterms=fmm_level_to_nterms, + dipole_vec=dipole_vec, + from_sep_siblings_rotation_classes=trav.from_sep_siblings_rotation_classes, + from_sep_siblings_rotation_angles=trav.from_sep_siblings_rotation_class_to_angle) from boxtree.fmm import drive_fmm timing_data = {} pot = drive_fmm(trav, wrangler, weights, timing_data=timing_data) - print("M2L timing", timing_data["multipole_to_local"]["wall_elapsed"]) + print("M2L timing", timing_data["multipole_to_local"]["process_elapsed"]) assert timing_data # {{{ ref fmmlib computation diff --git a/test/test_traversal.py b/test/test_traversal.py index 9a167b6..e70edb2 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -400,11 +400,12 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): center = centers[tgt_ibox] translation_vecs = centers[seps] - center - cos_theta = ( - translation_vecs[:, dims - 1] - / la.norm(translation_vecs, axis=1)) + theta = np.arctan2( + la.norm(translation_vecs[:, :dims - 1], axis=1), + translation_vecs[:, dims - 1]) rot_angles = trav.from_sep_siblings_rotation_class_to_angle[rot_classes] - assert np.allclose(np.arccos(cos_theta), rot_angles, atol=1e-13, rtol=1e-13) + + assert np.allclose(theta, rot_angles, atol=1e-13, rtol=1e-13) # }}} -- GitLab From d5a7c628bc9ea52f76fbc210c43b1e1fbe8a09c3 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 00:29:24 -0500 Subject: [PATCH 13/57] Various fixes --- boxtree/pyfmmlib_integration.py | 62 ++++++++++++----------- boxtree/tools.py | 2 +- boxtree/traversal.py | 30 ++++++++---- test/test_fmm.py | 87 +++++++++++++++++++++++++++++++++ 4 files changed, 141 insertions(+), 40 deletions(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 6f6556c..6aca634 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -37,11 +37,14 @@ __doc__ = """Integrates :mod:`boxtree` with `pyfmmlib `_. """ +# {{{ constants -# Only use rotation matrix translations when the total size of the precomputation -# is below a certain amount of bytes. +# Only use M2L translations with precomputed rotation matrices ("optimized M2L") +# when the total size of the precomputation is below a certain amount of bytes. -_ROTMAT_MEM_CUTOFF = 10**8 +_ROTMAT_PRECOMPUTATION_CUTOFF_BYTES = 10**8 + +# }}} class FMMLibExpansionWrangler(object): @@ -57,12 +60,19 @@ class FMMLibExpansionWrangler(object): def __init__(self, tree, helmholtz_k, fmm_level_to_nterms=None, ifgrad=False, dipole_vec=None, dipoles_already_reordered=False, nterms=None, - from_sep_siblings_rotation_angles=None, - from_sep_siblings_rotation_classes=None): + m2l_rotation_lists=None, m2l_rotation_angles=None): """ :arg fmm_level_to_nterms: a callable that, upon being passed the tree and the tree level as an integer, returns the value of *nterms* for the multipole and local expansions on that level. + + :arg m2l_rotation_lists: corresponds to the + *from_sep_siblings_rotation_classes* attribute of the traversal + object + + :arg m2l_rotation_angles:corresponds to the + *from_sep_siblings_rotation_class_to_angle* attribute of the traversal + object """ if nterms is not None and fmm_level_to_nterms is not None: @@ -114,12 +124,13 @@ class FMMLibExpansionWrangler(object): self.dipole_vec = None self.dp_suffix = "" - self.supports_rotmat_precomputation = ( - from_sep_siblings_rotation_classes is not None - and from_sep_siblings_rotation_angles is not None) + self.supports_optimized_m2l = ( + tree.dimensions == 3 + and m2l_rotation_lists is not None + and m2l_rotation_angles is not None) - self.rotation_classes = from_sep_siblings_rotation_classes - self.rotation_angles = from_sep_siblings_rotation_angles + self.m2l_rotation_lists = m2l_rotation_lists + self.m2l_rotation_angles = m2l_rotation_angles # }}} @@ -594,10 +605,6 @@ class FMMLibExpansionWrangler(object): tree = self.tree local_exps = self.local_expansion_zeros() - #mpole_exps = 0 * mpole_exps - - use_rotmat = True - for lev in range(self.tree.nlevels): lstart, lstop = level_start_target_or_target_parent_box_nrs[lev:lev+2] if lstart == lstop: @@ -609,34 +616,31 @@ class FMMLibExpansionWrangler(object): kwargs = {} - # {{{ precompute rotation matrices, if applicable + # {{{ precompute rotation matrices for optimized m2l, if applicable - if self.supports_rotmat_precomputation: + if self.supports_optimized_m2l: order = self.level_nterms[lev] - forward_rotation_angles = self.rotation_angles - # Rotation matrix precomputation can use a large amount of memory. - # Only use it when the estimated size is below _ROTMAT_MEM_CUTOFF. + # Only use it when the estimated size is below the cutoff. rotmat_mem_estimate = (8 * (order + 1)**2 * (2*order + 1) - * len(forward_rotation_angles)) + * len(self.m2l_rotation_angles)) - if rotmat_mem_estimate < _ROTMAT_MEM_CUTOFF: - assert len(self.rotation_classes) == len(lists) + if rotmat_mem_estimate < _ROTMAT_PRECOMPUTATION_CUTOFF_BYTES: + assert len(self.m2l_rotation_lists) == len(lists) mploc = self.get_translation_routine( "%ddmploc", vec_suffix="2_trunc_imany") - # Compute forward and backward rotation matrices for this level. - import pyfmmlib - rotmat_builder = getattr(pyfmmlib, "rotviarecur3p_init_vec") + from pyfmmlib import rotviarecur3p_init_vec as rotmat_builder - ier, rotmatf = rotmat_builder(order, forward_rotation_angles) + # Compute forward and backward rotation matrices for this level. + ier, rotmatf = rotmat_builder(order, self.m2l_rotation_angles) assert (0 == ier).all() - ier, rotmatb = rotmat_builder(order, -forward_rotation_angles) + ier, rotmatb = rotmat_builder(order, -self.m2l_rotation_angles) assert (0 == ier).all() kwargs["ldm"] = order @@ -644,11 +648,11 @@ class FMMLibExpansionWrangler(object): kwargs["nterms1"] = order kwargs["rotmatf"] = rotmatf - kwargs["rotmatf_offsets"] = self.rotation_classes + kwargs["rotmatf_offsets"] = self.m2l_rotation_lists kwargs["rotmatf_starts"] = starts_on_lvl kwargs["rotmatb"] = rotmatb - kwargs["rotmatb_offsets"] = self.rotation_classes + kwargs["rotmatb_offsets"] = self.m2l_rotation_lists kwargs["rotmatb_starts"] = starts_on_lvl # }}} diff --git a/boxtree/tools.py b/boxtree/tools.py index d75c729..55c0b5e 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -651,7 +651,7 @@ class ListRenumberer(object): Useful e.g. for compacting lists into a dense range. Returns a tuple (*renumbered*, *new_to_old*, *evt*), where *renumbered* is - the renumbered list and *new_to_old* maps renumbered values to new ones. + the renumbered list and *new_to_old* maps renumbered values to old ones. Example:: diff --git a/boxtree/traversal.py b/boxtree/traversal.py index f8fe017..506ece1 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1708,15 +1708,21 @@ class FMMTraversalInfo(DeviceDataRecord): ``box_id_t [*]`` To facilitate rotation-based translations ("point and shoot"), the following - lists record the angle between box translation pairs and the z-axis. + lists record the angle between box translation pairs and the *z*-axis. - .. attributes:: from_sep_siblings_rotation_classes + .. attribute:: from_sep_siblings_rotation_classes ``int32 [*]`` - .. attributes:: from_sep_siblings_rotation_class_to_angle + A list, corresponding to *from_sep_siblings_lists*, of the rotation + class of each box pair. - ``double [nfrom_sep_siblings_rotation_classes]`` + .. attribute:: from_sep_siblings_rotation_class_to_angle + + ``coord_t [nfrom_sep_siblings_rotation_classes]`` + + Maps rotation classes in *from_sep_siblings_rotation_classes* to + rotation angles. .. ------------------------------------------------------------------------ .. rubric:: Separated Smaller Boxes ("List 3") @@ -2380,13 +2386,17 @@ class FMMTraversalBuilder: wait_for = [evt] from_sep_siblings = result["from_sep_siblings"] - result, evt = knl_info.from_sep_siblings_rotation_classes_finder( - queue, result["translation_vectors"].lists, wait_for=wait_for) - wait_for = [evt] + if tree.dimensions == 3: + result, evt = knl_info.from_sep_siblings_rotation_classes_finder( + queue, result["translation_vectors"].lists, wait_for=wait_for) + wait_for = [evt] - from_sep_siblings_rotation_classes = result["rotation_classes"] - from_sep_siblings_rotation_class_to_angle = ( - result["rotation_class_to_angle"]) + from_sep_siblings_rotation_classes = result["rotation_classes"] + from_sep_siblings_rotation_class_to_angle = ( + result["rotation_class_to_angle"]) + else: + from_sep_siblings_rotation_classes = None + from_sep_siblings_rotation_class_to_angle = None # }}} diff --git a/test/test_fmm.py b/test/test_fmm.py index fbd5564..73136ba 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -619,6 +619,93 @@ def test_fmm_float32(ctx_factory, enable_extents): # }}} +# {{{ test with fmm optimized 3d m2l + +@pytest.mark.parametrize("well_sep_is_n_away", (1, 2)) +@pytest.mark.parametrize("helmholtz_k", (0, 2)) +def test_fmm_with_optimized_3d_m2l(ctx_factory, helmholtz_k, well_sep_is_n_away): + logging.basicConfig(level=logging.INFO) + + from pytest import importorskip + importorskip("pyfmmlib") + + dims = 3 + + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + nsources = 10000 + ntargets = 10000 + dtype = np.float64 + + sources = p_normal(queue, nsources, dims, dtype, seed=15) + targets = ( + p_normal(queue, ntargets, dims, dtype, seed=18) + + np.array([2, 0, 0])[:dims]) + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + tree, _ = tb(queue, sources, targets=targets, + max_particles_in_box=30, debug=True) + + from boxtree.traversal import FMMTraversalBuilder + tbuild = FMMTraversalBuilder(ctx) + trav, _ = tbuild(queue, tree, debug=True) + + trav = trav.get(queue=queue) + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(queue.context, seed=20) + + weights = rng.uniform(queue, nsources, dtype=np.float64).get() + + base_nterms = 10 + + def fmm_level_to_nterms(tree, lev): + result = base_nterms + + if lev < 3 and helmholtz_k: + # exercise order-varies-by-level capability + result += 5 + + return result + + from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler + + baseline_wrangler = FMMLibExpansionWrangler( + trav.tree, helmholtz_k, + fmm_level_to_nterms=fmm_level_to_nterms) + + optimized_wrangler = FMMLibExpansionWrangler( + trav.tree, helmholtz_k, + fmm_level_to_nterms=fmm_level_to_nterms, + m2l_rotation_lists=trav.from_sep_siblings_rotation_classes, + m2l_rotation_angles=trav.from_sep_siblings_rotation_class_to_angle) + + from boxtree.fmm import drive_fmm + + baseline_timing_data = {} + baseline_pot = drive_fmm( + trav, baseline_wrangler, weights, timing_data=baseline_timing_data) + + optimized_timing_data = {} + optimized_pot = drive_fmm( + trav, optimized_wrangler, weights, timing_data=optimized_timing_data) + + print("Baseline M2L time : %.4g s" % + baseline_timing_data["multipole_to_local"] + .get("process_elapsed", -1)) + + print("Optimized M2L time: %.4g s" % + optimized_timing_data["multipole_to_local"] + .get("process_elapsed", -1)) + + assert np.allclose(baseline_pot, optimized_pot, atol=1e-13, rtol=1e-13) + +# }}} + + # You can test individual routines by typing # $ python test_fmm.py 'test_routine(cl.create_some_context)' -- GitLab From 1de55cd83f797a57af2827a5ec3e60715478dc8f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 00:36:24 -0500 Subject: [PATCH 14/57] Point requirements.txt to wrap-rotations branch of pyfmmlib --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c3174a4..4f0aed1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy git+https://github.com/inducer/pyopencl git+https://github.com/inducer/islpy git+https://github.com/inducer/loopy -git+https://gitlab.tiker.net/inducer/pyfmmlib +git+https://gitlab.tiker.net/inducer/pyfmmlib@wrap-rotations # only for reference values for the fmmlib test # (unable to use--circular dep) -- GitLab From 347c591463fa02123f5edf72f3185a1bf3cde471 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 00:52:33 -0500 Subject: [PATCH 15/57] Fixes --- test/fmmlib-timing.py | 243 ------------------------------------------ 1 file changed, 243 deletions(-) delete mode 100644 test/fmmlib-timing.py diff --git a/test/fmmlib-timing.py b/test/fmmlib-timing.py deleted file mode 100644 index 8319ff0..0000000 --- a/test/fmmlib-timing.py +++ /dev/null @@ -1,243 +0,0 @@ -from __future__ import division, absolute_import, print_function - -__copyright__ = "Copyright (C) 2013 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -from six.moves import range - -import numpy as np -import numpy.linalg as la -import pyopencl as cl - -import pytest -from pyopencl.tools import ( # noqa - pytest_generate_tests_for_pyopencl as pytest_generate_tests) - -from boxtree.tools import ( # noqa: F401 - make_normal_particle_array as p_normal, - make_surface_particle_array as p_surface, - make_uniform_particle_array as p_uniform, - particle_array_to_host, - ConstantOneExpansionWrangler) - -import logging -logger = logging.getLogger(__name__) - - -# {{{ test fmmlib integration - -@pytest.mark.parametrize("dims", [2, 3]) -@pytest.mark.parametrize("use_dipoles", [True, False]) -@pytest.mark.parametrize("helmholtz_k", [0, 2]) -def test_pyfmmlib_fmm(ctx_factory, dims, use_dipoles, helmholtz_k): - logging.basicConfig(level=logging.INFO) - - from pytest import importorskip - importorskip("pyfmmlib") - - ctx = ctx_factory() - queue = cl.CommandQueue(ctx) - - nsources = 30000 - ntargets = 10000 - dtype = np.float64 - - sources = p_normal(queue, nsources, dims, dtype, seed=15) - targets = ( - p_normal(queue, ntargets, dims, dtype, seed=18) - + np.array([2, 0, 0])[:dims]) - - sources_host = particle_array_to_host(sources) - targets_host = particle_array_to_host(targets) - - from boxtree import TreeBuilder - tb = TreeBuilder(ctx) - - tree, _ = tb(queue, sources, targets=targets, - max_particles_in_box=30, debug=True) - - from boxtree.traversal import FMMTraversalBuilder - tbuild = FMMTraversalBuilder(ctx) - trav, _ = tbuild(queue, tree, debug=True) - - trav = trav.get(queue=queue) - - from pyopencl.clrandom import PhiloxGenerator - rng = PhiloxGenerator(queue.context, seed=20) - - weights = rng.uniform(queue, nsources, dtype=np.float64).get() - #weights = np.ones(nsources) - - if use_dipoles: - np.random.seed(13) - dipole_vec = np.random.randn(dims, nsources) - else: - dipole_vec = None - - if dims == 2 and helmholtz_k == 0: - base_nterms = 20 - else: - base_nterms = 15 - - def fmm_level_to_nterms(tree, lev): - result = base_nterms - - if lev < 3 and helmholtz_k: - # exercise order-varies-by-level capability - result += 5 - - if use_dipoles: - result += 1 - - return result - - if 1: - from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler - wrangler = FMMLibExpansionWrangler( - trav.tree, helmholtz_k, - fmm_level_to_nterms=fmm_level_to_nterms, - dipole_vec=dipole_vec) - else: - from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler - wrangler = FMMLibExpansionWrangler( - trav.tree, helmholtz_k, - fmm_level_to_nterms=fmm_level_to_nterms, - dipole_vec=dipole_vec, - from_sep_siblings_rotation_classes=trav.from_sep_siblings_rotation_classes, - from_sep_siblings_rotation_angles=trav.from_sep_siblings_rotation_class_to_angle) - - from boxtree.fmm import drive_fmm - - timing_data = {} - pot = drive_fmm(trav, wrangler, weights, timing_data=timing_data) - print("M2L timing", timing_data["multipole_to_local"]["process_elapsed"]) - assert timing_data - - # {{{ ref fmmlib computation - - logger.info("computing direct (reference) result") - - import pyfmmlib - fmmlib_routine = getattr( - pyfmmlib, - "%spot%s%ddall%s_vec" % ( - wrangler.eqn_letter, - "fld" if dims == 3 else "grad", - dims, - "_dp" if use_dipoles else "")) - - kwargs = {} - if dims == 3: - kwargs["iffld"] = False - else: - kwargs["ifgrad"] = False - kwargs["ifhess"] = False - - if use_dipoles: - if helmholtz_k == 0 and dims == 2: - kwargs["dipstr"] = ( - -weights # pylint:disable=invalid-unary-operand-type - * (dipole_vec[0] + 1j * dipole_vec[1])) - else: - kwargs["dipstr"] = weights - kwargs["dipvec"] = dipole_vec - else: - kwargs["charge"] = weights - if helmholtz_k: - kwargs["zk"] = helmholtz_k - - ref_pot = wrangler.finalize_potentials( - fmmlib_routine( - sources=sources_host.T, targets=targets_host.T, - **kwargs)[0] - ) - - rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf) - logger.info("relative l2 error vs fmmlib direct: %g" % rel_err) - assert rel_err < 1e-5, rel_err - - # }}} - - # {{{ check against sumpy - - try: - import sumpy # noqa - except ImportError: - have_sumpy = False - from warnings import warn - warn("sumpy unavailable: cannot compute independent reference " - "values for pyfmmlib") - else: - have_sumpy = True - - if have_sumpy: - from sumpy.kernel import ( # pylint:disable=import-error - LaplaceKernel, HelmholtzKernel, DirectionalSourceDerivative) - from sumpy.p2p import P2P # pylint:disable=import-error - - sumpy_extra_kwargs = {} - if helmholtz_k: - knl = HelmholtzKernel(dims) - sumpy_extra_kwargs["k"] = helmholtz_k - else: - knl = LaplaceKernel(dims) - - if use_dipoles: - knl = DirectionalSourceDerivative(knl) - sumpy_extra_kwargs["src_derivative_dir"] = dipole_vec - - p2p = P2P(ctx, - [knl], - exclude_self=False) - - evt, (sumpy_ref_pot,) = p2p( - queue, targets, sources, [weights], - out_host=True, **sumpy_extra_kwargs) - - sumpy_rel_err = ( - la.norm(pot - sumpy_ref_pot, np.inf) - / la.norm(sumpy_ref_pot, np.inf)) - - logger.info("relative l2 error vs sumpy direct: %g" % sumpy_rel_err) - assert sumpy_rel_err < 1e-5, sumpy_rel_err - - # }}} - -# }}} - - -# You can test individual routines by typing -# $ python test_fmm.py 'test_routine(cl.create_some_context)' - -if __name__ == "__main__": - test_pyfmmlib_fmm(cl._csc, 3, True, 0) - - """ - import sys - if len(sys.argv) > 1: - exec(sys.argv[1]) - else: - from pytest import main - main([__file__]) - """ - -# vim: fdm=marker -- GitLab From 413bfc3f5fff4ad8a42fef75b99cda7273926020 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 00:54:44 -0500 Subject: [PATCH 16/57] More fixes --- boxtree/pyfmmlib_integration.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 6aca634..5001328 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -135,7 +135,6 @@ class FMMLibExpansionWrangler(object): # }}} def level_to_rscale(self, level): - return 1 result = self.tree.root_extent * 2 ** -level * self.rscale_factor if abs(result) > 1: result = 1 @@ -189,7 +188,7 @@ class FMMLibExpansionWrangler(object): def get_vec_routine(self, name): return self.get_routine(name, "_vec") - def get_translation_routine(self, name, vec_suffix="_vec"): + def get_translation_routine(self, name, vec_suffix="_vec"): suffix = "" if self.dim == 3: suffix = "quadu" -- GitLab From c319bc4d690f412a40c15a60177499af80108f13 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 00:56:41 -0500 Subject: [PATCH 17/57] Improve docs --- boxtree/traversal.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 506ece1..4998a7b 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1707,8 +1707,9 @@ class FMMTraversalInfo(DeviceDataRecord): ``box_id_t [*]`` - To facilitate rotation-based translations ("point and shoot"), the following - lists record the angle between box translation pairs and the *z*-axis. + To facilitate matrix precomputation for rotation-based translations ("point + and shoot"), the following lists record the angle between box translation + pairs and the *z*-axis. .. attribute:: from_sep_siblings_rotation_classes -- GitLab From e0625ed0c28483e67889d71f1cf7e095511eeb81 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 00:59:15 -0500 Subject: [PATCH 18/57] linter fixes --- boxtree/fmm.py | 2 +- boxtree/pyfmmlib_integration.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/boxtree/fmm.py b/boxtree/fmm.py index f61d513..818de7a 100644 --- a/boxtree/fmm.py +++ b/boxtree/fmm.py @@ -116,7 +116,7 @@ def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None): traversal.from_sep_siblings_starts, traversal.from_sep_siblings_lists, mpole_exps) - + recorder.add("multipole_to_local", timing_future) # local_exps represents both Gamma and Delta in [1] diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 5001328..d741a14 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -639,7 +639,7 @@ class FMMLibExpansionWrangler(object): ier, rotmatf = rotmat_builder(order, self.m2l_rotation_angles) assert (0 == ier).all() - ier, rotmatb = rotmat_builder(order, -self.m2l_rotation_angles) + ier, rotmatb = rotmat_builder(order, -self.m2l_rotation_angles) # noqa pylint:disable=invalid-unary-operand-type assert (0 == ier).all() kwargs["ldm"] = order -- GitLab From 3a38603f19063cd1d02ea8312bd5592fc37e7f17 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 01:02:29 -0500 Subject: [PATCH 19/57] More linter fixes --- test/test_tools.py | 2 +- test/test_traversal.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_tools.py b/test/test_tools.py index 1807764..6e9827d 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -72,7 +72,7 @@ def test_list_renumberer(ctx_factory): arr = cl.array.to_device( queue, np.array([9., 4., 1., 4., 5., 6., 9.], dtype=np.float64)) - + lr = ListRenumberer(ctx, np.uint64, np.int) renumbered_arr, new_to_old, _ = lr(arr.view(np.uint64)) diff --git a/test/test_traversal.py b/test/test_traversal.py index e70edb2..6e8a334 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -401,10 +401,10 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): center = centers[tgt_ibox] translation_vecs = centers[seps] - center theta = np.arctan2( - la.norm(translation_vecs[:, :dims - 1], axis=1), + la.norm(translation_vecs[:, :dims - 1], axis=1), translation_vecs[:, dims - 1]) rot_angles = trav.from_sep_siblings_rotation_class_to_angle[rot_classes] - + assert np.allclose(theta, rot_angles, atol=1e-13, rtol=1e-13) # }}} -- GitLab From 5f3d9b224d1ddb19a369a91083e48e262db98b53 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 11:01:28 -0500 Subject: [PATCH 20/57] Fix binary search --- boxtree/area_query.py | 2 +- boxtree/tools.py | 25 ++++++++++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index 700b647..cb7a886 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -461,7 +461,7 @@ STARTS_EXPANDER_TEMPLATE = ElementwiseTemplate( dst[i] = bsearch(starts, starts_len, i); """, name="starts_expander", - preamble=str(InlineBinarySearch("idx_t"))) + preamble=str(InlineBinarySearch("right", "idx_t"))) # }}} diff --git a/boxtree/tools.py b/boxtree/tools.py index 55c0b5e..406a04e 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -581,15 +581,22 @@ from mako.template import Template BINARY_SEARCH_TEMPLATE = Template(""" /* - * Returns the smallest value of i such that val <= arr[i]. - * If no such value exists, returns (size_t) -1. + * If "bias" = "left", returns the smallest value of i such that + * val <= arr[i], or (size_t) -1 if val is greater than all values. + * + * If "bias" = "right", returns the largest value of i such that + * arr[i] <= val, or (size_t) -1 if val is less than all values. */ inline size_t bsearch( __global const ${elem_t} *arr, size_t len, const ${elem_t} val) { +%if bias == "left": if (arr[len - 1] < val) +%elif bias == "right": + if (val < arr[0]) +%endif { return -1; } @@ -600,12 +607,20 @@ inline size_t bsearch( { i = l + (r - l) / 2; + %if bias == "left": if (val <= arr[i] && (i == 0 || arr[i - 1] < val)) + %elif bias == "right": + if (arr[i] <= val && (i == len - 1 || val < arr[i + 1])) + %endif { return i; } + %if bias == "left": if (arr[i] < val) + %elif bias == "right": + if (arr[i] <= val) + %endif { l = i; } @@ -620,8 +635,8 @@ inline size_t bsearch( class InlineBinarySearch(object): - def __init__(self, elem_type_name): - self.render_vars = {"elem_t": elem_type_name} + def __init__(self, bias, elem_type_name): + self.render_vars = {"bias": bias, "elem_t": elem_type_name} @memoize_method def __str__(self): @@ -688,7 +703,7 @@ class ListRenumberer(object): ("renumbered_t", to_element_dtype), ), var_values=(), - more_preamble=str(InlineBinarySearch("elem_t")))) + more_preamble=str(InlineBinarySearch("left", "elem_t")))) self.to_element_dtype = to_element_dtype -- GitLab From 862c01208bf5b321ded387e96cf0d10152ca4fcd Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 11:07:25 -0500 Subject: [PATCH 21/57] Change printing --- test/test_fmm.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/test_fmm.py b/test/test_fmm.py index 73136ba..b344cad 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -693,13 +693,16 @@ def test_fmm_with_optimized_3d_m2l(ctx_factory, helmholtz_k, well_sep_is_n_away) optimized_pot = drive_fmm( trav, optimized_wrangler, weights, timing_data=optimized_timing_data) - print("Baseline M2L time : %.4g s" % - baseline_timing_data["multipole_to_local"] - .get("process_elapsed", -1)) - print("Optimized M2L time: %.4g s" % - optimized_timing_data["multipole_to_local"] - .get("process_elapsed", -1)) + try: + print("Baseline M2L time : %#.4g s" % + baseline_timing_data["multipole_to_local"]["process_elapsed"]) + + print("Optimized M2L time: %#.4g s" % + optimized_timing_data["multipole_to_local"]["process_elapsed"]) + + except KeyError: + pass assert np.allclose(baseline_pot, optimized_pot, atol=1e-13, rtol=1e-13) -- GitLab From c6ac416a8cfd0894800fede1f0345fa92f712f0f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 11:26:21 -0500 Subject: [PATCH 22/57] Fix test --- test/test_traversal.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_traversal.py b/test/test_traversal.py index 6e8a334..cd37105 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -398,8 +398,7 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): seps = trav.from_sep_siblings_lists[start:end] rot_classes = trav.from_sep_siblings_rotation_classes[start:end] - center = centers[tgt_ibox] - translation_vecs = centers[seps] - center + translation_vecs = centers[tgt_ibox] - centers[seps] theta = np.arctan2( la.norm(translation_vecs[:, :dims - 1], axis=1), translation_vecs[:, dims - 1]) -- GitLab From 99feacbb7eecaa29610f6448b4ed75e82f3d6cbf Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 11:27:14 -0500 Subject: [PATCH 23/57] flake8 fix --- test/test_fmm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_fmm.py b/test/test_fmm.py index b344cad..0469cda 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -693,7 +693,6 @@ def test_fmm_with_optimized_3d_m2l(ctx_factory, helmholtz_k, well_sep_is_n_away) optimized_pot = drive_fmm( trav, optimized_wrangler, weights, timing_data=optimized_timing_data) - try: print("Baseline M2L time : %#.4g s" % baseline_timing_data["multipole_to_local"]["process_elapsed"]) -- GitLab From ab10257a3de8be6196725fae8953049ecb664996 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 11:28:26 -0500 Subject: [PATCH 24/57] Point back to master pyfmmlib --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4f0aed1..c3174a4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy git+https://github.com/inducer/pyopencl git+https://github.com/inducer/islpy git+https://github.com/inducer/loopy -git+https://gitlab.tiker.net/inducer/pyfmmlib@wrap-rotations +git+https://gitlab.tiker.net/inducer/pyfmmlib # only for reference values for the fmmlib test # (unable to use--circular dep) -- GitLab From 7278348feacec6545cfd817b17d4b742e4d33f4f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 11:50:09 -0500 Subject: [PATCH 25/57] Actually fix printing --- test/test_fmm.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/test/test_fmm.py b/test/test_fmm.py index 0469cda..27ec3ce 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -693,15 +693,13 @@ def test_fmm_with_optimized_3d_m2l(ctx_factory, helmholtz_k, well_sep_is_n_away) optimized_pot = drive_fmm( trav, optimized_wrangler, weights, timing_data=optimized_timing_data) - try: - print("Baseline M2L time : %#.4g s" % - baseline_timing_data["multipole_to_local"]["process_elapsed"]) - - print("Optimized M2L time: %#.4g s" % - optimized_timing_data["multipole_to_local"]["process_elapsed"]) + baseline_time = baseline_timing_data["multipole_to_local"]["process_elapsed"] + if baseline_time is not None: + print("Baseline M2L time : %#.4g s" % baseline_time) - except KeyError: - pass + opt_time = optimized_timing_data["multipole_to_local"]["process_elapsed"] + if opt_time is not None: + print("Optimized M2L time: %#.4g s" % opt_time) assert np.allclose(baseline_pot, optimized_pot, atol=1e-13, rtol=1e-13) -- GitLab From 45384886233c8934ad7bff8d6573d08538209bb9 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 12:35:02 -0500 Subject: [PATCH 26/57] Reduce the size of the optimized M2L test --- test/test_fmm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_fmm.py b/test/test_fmm.py index 27ec3ce..6f7580d 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -634,8 +634,8 @@ def test_fmm_with_optimized_3d_m2l(ctx_factory, helmholtz_k, well_sep_is_n_away) ctx = ctx_factory() queue = cl.CommandQueue(ctx) - nsources = 10000 - ntargets = 10000 + nsources = 5000 + ntargets = 5000 dtype = np.float64 sources = p_normal(queue, nsources, dims, dtype, seed=15) -- GitLab From fe88ecde4cd76d4bcd00a33c199183be1859ffa0 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 14:51:12 -0500 Subject: [PATCH 27/57] Refactor rotation matrix computation, only compute the matrices of largest order and reuse them for the other levels --- boxtree/pyfmmlib_integration.py | 99 ++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 33 deletions(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index d741a14..2c78973 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -595,6 +595,55 @@ class FMMLibExpansionWrangler(object): return output + # {{{ precompute rotation matrices for optimized m2l + + @memoize_method + def m2l_rotation_matrices(self): + # Returns a tuple (rotmatf, rotmatb, rotmat_order), consisting of the + # forward rotation matrices, backward rotation matrices, and the + # translation order of the matrices. rotmat_order is -1 if not + # supported. + + def mem_estimate(order): + # Rotation matrix memory cost estimate. + return (8 + * (order + 1)**2 + * (2*order + 1) + * len(self.m2l_rotation_angles)) + + rotmatf = None + rotmatb = None + rotmat_order = -1 + + if not self.supports_optimized_m2l: + return (rotmatf, rotmatb, rotmat_order) + + # Find the largest order we can use. Because the memory cost of the + # matrices could be large, only precompute them if the cost estimate + # for the order does not exceed the cutoff. + for order in sorted(self.level_nterms, key=lambda x: -x): + if mem_estimate(order) < _ROTMAT_PRECOMPUTATION_CUTOFF_BYTES: + rotmat_order = order + break + + if rotmat_order == -1: + return (rotmatf, rotmatb, rotmat_order) + + # Compute the rotation matrices. + from pyfmmlib import rotviarecur3p_init_vec as rotmat_builder + + ier, rotmatf = ( + rotmat_builder(rotmat_order, self.m2l_rotation_angles)) + assert (0 == ier).all() + + ier, rotmatb = ( + rotmat_builder(rotmat_order, -self.m2l_rotation_angles)) # noqa pylint:disable=invalid-unary-operand-type + assert (0 == ier).all() + + return (rotmatf, rotmatb, rotmat_order) + + # }}} + @log_process(logger) @return_timing_data def multipole_to_local(self, @@ -604,6 +653,9 @@ class FMMLibExpansionWrangler(object): tree = self.tree local_exps = self.local_expansion_zeros() + # Precomputed rotation matrices + rotmatf, rotmatb, rotmat_order = self.m2l_rotation_matrices() + for lev in range(self.tree.nlevels): lstart, lstop = level_start_target_or_target_parent_box_nrs[lev:lev+2] if lstart == lstop: @@ -615,44 +667,25 @@ class FMMLibExpansionWrangler(object): kwargs = {} - # {{{ precompute rotation matrices for optimized m2l, if applicable - - if self.supports_optimized_m2l: - order = self.level_nterms[lev] - - # Rotation matrix precomputation can use a large amount of memory. - # Only use it when the estimated size is below the cutoff. - rotmat_mem_estimate = (8 - * (order + 1)**2 - * (2*order + 1) - * len(self.m2l_rotation_angles)) - - if rotmat_mem_estimate < _ROTMAT_PRECOMPUTATION_CUTOFF_BYTES: - assert len(self.m2l_rotation_lists) == len(lists) - - mploc = self.get_translation_routine( - "%ddmploc", vec_suffix="2_trunc_imany") - - from pyfmmlib import rotviarecur3p_init_vec as rotmat_builder + # {{{ set up optimized m2l, if applicable - # Compute forward and backward rotation matrices for this level. - ier, rotmatf = rotmat_builder(order, self.m2l_rotation_angles) - assert (0 == ier).all() + if self.level_nterms[lev] <= rotmat_order: + assert len(self.m2l_rotation_lists) == len(lists) - ier, rotmatb = rotmat_builder(order, -self.m2l_rotation_angles) # noqa pylint:disable=invalid-unary-operand-type - assert (0 == ier).all() + mploc = self.get_translation_routine( + "%ddmploc", vec_suffix="2_trunc_imany") - kwargs["ldm"] = order - kwargs["nterms"] = order - kwargs["nterms1"] = order + kwargs["ldm"] = rotmat_order + kwargs["nterms"] = self.level_nterms[lev] + kwargs["nterms1"] = self.level_nterms[lev] - kwargs["rotmatf"] = rotmatf - kwargs["rotmatf_offsets"] = self.m2l_rotation_lists - kwargs["rotmatf_starts"] = starts_on_lvl + kwargs["rotmatf"] = rotmatf + kwargs["rotmatf_offsets"] = self.m2l_rotation_lists + kwargs["rotmatf_starts"] = starts_on_lvl - kwargs["rotmatb"] = rotmatb - kwargs["rotmatb_offsets"] = self.m2l_rotation_lists - kwargs["rotmatb_starts"] = starts_on_lvl + kwargs["rotmatb"] = rotmatb + kwargs["rotmatb_offsets"] = self.m2l_rotation_lists + kwargs["rotmatb_starts"] = starts_on_lvl # }}} -- GitLab From 4593c599afb523efe7036c6cdd3d0bfa04fc8873 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 14:59:27 -0500 Subject: [PATCH 28/57] Improve comment --- boxtree/pyfmmlib_integration.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 2c78973..f803a41 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -653,7 +653,8 @@ class FMMLibExpansionWrangler(object): tree = self.tree local_exps = self.local_expansion_zeros() - # Precomputed rotation matrices + # Precomputed rotation matrices (matrices of larger order can be used + # for translations of smaller order) rotmatf, rotmatb, rotmat_order = self.m2l_rotation_matrices() for lev in range(self.tree.nlevels): -- GitLab From 3dce604985423c33f019f49106a48e736eaec72a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 31 May 2019 15:42:24 -0500 Subject: [PATCH 29/57] More comments --- boxtree/tools.py | 6 ++++++ boxtree/traversal.py | 14 ++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/boxtree/tools.py b/boxtree/tools.py index 406a04e..2bcb2c4 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -678,6 +678,12 @@ class ListRenumberer(object): >>> new_to_old array([1, 3, 4, 6]) + .. note:: + + Due to radix sort not handling signed types or floats + (https://gitlab.tiker.net/inducer/pyopencl/issues/16), + make sure *from_element_dtype* is an unsigned integer type. + """ def __init__(self, context, from_element_dtype, to_element_dtype=None): diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 4998a7b..2d82960 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1382,7 +1382,7 @@ ROTATION_COSINE_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// } /* - * Normalizes a integer vector by dividing it by its (absolute) GCD. + * Normalizes a integer vector by dividing it by its GCD. */ inline int_coord_vec_t gcd_normalize(int_coord_vec_t vec) { @@ -1409,10 +1409,11 @@ ROTATION_COSINE_FINDER_TEMPLATE = ElementwiseTemplate( operation=r"""//CL:mako// /* - * Normalize the translation vector (by dividing by its absolute GCD). + * Normalize the translation vector (by dividing by its GCD). * - * We need this, because generally in in floating point arithmetic, - * if k is a positive scalar and v is a vector, we can't assume + * We need this before computing the cosine of the rotation angle, because + * generally in in floating point arithmetic, if k is a positive scalar and v + * is a vector, we can't assume * * kv[-1] / sqrt(|kv|^2) == v[-1] / sqrt(|v|^2). * @@ -1440,6 +1441,11 @@ class _RotationClassesFinder(object): self, context, well_sep_is_n_away, dimensions, int_coord_vec_dtype, coord_dtype): + # equiv_int_dtype_for_coord_dtype is an unsigned equivalent width dtype + # for coord_dtype. We use this in ListRenumberer, which only supports + # unsigned types. (That means the rotation cosines are sorted according + # to the interpretation of the bits as unsigned integers, but the order + # doesn't matter.) if coord_dtype.itemsize == 4: self.equiv_int_dtype_for_coord_dtype = np.uint32 elif coord_dtype.itemsize == 8: -- GitLab From f089a8281dc8dfb6c369513d2c724e20cf4a69af Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 1 Jun 2019 00:25:28 -0500 Subject: [PATCH 30/57] WIP: Make rotation class computations on-the-fly. --- boxtree/traversal.py | 489 ++++++++++++++++++++++------------------- test/test_traversal.py | 17 +- 2 files changed, 270 insertions(+), 236 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 2d82960..42f1d79 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -29,7 +29,7 @@ import pyopencl.array # noqa import pyopencl.cltypes # noqa from pyopencl.elementwise import ElementwiseTemplate from mako.template import Template -from boxtree.tools import AXIS_NAMES, DeviceDataRecord +from boxtree.tools import AXIS_NAMES, DeviceDataRecord, InlineBinarySearch import logging logger = logging.getLogger(__name__) @@ -171,8 +171,6 @@ typedef ${dtype_to_ctype(box_id_dtype)} box_id_t; <% vec_types_dict = dict(vec_types) %> typedef ${dtype_to_ctype(coord_dtype)} coord_t; typedef ${dtype_to_ctype(vec_types_dict[coord_dtype, dimensions])} coord_vec_t; -typedef ${dtype_to_ctype( - vec_types_dict[np.dtype(np.int32), dimensions])} int_coord_vec_t; #define COORD_T_MACH_EPS ((coord_t) ${ repr(np.finfo(coord_dtype).eps) }) @@ -582,24 +580,6 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t target_box_number) # {{{ from well-separated siblings ("list 2") FROM_SEP_SIBLINGS_TEMPLATE = r"""//CL// -/* - * Return an integer vector indicating the translation direction as a multiple - * of the box radius. - */ -inline int_coord_vec_t get_translation_vector( - coord_t root_extent, - int level, - coord_vec_t source_center, - coord_vec_t target_center) -{ - int_coord_vec_t result = (int_coord_vec_t) 0; - %for i in range(dimensions): - result.s${i} = rint( - (target_center.s${i} - source_center.s${i}) - / LEVEL_TO_RAD(level)); - %endfor - return result; -} void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) { @@ -639,10 +619,7 @@ void generate(LIST_ARG_DECL USER_ARG_DECL box_id_t itarget_or_target_parent_box) if (sep) { - int_coord_vec_t vec = get_translation_vector( - root_extent, level, sib_center, center); APPEND_from_sep_siblings(sib_box_id); - APPEND_translation_vectors(vec); } } } @@ -1367,162 +1344,6 @@ class _ListMerger(object): # }}} -# {{{ rotation classes finder - -ROTATION_COSINE_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// - inline int gcd(int a, int b) - { - while (b) - { - int tmp = a; - a = b; - b = tmp % b; - } - return a; - } - - /* - * Normalizes a integer vector by dividing it by its GCD. - */ - inline int_coord_vec_t gcd_normalize(int_coord_vec_t vec) - { - int vec_gcd = abs(vec.s0); - - %for i in range(1, dimensions): - vec_gcd = gcd(vec_gcd, abs(vec.s${i})); - %endfor - - return vec / vec_gcd; - } - """, - strict_undefined=True) - - -ROTATION_COSINE_FINDER_TEMPLATE = ElementwiseTemplate( - arguments=r"""//CL:mako// - /* input: */ - int_coord_vec_t *translation_vectors, - - /* output: */ - coord_t *rotation_cosines, - """, - - operation=r"""//CL:mako// - /* - * Normalize the translation vector (by dividing by its GCD). - * - * We need this before computing the cosine of the rotation angle, because - * generally in in floating point arithmetic, if k is a positive scalar and v - * is a vector, we can't assume - * - * kv[-1] / sqrt(|kv|^2) == v[-1] / sqrt(|v|^2). - * - * Normalizing ensures vectors that are positive integer multiples of each - * other get classified into the same equivalence class of rotations. - */ - int_coord_vec_t vec = gcd_normalize(translation_vectors[i]); - - coord_t num = vec.s${dimensions-1}; - coord_t denom = 0; - - %for i in range(dimensions): - denom += vec.s${i} * vec.s${i}; - %endfor - - denom = sqrt(denom); - - rotation_cosines[i] = num / denom; - """) - - -class _RotationClassesFinder(object): - - def __init__( - self, context, well_sep_is_n_away, dimensions, - int_coord_vec_dtype, coord_dtype): - - # equiv_int_dtype_for_coord_dtype is an unsigned equivalent width dtype - # for coord_dtype. We use this in ListRenumberer, which only supports - # unsigned types. (That means the rotation cosines are sorted according - # to the interpretation of the bits as unsigned integers, but the order - # doesn't matter.) - if coord_dtype.itemsize == 4: - self.equiv_int_dtype_for_coord_dtype = np.uint32 - elif coord_dtype.itemsize == 8: - self.equiv_int_dtype_for_coord_dtype = np.uint64 - else: - raise ValueError("unexpected coord_dtype") - - preamble = ROTATION_COSINE_FINDER_PREAMBLE_TEMPLATE.render( - dimensions=dimensions) - - self.rotation_cosine_finder = ( - ROTATION_COSINE_FINDER_TEMPLATE.build( - context, - type_aliases=( - ("int_coord_vec_t", int_coord_vec_dtype), - ("coord_t", coord_dtype), - ), - var_values=( - ("dimensions", dimensions), - ), - more_preamble=preamble)) - - from boxtree.tools import ListRenumberer - - self.list_renumberer = ListRenumberer( - context, - from_element_dtype=self.equiv_int_dtype_for_coord_dtype, - to_element_dtype=np.int32) - - self.well_sep_is_n_away = well_sep_is_n_away - self.dimensions = dimensions - self.coord_dtype = coord_dtype - - def __call__(self, queue, translation_vectors, wait_for=None): - nvectors = len(translation_vectors) - - # This computes the cosine of the rotation angles using the formula - # - # cos(theta) = a . b / (|a| * |b|). - # - # acos() is not supported universally in double precision on GPUs, so we - # don't invert the cosines to get the rotation angles on the device. - rotation_cosines = cl.array.empty(queue, nvectors, dtype=self.coord_dtype) - - evt = self.rotation_cosine_finder( - translation_vectors, - rotation_cosines, - queue=queue, wait_for=wait_for) - - rotation_classes, rotation_class_to_cosine, evt = ( - self.list_renumberer( - rotation_cosines.view(self.equiv_int_dtype_for_coord_dtype), - queue=queue, - wait_for=[evt])) - - # Compute the rotation angles. - rotation_class_to_cosine = ( - rotation_class_to_cosine.view(self.coord_dtype).get()) - rotation_class_to_angle = np.arccos(rotation_class_to_cosine) - rotation_class_to_angle = cl.array.to_device(queue, rotation_class_to_angle) - - # Sanity check. There should be no more than 2^(d-1) * (2n+1)^d distinct - # rotation classes, since that is an upper bound on the number of - # distinct list 2 boxes. - d = self.dimensions - n = self.well_sep_is_n_away - assert len(rotation_class_to_angle) <= 2 ** (d - 1) * (2 * n + 1) ** d - - result = {} - result["rotation_classes"] = rotation_classes - result["rotation_class_to_angle"] = rotation_class_to_angle - - return result, evt - -# }}} - - # {{{ traversal info (output) class FMMTraversalInfo(DeviceDataRecord): @@ -1713,24 +1534,6 @@ class FMMTraversalInfo(DeviceDataRecord): ``box_id_t [*]`` - To facilitate matrix precomputation for rotation-based translations ("point - and shoot"), the following lists record the angle between box translation - pairs and the *z*-axis. - - .. attribute:: from_sep_siblings_rotation_classes - - ``int32 [*]`` - - A list, corresponding to *from_sep_siblings_lists*, of the rotation - class of each box pair. - - .. attribute:: from_sep_siblings_rotation_class_to_angle - - ``coord_t [nfrom_sep_siblings_rotation_classes]`` - - Maps rotation classes in *from_sep_siblings_rotation_classes* to - rotation angles. - .. ------------------------------------------------------------------------ .. rubric:: Separated Smaller Boxes ("List 3") .. ------------------------------------------------------------------------ @@ -1892,10 +1695,6 @@ class FMMTraversalInfo(DeviceDataRecord): def ntarget_or_target_parent_boxes(self): return len(self.target_or_target_parent_boxes) - @property - def nfrom_sep_siblings_rotation_classes(self): - return len(self.from_sep_siblings_rotation_class_to_angle) - # }}} @@ -1922,6 +1721,9 @@ class FMMTraversalBuilder: self.well_sep_is_n_away = well_sep_is_n_away self.from_sep_smaller_crit = from_sep_smaller_crit + def get_extra_kernel_info(self, dimensions): + pass + # {{{ kernel builder @memoize_method @@ -2060,7 +1862,6 @@ class FMMTraversalBuilder: VectorArg(box_flags_enum.dtype, "box_flags"), ] - int_coord_vec_dtype = cl.cltypes.vec_types[np.dtype(np.int32), dimensions] for list_name, template, extra_args, extra_lists, eliminate_empty_list in [ ("same_level_non_well_sep_boxes", @@ -2078,7 +1879,7 @@ class FMMTraversalBuilder: "same_level_non_well_sep_boxes_starts"), VectorArg(box_id_dtype, "same_level_non_well_sep_boxes_lists"), - ], [("translation_vectors", int_coord_vec_dtype)], []), + ], [], []), ("from_sep_smaller", FROM_SEP_SMALLER_TEMPLATE, [ ScalarArg(coord_dtype, "stick_out_factor"), @@ -2131,18 +1932,6 @@ class FMMTraversalBuilder: # }}} - # {{{ rotation classes finder - - result["from_sep_siblings_rotation_classes_finder"] = ( - _RotationClassesFinder( - self.context, - self.well_sep_is_n_away, - dimensions, - int_coord_vec_dtype, - coord_dtype)) - - # }}} - return _KernelInfo(**result) # }}} @@ -2393,18 +2182,6 @@ class FMMTraversalBuilder: wait_for = [evt] from_sep_siblings = result["from_sep_siblings"] - if tree.dimensions == 3: - result, evt = knl_info.from_sep_siblings_rotation_classes_finder( - queue, result["translation_vectors"].lists, wait_for=wait_for) - wait_for = [evt] - - from_sep_siblings_rotation_classes = result["rotation_classes"] - from_sep_siblings_rotation_class_to_angle = ( - result["rotation_class_to_angle"]) - else: - from_sep_siblings_rotation_classes = None - from_sep_siblings_rotation_class_to_angle = None - # }}} with_extent = tree.sources_have_extent or tree.targets_have_extent @@ -2566,10 +2343,6 @@ class FMMTraversalBuilder: from_sep_siblings_starts=from_sep_siblings.starts, from_sep_siblings_lists=from_sep_siblings.lists, - from_sep_siblings_rotation_classes=( - from_sep_siblings_rotation_classes), - from_sep_siblings_rotation_class_to_angle=( - from_sep_siblings_rotation_class_to_angle), from_sep_smaller_by_level=from_sep_smaller_by_level, target_boxes_sep_smaller_by_source_level=( @@ -2587,4 +2360,256 @@ class FMMTraversalBuilder: # }}} + +# {{{ rotation classes builder + +ROTATION_COSINE_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// + #define LEVEL_TO_RAD(level) \ + (root_extent * 1 / (coord_t) (1 << (level + 1))) + + /* + * Return an integer vector indicating the a translation direction + * as a multiple of the box radius. + */ + inline int_coord_vec_t get_translation_vector( + coord_t root_extent, + int level, + coord_vec_t source_center, + coord_vec_t target_center) + { + int_coord_vec_t result = (int_coord_vec_t) 0; + %for i in range(dimensions): + result.s${i} = rint( + (target_center.s${i} - source_center.s${i}) + / LEVEL_TO_RAD(level)); + %endfor + return result; + } + + /* + * Compute the gcd of two positive integers. + */ + inline int gcd(int a, int b) + { + while (b) + { + int tmp = a; + a = b; + b = tmp % b; + } + return a; + } + + /* + * Normalizes a integer vector by dividing it by its GCD. + */ + inline int_coord_vec_t gcd_normalize(int_coord_vec_t vec) + { + int vec_gcd = abs(vec.s0); + + %for i in range(1, dimensions): + vec_gcd = gcd(vec_gcd, abs(vec.s${i})); + %endfor + + return vec / vec_gcd; + } + """ + str(InlineBinarySearch("right", "box_id_t")), + strict_undefined=True) + + +ROTATION_COSINE_FINDER_TEMPLATE = ElementwiseTemplate( + arguments=r"""//CL:mako// + /* input: */ + box_id_t *from_sep_siblings_lists, + box_id_t *from_sep_siblings_starts, + box_id_t *target_or_target_parent_boxes, + int ntarget_or_target_parent_boxes, + coord_t *box_centers, + int aligned_nboxes, + coord_t root_extent, + box_level_t *box_levels, + + /* output: */ + coord_t *rotation_cosines, + """, + + operation=TRAVERSAL_PREAMBLE_MAKO_DEFS + r"""//CL:mako// + /* + * Find the target box for this source box. + */ + box_id_t source_box_id = from_sep_siblings_lists[i]; + + size_t itarget_box = bsearch( + from_sep_siblings_starts, 1 + ntarget_or_target_parent_boxes, i); + + box_id_t target_box_id = target_or_target_parent_boxes[itarget_box]; + + /* + * Compute the translation vector. + */ + ${load_center("source_center", "source_box_id")} + ${load_center("target_center", "target_box_id")} + + int_coord_vec_t translation_vector = get_translation_vector( + root_extent, box_levels[source_box_id], source_center, target_center); + + /* + * Normalize the translation vector (by dividing by its GCD). + * + * We need this before computing the cosine of the rotation angle, because + * generally in in floating point arithmetic, if k is a positive scalar and v + * is a vector, we can't assume + * + * kv[-1] / sqrt(|kv|^2) == v[-1] / sqrt(|v|^2). + * + * Normalizing ensures vectors that are positive integer multiples of each + * other get classified into the same equivalence class of rotations. + */ + int_coord_vec_t vec = gcd_normalize(translation_vector); + + coord_t num = vec.s${dimensions-1}; + coord_t denom = 0; + + %for i in range(dimensions): + denom += vec.s${i} * vec.s${i}; + %endfor + + denom = sqrt(denom); + + rotation_cosines[i] = num / denom; + """) + + +class RotationClassesBuilder(object): + """Build rotation classes for List 2 translations. + + This class builds lists to facilitate matrix precomputations for + rotation-based translations ("point and shoot"), + """ + + def __init__(self, context, well_sep_is_n_away, dimensions, box_id_dtype, + box_level_dtype, coord_dtype): + + coord_vec_dtype = cl.cltypes.vec_types[coord_dtype, dimensions] + int_coord_vec_dtype = cl.cltypes.vec_types[np.dtype(np.int32), dimensions] + + # equiv_int_dtype_for_coord_dtype is an unsigned equivalent width dtype + # for coord_dtype. We use this in ListRenumberer, which only supports + # unsigned types. (That means the rotation cosines are sorted according + # to the interpretation of the bits as unsigned integers, but the order + # doesn't matter.) + if coord_dtype.itemsize == 4: + self.equiv_int_dtype_for_coord_dtype = np.uint32 + elif coord_dtype.itemsize == 8: + self.equiv_int_dtype_for_coord_dtype = np.uint64 + else: + raise ValueError("unexpected coord_dtype") + + preamble = ROTATION_COSINE_FINDER_PREAMBLE_TEMPLATE.render( + dimensions=dimensions) + + self.rotation_cosine_finder = ( + ROTATION_COSINE_FINDER_TEMPLATE.build( + context, + type_aliases=( + ("int_coord_vec_t", int_coord_vec_dtype), + ("coord_vec_t", coord_vec_dtype), + ("coord_t", coord_dtype), + ("box_id_t", box_id_dtype), + ("box_level_t", box_level_dtype), + ), + var_values=( + ("dimensions", dimensions), + ), + more_preamble=preamble)) + + from boxtree.tools import ListRenumberer + + self.list_renumberer = ListRenumberer( + context, + from_element_dtype=self.equiv_int_dtype_for_coord_dtype, + to_element_dtype=np.int32) + + self.well_sep_is_n_away = well_sep_is_n_away + self.dimensions = dimensions + self.coord_dtype = coord_dtype + + def __call__(self, queue, trav, wait_for=None): + """Build a dictionary with information about List 2 rotation classes. + + The following entries are in the dictionary. + + .. attribute:: nfrom_sep_siblings_rotation_classes + + The number of distinct rotation classes. + + .. attribute:: from_sep_siblings_rotation_classes + + ``int32 [*]`` + + A list, corresponding to *from_sep_siblings_lists* of *trav*, of + the rotation class of each box pair. + + .. attribute:: from_sep_siblings_rotation_class_to_angle + + ``coord_t [nfrom_sep_siblings_rotation_classes]`` + + Maps rotation classes in *from_sep_siblings_rotation_classes* to + rotation angles. This represents the angle between box translation + pairs and the *z*-axis. + """ + trav = trav.to_device(queue) + tree = trav.tree.to_device(queue) + + nitems = len(trav.from_sep_siblings_lists) + + # This computes the cosine of the rotation angles using the formula + # + # cos(theta) = a . b / (|a| * |b|). + # + # acos() is not supported universally in double precision on GPUs, so we + # don't invert the cosines to get the rotation angles on the device. + rotation_cosines = cl.array.empty(queue, nitems, dtype=self.coord_dtype) + + evt = self.rotation_cosine_finder( + trav.from_sep_siblings_lists, + trav.from_sep_siblings_starts, + trav.target_or_target_parent_boxes, + trav.ntarget_or_target_parent_boxes, + tree.box_centers, + tree.aligned_nboxes, + tree.root_extent, + tree.box_levels, + rotation_cosines, + queue=queue, wait_for=wait_for) + + rotation_classes, rotation_class_to_cosine, evt = ( + self.list_renumberer( + rotation_cosines.view(self.equiv_int_dtype_for_coord_dtype), + queue=queue, + wait_for=[evt])) + + # Compute the rotation angles. + rotation_class_to_cosine = ( + rotation_class_to_cosine.view(self.coord_dtype).get()) + rotation_class_to_angle = np.arccos(rotation_class_to_cosine) + rotation_class_to_angle = cl.array.to_device(queue, rotation_class_to_angle) + + # Sanity check. There should be no more than 2^(d-1) * (2n+1)^d distinct + # rotation classes, since that is an upper bound on the number of + # distinct list 2 boxes. + d = self.dimensions + n = self.well_sep_is_n_away + assert len(rotation_class_to_angle) <= 2**(d-1) * (2*n+1)**d + + result = {} + result["from_sep_siblings_rotation_classes"] = ( + rotation_classes.with_queue(None)) + result["from_sep_siblings_rotation_class_to_angle"] = ( + rotation_class_to_angle.with_queue(None)) + + return result, evt + +# }}} + # vim: filetype=pyopencl:fdm=marker diff --git a/test/test_traversal.py b/test/test_traversal.py index cd37105..8c55cce 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -377,7 +377,8 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): # {{{ build traversal - from boxtree.traversal import FMMTraversalBuilder + from boxtree.traversal import FMMTraversalBuilder, RotationClassesBuilder + tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away) trav, _ = tg(queue, tree) @@ -386,6 +387,14 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): centers = tree.box_centers.T + rb = RotationClassesBuilder(ctx, well_sep_is_n_away, tree.dimensions, + tree.box_id_dtype, tree.box_level_dtype, tree.coord_dtype) + + result, _ = rb(queue, trav) + + rot_classes = result["from_sep_siblings_rotation_classes"].get(queue) + rot_angles = result["from_sep_siblings_rotation_class_to_angle"].get(queue) + # }}} # For each entry of from_sep_siblings, compute the source-target translation @@ -396,15 +405,15 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes): start, end = trav.from_sep_siblings_starts[itgt_box:itgt_box+2] seps = trav.from_sep_siblings_lists[start:end] - rot_classes = trav.from_sep_siblings_rotation_classes[start:end] + level_rot_classes = rot_classes[start:end] translation_vecs = centers[tgt_ibox] - centers[seps] theta = np.arctan2( la.norm(translation_vecs[:, :dims - 1], axis=1), translation_vecs[:, dims - 1]) - rot_angles = trav.from_sep_siblings_rotation_class_to_angle[rot_classes] + level_rot_angles = rot_angles[level_rot_classes] - assert np.allclose(theta, rot_angles, atol=1e-13, rtol=1e-13) + assert np.allclose(theta, level_rot_angles, atol=1e-13, rtol=1e-13) # }}} -- GitLab From 2c86676accba2db60a089af2e56484c6ed05a269 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 1 Jun 2019 00:25:47 -0500 Subject: [PATCH 31/57] WIP: Add a geometry data interface to the wrangler --- boxtree/pyfmmlib_integration.py | 73 ++++++++++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 5 deletions(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index f803a41..a4d55f6 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -37,12 +37,73 @@ __doc__ = """Integrates :mod:`boxtree` with `pyfmmlib `_. """ -# {{{ constants -# Only use M2L translations with precomputed rotation matrices ("optimized M2L") -# when the total size of the precomputation is below a certain amount of bytes. +# {{{ geometry data interface -_ROTMAT_PRECOMPUTATION_CUTOFF_BYTES = 10**8 +class FMMLibGeometryDataInterface(object): + """Abstract interface for additional, optional geometry data passed to the + expansion wrangler. + + """ + + def m2l_rotation_classes(self): + """Return a NumPy array mapping entries of List 2 to rotation classes, + or *None* if not available. + + If available, this list should follow the same format as the list + `from_sep_siblings_rotation_classes`. + """ + raise NotImplementedError + + def m2l_rotation_angles(self): + """Return a NumPy array mapping rotation classes to rotation angles, + or *None* if not available. + """ + raise NotImplementedError + + +class EmptyFMMLibGeometryData(FMMLibGeometryDataInterface): + """A default implementation of the :class:`FMMLibGeometryDataInterface`. + Returns no data. + + """ + + def m2l_rotation_classes(self): + return None + + def m2l_rotation_angles(self): + return None + + +class FMMLibGeometryData(FMMLibGeometryDataInterface): + """An implementation of the :class:`FMMLibGeometryDataInterface`.""" + + def __init__(self, queue, trav): + self.queue = queue + self.trav = trav + self.tree = trav.tree + + @property + @memoize_method + def rotation_classes_builder(self): + return RotationClassesBuilder( + self.queue.context, + self.trav.well_sep_is_n_away, + self.trav.dimensions, + self.tree.box_id_dtype, + self.tree.coord_dtype) + + @memoize_method + def build_rotation_classes_lists(self): + return self.rotation_classes_builder(self.queue, self.trav) + + @memoize_method + def m2l_rotation_classes(self): + return self.build_rotation_classes_lists()["rotation_classes"] + + @memoize_method + def m2l_rotation_angles(self): + return self.build_rotation_classes_lists()["rotation_angles"] # }}} @@ -60,7 +121,8 @@ class FMMLibExpansionWrangler(object): def __init__(self, tree, helmholtz_k, fmm_level_to_nterms=None, ifgrad=False, dipole_vec=None, dipoles_already_reordered=False, nterms=None, - m2l_rotation_lists=None, m2l_rotation_angles=None): + optimized_m2l_precomputation_memory_cutoff_bytes=10**8, + geo_data=EmptyFMMLibGeometryData()): """ :arg fmm_level_to_nterms: a callable that, upon being passed the tree and the tree level as an integer, returns the value of *nterms* for the @@ -88,6 +150,7 @@ class FMMLibExpansionWrangler(object): return nterms self.tree = tree + self.geo_data = geo_data if helmholtz_k == 0: self.eqn_letter = "l" -- GitLab From df4747c15fce731b212106dd6f7fe68dc58c1f9f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 1 Jun 2019 12:38:08 -0500 Subject: [PATCH 32/57] Finish refactoring rotation classes generation --- boxtree/traversal.py | 96 +++++++++++++++++++++--------------------- test/test_traversal.py | 16 +++---- 2 files changed, 55 insertions(+), 57 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 42f1d79..c5f0a13 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1721,9 +1721,6 @@ class FMMTraversalBuilder: self.well_sep_is_n_away = well_sep_is_n_away self.from_sep_smaller_crit = from_sep_smaller_crit - def get_extra_kernel_info(self, dimensions): - pass - # {{{ kernel builder @memoize_method @@ -1862,7 +1859,6 @@ class FMMTraversalBuilder: VectorArg(box_flags_enum.dtype, "box_flags"), ] - for list_name, template, extra_args, extra_lists, eliminate_empty_list in [ ("same_level_non_well_sep_boxes", SAME_LEVEL_NON_WELL_SEP_BOXES_TEMPLATE, [], [], []), @@ -1897,7 +1893,7 @@ class FMMTraversalBuilder: "from_sep_smaller_min_nsources_cumul"), ScalarArg(box_id_dtype, "from_sep_smaller_source_level"), ], - [("from_sep_close_smaller", box_id_dtype)] + ["from_sep_close_smaller"] if sources_have_extent or targets_have_extent else [], ["from_sep_smaller"]), ("from_sep_bigger", FROM_SEP_BIGGER_TEMPLATE, @@ -1911,11 +1907,10 @@ class FMMTraversalBuilder: VectorArg(box_id_dtype, "same_level_non_well_sep_boxes_lists"), ], - [("from_sep_close_bigger", box_id_dtype)] + ["from_sep_close_bigger"] if sources_have_extent or targets_have_extent else [], []), ]: - src = Template( TRAVERSAL_PREAMBLE_TEMPLATE + HELPER_FUNCTION_TEMPLATE @@ -1923,7 +1918,9 @@ class FMMTraversalBuilder: strict_undefined=True).render(**render_vars) result[list_name+"_builder"] = ListOfListsBuilder(self.context, - [(list_name, box_id_dtype)] + extra_lists, + [(list_name, box_id_dtype)] + + [(extra_list_name, box_id_dtype) + for extra_list_name in extra_lists], str(src), arg_decls=base_args + extra_args, debug=debug, name_prefix=list_name, @@ -2480,16 +2477,42 @@ ROTATION_COSINE_FINDER_TEMPLATE = ElementwiseTemplate( """) +class RotationClassesInfo(DeviceDataRecord): + r"""Interaction lists to help with matrix precomputations for rotation-based + translations ("point and shoot"), + + .. attribute:: nfrom_sep_siblings_rotation_classes + + The number of distinct rotation classes. + + .. attribute:: from_sep_siblings_rotation_classes + + ``int32 [*]`` + + A list, corresponding to *from_sep_siblings_lists* of *trav*, of + the rotation class of each box pair. + + .. attribute:: from_sep_siblings_rotation_class_to_angle + + ``coord_t [nfrom_sep_siblings_rotation_classes]`` + + Maps rotation classes in *from_sep_siblings_rotation_classes* to + rotation angles. This represents the angle between box translation + pairs and the *z*-axis. + + """ + + @property + def nfrom_sep_siblings_rotation_classes(self): + return len(self.from_sep_siblings_rotation_class_to_angle) + + class RotationClassesBuilder(object): """Build rotation classes for List 2 translations. - - This class builds lists to facilitate matrix precomputations for - rotation-based translations ("point and shoot"), """ def __init__(self, context, well_sep_is_n_away, dimensions, box_id_dtype, box_level_dtype, coord_dtype): - coord_vec_dtype = cl.cltypes.vec_types[coord_dtype, dimensions] int_coord_vec_dtype = cl.cltypes.vec_types[np.dtype(np.int32), dimensions] @@ -2534,34 +2557,14 @@ class RotationClassesBuilder(object): self.dimensions = dimensions self.coord_dtype = coord_dtype + @log_process(logger, "build rotation classes") def __call__(self, queue, trav, wait_for=None): - """Build a dictionary with information about List 2 rotation classes. - - The following entries are in the dictionary. - - .. attribute:: nfrom_sep_siblings_rotation_classes - - The number of distinct rotation classes. - - .. attribute:: from_sep_siblings_rotation_classes - - ``int32 [*]`` - - A list, corresponding to *from_sep_siblings_lists* of *trav*, of - the rotation class of each box pair. - - .. attribute:: from_sep_siblings_rotation_class_to_angle - - ``coord_t [nfrom_sep_siblings_rotation_classes]`` - - Maps rotation classes in *from_sep_siblings_rotation_classes* to - rotation angles. This represents the angle between box translation - pairs and the *z*-axis. + """Returns a pair *info*, *evt* where info is a :class:`RotationClassesInfo`. """ - trav = trav.to_device(queue) - tree = trav.tree.to_device(queue) + rotation_cosines = cl.array.empty( + queue, len(trav.from_sep_siblings_lists), dtype=self.coord_dtype) - nitems = len(trav.from_sep_siblings_lists) + tree = trav.tree # This computes the cosine of the rotation angles using the formula # @@ -2569,8 +2572,6 @@ class RotationClassesBuilder(object): # # acos() is not supported universally in double precision on GPUs, so we # don't invert the cosines to get the rotation angles on the device. - rotation_cosines = cl.array.empty(queue, nitems, dtype=self.coord_dtype) - evt = self.rotation_cosine_finder( trav.from_sep_siblings_lists, trav.from_sep_siblings_starts, @@ -2595,20 +2596,17 @@ class RotationClassesBuilder(object): rotation_class_to_angle = np.arccos(rotation_class_to_cosine) rotation_class_to_angle = cl.array.to_device(queue, rotation_class_to_angle) - # Sanity check. There should be no more than 2^(d-1) * (2n+1)^d distinct - # rotation classes, since that is an upper bound on the number of - # distinct list 2 boxes. + # There should be no more than 2^(d-1) * (2n+1)^d distinct rotation + # classes, since that is an upper bound on the number of distinct list 2 + # boxes. d = self.dimensions n = self.well_sep_is_n_away assert len(rotation_class_to_angle) <= 2**(d-1) * (2*n+1)**d - result = {} - result["from_sep_siblings_rotation_classes"] = ( - rotation_classes.with_queue(None)) - result["from_sep_siblings_rotation_class_to_angle"] = ( - rotation_class_to_angle.with_queue(None)) - - return result, evt + return RotationClassesInfo( + from_sep_siblings_rotation_classes=rotation_classes, + from_sep_siblings_rotation_class_to_angle=rotation_class_to_angle, + ).with_queue(None), evt # }}} diff --git a/test/test_traversal.py b/test/test_traversal.py index 8c55cce..07027ba 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -378,22 +378,22 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): # {{{ build traversal from boxtree.traversal import FMMTraversalBuilder, RotationClassesBuilder - + tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away) trav, _ = tg(queue, tree) - tree = tree.get(queue=queue) - trav = trav.get(queue=queue) - - centers = tree.box_centers.T - rb = RotationClassesBuilder(ctx, well_sep_is_n_away, tree.dimensions, tree.box_id_dtype, tree.box_level_dtype, tree.coord_dtype) result, _ = rb(queue, trav) - rot_classes = result["from_sep_siblings_rotation_classes"].get(queue) - rot_angles = result["from_sep_siblings_rotation_class_to_angle"].get(queue) + rot_classes = result.from_sep_siblings_rotation_classes.get(queue) + rot_angles = result.from_sep_siblings_rotation_class_to_angle.get(queue) + + tree = tree.get(queue=queue) + trav = trav.get(queue=queue) + + centers = tree.box_centers.T # }}} -- GitLab From 4167cf1cc10d4c5696095407dde05dc9d3751fa3 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 1 Jun 2019 12:59:24 -0500 Subject: [PATCH 33/57] Finish refactoring pyfmmlib interface --- boxtree/pyfmmlib_integration.py | 97 ++++++++++++++------------------- boxtree/traversal.py | 4 +- doc/fmm.rst | 4 ++ test/test_fmm.py | 6 +- test/test_traversal.py | 2 +- 5 files changed, 50 insertions(+), 63 deletions(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index a4d55f6..c04f387 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -44,37 +44,22 @@ class FMMLibGeometryDataInterface(object): """Abstract interface for additional, optional geometry data passed to the expansion wrangler. - """ + .. automethod:: m2l_rotation_lists - def m2l_rotation_classes(self): - """Return a NumPy array mapping entries of List 2 to rotation classes, - or *None* if not available. + .. automethod:: m2l_rotation_angles + """ - If available, this list should follow the same format as the list - `from_sep_siblings_rotation_classes`. + def m2l_rotation_lists(self): + """Return a NumPy array mapping entries of List 2 to rotation classes. """ raise NotImplementedError def m2l_rotation_angles(self): - """Return a NumPy array mapping rotation classes to rotation angles, - or *None* if not available. + """Return a NumPy array mapping List 2 rotation classes to rotation angles. """ raise NotImplementedError -class EmptyFMMLibGeometryData(FMMLibGeometryDataInterface): - """A default implementation of the :class:`FMMLibGeometryDataInterface`. - Returns no data. - - """ - - def m2l_rotation_classes(self): - return None - - def m2l_rotation_angles(self): - return None - - class FMMLibGeometryData(FMMLibGeometryDataInterface): """An implementation of the :class:`FMMLibGeometryDataInterface`.""" @@ -86,24 +71,34 @@ class FMMLibGeometryData(FMMLibGeometryDataInterface): @property @memoize_method def rotation_classes_builder(self): + from boxtree.traversal import RotationClassesBuilder return RotationClassesBuilder( self.queue.context, self.trav.well_sep_is_n_away, - self.trav.dimensions, + self.tree.dimensions, self.tree.box_id_dtype, + self.tree.box_level_dtype, self.tree.coord_dtype) @memoize_method def build_rotation_classes_lists(self): - return self.rotation_classes_builder(self.queue, self.trav) + trav = self.trav.to_device(self.queue) + tree = self.tree.to_device(self.queue) + return self.rotation_classes_builder(self.queue, trav, tree)[0] @memoize_method - def m2l_rotation_classes(self): - return self.build_rotation_classes_lists()["rotation_classes"] + def m2l_rotation_lists(self): + return (self + .build_rotation_classes_lists() + .from_sep_siblings_rotation_classes + .get(self.queue)) @memoize_method def m2l_rotation_angles(self): - return self.build_rotation_classes_lists()["rotation_angles"] + return (self + .build_rotation_classes_lists() + .from_sep_siblings_rotation_class_to_angle + .get(self.queue)) # }}} @@ -122,19 +117,11 @@ class FMMLibExpansionWrangler(object): def __init__(self, tree, helmholtz_k, fmm_level_to_nterms=None, ifgrad=False, dipole_vec=None, dipoles_already_reordered=False, nterms=None, optimized_m2l_precomputation_memory_cutoff_bytes=10**8, - geo_data=EmptyFMMLibGeometryData()): + geo_data=None): """ :arg fmm_level_to_nterms: a callable that, upon being passed the tree and the tree level as an integer, returns the value of *nterms* for the multipole and local expansions on that level. - - :arg m2l_rotation_lists: corresponds to the - *from_sep_siblings_rotation_classes* attribute of the traversal - object - - :arg m2l_rotation_angles:corresponds to the - *from_sep_siblings_rotation_class_to_angle* attribute of the traversal - object """ if nterms is not None and fmm_level_to_nterms is not None: @@ -151,6 +138,7 @@ class FMMLibExpansionWrangler(object): self.tree = tree self.geo_data = geo_data + self.rotmat_cutoff_bytes = optimized_m2l_precomputation_memory_cutoff_bytes if helmholtz_k == 0: self.eqn_letter = "l" @@ -175,6 +163,8 @@ class FMMLibExpansionWrangler(object): self.dim = tree.dimensions + self.supports_optimized_m2l = self.dim == 3 and geo_data is not None + if dipole_vec is not None: assert dipole_vec.shape == (self.dim, self.tree.nsources) @@ -187,14 +177,6 @@ class FMMLibExpansionWrangler(object): self.dipole_vec = None self.dp_suffix = "" - self.supports_optimized_m2l = ( - tree.dimensions == 3 - and m2l_rotation_lists is not None - and m2l_rotation_angles is not None) - - self.m2l_rotation_lists = m2l_rotation_lists - self.m2l_rotation_angles = m2l_rotation_angles - # }}} def level_to_rscale(self, level): @@ -667,13 +649,6 @@ class FMMLibExpansionWrangler(object): # translation order of the matrices. rotmat_order is -1 if not # supported. - def mem_estimate(order): - # Rotation matrix memory cost estimate. - return (8 - * (order + 1)**2 - * (2*order + 1) - * len(self.m2l_rotation_angles)) - rotmatf = None rotmatb = None rotmat_order = -1 @@ -681,11 +656,20 @@ class FMMLibExpansionWrangler(object): if not self.supports_optimized_m2l: return (rotmatf, rotmatb, rotmat_order) + m2l_rotation_angles = self.geo_data.m2l_rotation_angles() + + def mem_estimate(order): + # Rotation matrix memory cost estimate. + return (8 + * (order + 1)**2 + * (2*order + 1) + * len(m2l_rotation_angles)) + # Find the largest order we can use. Because the memory cost of the # matrices could be large, only precompute them if the cost estimate # for the order does not exceed the cutoff. for order in sorted(self.level_nterms, key=lambda x: -x): - if mem_estimate(order) < _ROTMAT_PRECOMPUTATION_CUTOFF_BYTES: + if mem_estimate(order) < self.rotmat_cutoff_bytes: rotmat_order = order break @@ -696,11 +680,11 @@ class FMMLibExpansionWrangler(object): from pyfmmlib import rotviarecur3p_init_vec as rotmat_builder ier, rotmatf = ( - rotmat_builder(rotmat_order, self.m2l_rotation_angles)) + rotmat_builder(rotmat_order, m2l_rotation_angles)) assert (0 == ier).all() ier, rotmatb = ( - rotmat_builder(rotmat_order, -self.m2l_rotation_angles)) # noqa pylint:disable=invalid-unary-operand-type + rotmat_builder(rotmat_order, -m2l_rotation_angles)) # noqa pylint:disable=invalid-unary-operand-type assert (0 == ier).all() return (rotmatf, rotmatb, rotmat_order) @@ -734,7 +718,8 @@ class FMMLibExpansionWrangler(object): # {{{ set up optimized m2l, if applicable if self.level_nterms[lev] <= rotmat_order: - assert len(self.m2l_rotation_lists) == len(lists) + m2l_rotation_lists = self.geo_data.m2l_rotation_lists() + assert len(m2l_rotation_lists) == len(lists) mploc = self.get_translation_routine( "%ddmploc", vec_suffix="2_trunc_imany") @@ -744,11 +729,11 @@ class FMMLibExpansionWrangler(object): kwargs["nterms1"] = self.level_nterms[lev] kwargs["rotmatf"] = rotmatf - kwargs["rotmatf_offsets"] = self.m2l_rotation_lists + kwargs["rotmatf_offsets"] = m2l_rotation_lists kwargs["rotmatf_starts"] = starts_on_lvl kwargs["rotmatb"] = rotmatb - kwargs["rotmatb_offsets"] = self.m2l_rotation_lists + kwargs["rotmatb_offsets"] = m2l_rotation_lists kwargs["rotmatb_starts"] = starts_on_lvl # }}} diff --git a/boxtree/traversal.py b/boxtree/traversal.py index c5f0a13..49b1861 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -2558,14 +2558,12 @@ class RotationClassesBuilder(object): self.coord_dtype = coord_dtype @log_process(logger, "build rotation classes") - def __call__(self, queue, trav, wait_for=None): + def __call__(self, queue, trav, tree, wait_for=None): """Returns a pair *info*, *evt* where info is a :class:`RotationClassesInfo`. """ rotation_cosines = cl.array.empty( queue, len(trav.from_sep_siblings_lists), dtype=self.coord_dtype) - tree = trav.tree - # This computes the cosine of the rotation angles using the formula # # cos(theta) = a . b / (|a| * |b|). diff --git a/doc/fmm.rst b/doc/fmm.rst index 97ea4e0..3d4656b 100644 --- a/doc/fmm.rst +++ b/doc/fmm.rst @@ -19,6 +19,10 @@ Integration with PyFMMLib .. automodule:: boxtree.pyfmmlib_integration +.. autoclass:: FMMLibGeometryDataInterface + +.. autoclass:: FMMLibGeometryData + .. autoclass:: FMMLibExpansionWrangler .. vim: sw=4 diff --git a/test/test_fmm.py b/test/test_fmm.py index 6f7580d..b26ee5f 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -671,7 +671,8 @@ def test_fmm_with_optimized_3d_m2l(ctx_factory, helmholtz_k, well_sep_is_n_away) return result - from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler + from boxtree.pyfmmlib_integration import ( + FMMLibExpansionWrangler, FMMLibGeometryData) baseline_wrangler = FMMLibExpansionWrangler( trav.tree, helmholtz_k, @@ -680,8 +681,7 @@ def test_fmm_with_optimized_3d_m2l(ctx_factory, helmholtz_k, well_sep_is_n_away) optimized_wrangler = FMMLibExpansionWrangler( trav.tree, helmholtz_k, fmm_level_to_nterms=fmm_level_to_nterms, - m2l_rotation_lists=trav.from_sep_siblings_rotation_classes, - m2l_rotation_angles=trav.from_sep_siblings_rotation_class_to_angle) + geo_data=FMMLibGeometryData(queue, trav)) from boxtree.fmm import drive_fmm diff --git a/test/test_traversal.py b/test/test_traversal.py index 07027ba..54e97d6 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -385,7 +385,7 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): rb = RotationClassesBuilder(ctx, well_sep_is_n_away, tree.dimensions, tree.box_id_dtype, tree.box_level_dtype, tree.coord_dtype) - result, _ = rb(queue, trav) + result, _ = rb(queue, trav, tree) rot_classes = result.from_sep_siblings_rotation_classes.get(queue) rot_angles = result.from_sep_siblings_rotation_class_to_angle.get(queue) -- GitLab From 68ac53cca79c2b9d9f95a61dcad3330622044356 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 1 Jun 2019 13:06:15 -0500 Subject: [PATCH 34/57] Remove pylint:disable= comment, probably not needed --- boxtree/pyfmmlib_integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index c04f387..00cbbac 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -684,7 +684,7 @@ class FMMLibExpansionWrangler(object): assert (0 == ier).all() ier, rotmatb = ( - rotmat_builder(rotmat_order, -m2l_rotation_angles)) # noqa pylint:disable=invalid-unary-operand-type + rotmat_builder(rotmat_order, -m2l_rotation_angles)) assert (0 == ier).all() return (rotmatf, rotmatb, rotmat_order) -- GitLab From 7a9209e786b1cbbe6e46bc0830c4f4aea2da3b96 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 1 Jun 2019 18:54:21 -0500 Subject: [PATCH 35/57] Simplify API for rotation classes builder, and use a more efficient rotation class builder algorithm that doesn't rely on radix sort. --- boxtree/area_query.py | 2 +- boxtree/pyfmmlib_integration.py | 9 +- boxtree/tools.py | 116 +------------ boxtree/traversal.py | 285 +++++++++++++++++++------------- test/test_tools.py | 17 -- test/test_traversal.py | 4 +- 6 files changed, 182 insertions(+), 251 deletions(-) diff --git a/boxtree/area_query.py b/boxtree/area_query.py index cb7a886..700b647 100644 --- a/boxtree/area_query.py +++ b/boxtree/area_query.py @@ -461,7 +461,7 @@ STARTS_EXPANDER_TEMPLATE = ElementwiseTemplate( dst[i] = bsearch(starts, starts_len, i); """, name="starts_expander", - preamble=str(InlineBinarySearch("right", "idx_t"))) + preamble=str(InlineBinarySearch("idx_t"))) # }}} diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 00cbbac..6f09aeb 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -72,13 +72,7 @@ class FMMLibGeometryData(FMMLibGeometryDataInterface): @memoize_method def rotation_classes_builder(self): from boxtree.traversal import RotationClassesBuilder - return RotationClassesBuilder( - self.queue.context, - self.trav.well_sep_is_n_away, - self.tree.dimensions, - self.tree.box_id_dtype, - self.tree.box_level_dtype, - self.tree.coord_dtype) + return RotationClassesBuilder(self.queue.context) @memoize_method def build_rotation_classes_lists(self): @@ -718,6 +712,7 @@ class FMMLibExpansionWrangler(object): # {{{ set up optimized m2l, if applicable if self.level_nterms[lev] <= rotmat_order: + print("using optimized with order", rotmat_order) m2l_rotation_lists = self.geo_data.m2l_rotation_lists() assert len(m2l_rotation_lists) == len(lists) diff --git a/boxtree/tools.py b/boxtree/tools.py index 2bcb2c4..5dc5831 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -581,22 +581,15 @@ from mako.template import Template BINARY_SEARCH_TEMPLATE = Template(""" /* - * If "bias" = "left", returns the smallest value of i such that - * val <= arr[i], or (size_t) -1 if val is greater than all values. - * - * If "bias" = "right", returns the largest value of i such that - * arr[i] <= val, or (size_t) -1 if val is less than all values. + * Returns the largest value of i such that arr[i] <= val, or (size_t) -1 if val + * is less than all values. */ inline size_t bsearch( __global const ${elem_t} *arr, size_t len, const ${elem_t} val) { -%if bias == "left": - if (arr[len - 1] < val) -%elif bias == "right": if (val < arr[0]) -%endif { return -1; } @@ -607,20 +600,12 @@ inline size_t bsearch( { i = l + (r - l) / 2; - %if bias == "left": - if (val <= arr[i] && (i == 0 || arr[i - 1] < val)) - %elif bias == "right": if (arr[i] <= val && (i == len - 1 || val < arr[i + 1])) - %endif { return i; } - %if bias == "left": - if (arr[i] < val) - %elif bias == "right": if (arr[i] <= val) - %endif { l = i; } @@ -635,8 +620,8 @@ inline size_t bsearch( class InlineBinarySearch(object): - def __init__(self, bias, elem_type_name): - self.render_vars = {"bias": bias, "elem_t": elem_type_name} + def __init__(self, elem_type_name): + self.render_vars = {"elem_t": elem_type_name} @memoize_method def __str__(self): @@ -645,99 +630,6 @@ class InlineBinarySearch(object): # }}} -# {{{ list renumberer - -LIST_RENUMBERER_TEMPLATE = ElementwiseTemplate( - arguments=r""" - elem_t *orig, - elem_t *sorted, - renumbered_t *renumbered_orig, - unsigned int len, - """, - operation=r"""//CL// - renumbered_orig[i] = bsearch(sorted, len, orig[i]); - """, - name="list_renumberer") - - -class ListRenumberer(object): - """Renumber a list (with repetition) of N values to the range [0,1,...,N). - - Useful e.g. for compacting lists into a dense range. - - Returns a tuple (*renumbered*, *new_to_old*, *evt*), where *renumbered* is - the renumbered list and *new_to_old* maps renumbered values to old ones. - - Example:: - - >>> arr = cl.array.to_device(queue, np.array([1, 6, 6, 4, 3])) - >>> lr = ListRenumberer(ctx, np.int) - >>> renumbered, new_to_old, evt = lr(arr) - >>> renumbered - array([0, 3, 3, 2, 1]) - >>> new_to_old - array([1, 3, 4, 6]) - - .. note:: - - Due to radix sort not handling signed types or floats - (https://gitlab.tiker.net/inducer/pyopencl/issues/16), - make sure *from_element_dtype* is an unsigned integer type. - - """ - - def __init__(self, context, from_element_dtype, to_element_dtype=None): - from pyopencl.algorithm import RadixSort, unique - - if to_element_dtype is None: - to_element_dtype = from_element_dtype - - self.sort = RadixSort( - context, - [VectorArg(from_element_dtype, "arr")], - "arr[i]", - ["arr"], - key_dtype=from_element_dtype) - - self.uniq = unique - - self.list_renumberer = ( - LIST_RENUMBERER_TEMPLATE.build( - context, - type_aliases=( - ("elem_t", from_element_dtype), - ("renumbered_t", to_element_dtype), - ), - var_values=(), - more_preamble=str(InlineBinarySearch("left", "elem_t")))) - - self.to_element_dtype = to_element_dtype - - def __call__(self, items, queue=None, wait_for=None): - if queue is None: - queue = items.queue - - (sorted_items,), evt = self.sort(items, queue=queue, wait_for=wait_for) - - new_to_old, uniq_count, evt = self.uniq(sorted_items, wait_for=[evt]) - - uniq_count = uniq_count.get() - - renumbered_items = cl.array.empty( - queue, len(items), dtype=self.to_element_dtype) - - new_to_old = new_to_old[:uniq_count] - - evt = self.list_renumberer( - items, new_to_old, renumbered_items, len(new_to_old), - queue=queue, - wait_for=[evt] + renumbered_items.events + new_to_old.events) - - return renumbered_items, new_to_old, evt - -# }}} - - # {{{ constant one wrangler class ConstantOneExpansionWrangler(object): diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 49b1861..ccde8b1 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -2360,13 +2360,13 @@ class FMMTraversalBuilder: # {{{ rotation classes builder -ROTATION_COSINE_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// +TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// #define LEVEL_TO_RAD(level) \ (root_extent * 1 / (coord_t) (1 << (level + 1))) /* * Return an integer vector indicating the a translation direction - * as a multiple of the box radius. + * as a multiple of the box diameter. */ inline int_coord_vec_t get_translation_vector( coord_t root_extent, @@ -2375,46 +2375,45 @@ ROTATION_COSINE_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// coord_vec_t target_center) { int_coord_vec_t result = (int_coord_vec_t) 0; + coord_t diam = 2 * LEVEL_TO_RAD(level); %for i in range(dimensions): - result.s${i} = rint( - (target_center.s${i} - source_center.s${i}) - / LEVEL_TO_RAD(level)); + result.s${i} = rint((target_center.s${i} - source_center.s${i}) / diam); %endfor return result; } /* - * Compute the gcd of two positive integers. - */ - inline int gcd(int a, int b) - { - while (b) - { - int tmp = a; - a = b; - b = tmp % b; - } - return a; - } - - /* - * Normalizes a integer vector by dividing it by its GCD. + * Compute the translation class for the given translation vector. The + * translation class maps a translation vector (a_1, a_2, ..., a_d) into + * a dense range of integers [0, ..., (4*n+3)^d - 1], where + * d is the dimension and n is well_sep_is_n_away. + * + * This relies on the fact that the entries of the vector will + * always be in the range [-2n-1,...,2n+1]. + * + * The formula is: + * + * \== d k-1 + * cls(a ,a ,...,a ) = > (2n+1+a ) (4n+3). + * 1 2 d /__ k=1 k + * */ - inline int_coord_vec_t gcd_normalize(int_coord_vec_t vec) + inline int get_translation_class(int_coord_vec_t vec, int well_sep_is_n_away) { - int vec_gcd = abs(vec.s0); - - %for i in range(1, dimensions): - vec_gcd = gcd(vec_gcd, abs(vec.s${i})); + int result = 0; + int base = 4 * well_sep_is_n_away + 3; + int mult = 1; + %for i in range(dimensions): + result += (2 * well_sep_is_n_away + 1 + vec.s${i}) * mult; + mult *= base; %endfor - - return vec / vec_gcd; + return result; } - """ + str(InlineBinarySearch("right", "box_id_t")), + """ + str(InlineBinarySearch("box_id_t")), strict_undefined=True) -ROTATION_COSINE_FINDER_TEMPLATE = ElementwiseTemplate( +TRANSLATION_CLASS_FINDER_TEMPLATE = ElementwiseTemplate( arguments=r"""//CL:mako// /* input: */ box_id_t *from_sep_siblings_lists, @@ -2425,9 +2424,11 @@ ROTATION_COSINE_FINDER_TEMPLATE = ElementwiseTemplate( int aligned_nboxes, coord_t root_extent, box_level_t *box_levels, + int well_sep_is_n_away, /* output: */ - coord_t *rotation_cosines, + int *translation_classes, + int *translation_class_is_used, """, operation=TRAVERSAL_PREAMBLE_MAKO_DEFS + r"""//CL:mako// @@ -2442,38 +2443,18 @@ ROTATION_COSINE_FINDER_TEMPLATE = ElementwiseTemplate( box_id_t target_box_id = target_or_target_parent_boxes[itarget_box]; /* - * Compute the translation vector. + * Compute the translation vector and translation class. */ ${load_center("source_center", "source_box_id")} ${load_center("target_center", "target_box_id")} - int_coord_vec_t translation_vector = get_translation_vector( + int_coord_vec_t vec = get_translation_vector( root_extent, box_levels[source_box_id], source_center, target_center); - /* - * Normalize the translation vector (by dividing by its GCD). - * - * We need this before computing the cosine of the rotation angle, because - * generally in in floating point arithmetic, if k is a positive scalar and v - * is a vector, we can't assume - * - * kv[-1] / sqrt(|kv|^2) == v[-1] / sqrt(|v|^2). - * - * Normalizing ensures vectors that are positive integer multiples of each - * other get classified into the same equivalence class of rotations. - */ - int_coord_vec_t vec = gcd_normalize(translation_vector); - - coord_t num = vec.s${dimensions-1}; - coord_t denom = 0; - - %for i in range(dimensions): - denom += vec.s${i} * vec.s${i}; - %endfor - - denom = sqrt(denom); + int translation_class = get_translation_class(vec, well_sep_is_n_away); - rotation_cosines[i] = num / denom; + translation_classes[i] = translation_class; + translation_class_is_used[translation_class] = 1; """) @@ -2511,29 +2492,21 @@ class RotationClassesBuilder(object): """Build rotation classes for List 2 translations. """ - def __init__(self, context, well_sep_is_n_away, dimensions, box_id_dtype, - box_level_dtype, coord_dtype): + def __init__(self, context): + self.context = context + + @memoize_method + def get_kernel_info(self, dimensions, well_sep_is_n_away, + box_id_dtype, box_level_dtype, coord_dtype): coord_vec_dtype = cl.cltypes.vec_types[coord_dtype, dimensions] int_coord_vec_dtype = cl.cltypes.vec_types[np.dtype(np.int32), dimensions] - # equiv_int_dtype_for_coord_dtype is an unsigned equivalent width dtype - # for coord_dtype. We use this in ListRenumberer, which only supports - # unsigned types. (That means the rotation cosines are sorted according - # to the interpretation of the bits as unsigned integers, but the order - # doesn't matter.) - if coord_dtype.itemsize == 4: - self.equiv_int_dtype_for_coord_dtype = np.uint32 - elif coord_dtype.itemsize == 8: - self.equiv_int_dtype_for_coord_dtype = np.uint64 - else: - raise ValueError("unexpected coord_dtype") - - preamble = ROTATION_COSINE_FINDER_PREAMBLE_TEMPLATE.render( + preamble = TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE.render( dimensions=dimensions) - self.rotation_cosine_finder = ( - ROTATION_COSINE_FINDER_TEMPLATE.build( - context, + translation_class_finder = ( + TRANSLATION_CLASS_FINDER_TEMPLATE.build( + self.context, type_aliases=( ("int_coord_vec_t", int_coord_vec_dtype), ("coord_vec_t", coord_vec_dtype), @@ -2546,31 +2519,111 @@ class RotationClassesBuilder(object): ), more_preamble=preamble)) - from boxtree.tools import ListRenumberer - - self.list_renumberer = ListRenumberer( - context, - from_element_dtype=self.equiv_int_dtype_for_coord_dtype, - to_element_dtype=np.int32) - - self.well_sep_is_n_away = well_sep_is_n_away - self.dimensions = dimensions - self.coord_dtype = coord_dtype + return _KernelInfo(translation_class_finder=translation_class_finder) + + @staticmethod + def vec_gcd(vec): + """Return the GCD of a list of integers.""" + def gcd(a, b): + while b: + a, b = b, a % b + return a + + result = abs(vec[0]) + for elem in vec[1:]: + result = gcd(result, abs(elem)) + return result + + def compute_rotation_classes(self, + well_sep_is_n_away, dimensions, used_translation_classes): + """Convert translation classes to a list of rotation classes and angles.""" + angle_to_rot_class = {} + angles = [] + + ntranslation_classes = ( + self.ntranslation_classes(well_sep_is_n_away, dimensions)) + + translation_class_to_rot_class = ( + np.empty(ntranslation_classes, dtype=np.int32)) + + translation_class_to_rot_class[:] = -1 + + for cls in used_translation_classes: + vec = self.translation_class_to_vector( + well_sep_is_n_away, dimensions, cls) + + # Normalize the translation vector (by dividing by its GCD). + # + # We need this before computing the cosine of the rotation angle, + # because generally in in floating point arithmetic, if k is a + # positive scalar and v is a vector, we can't assume + # + # kv[-1] / sqrt(|kv|^2) == v[-1] / sqrt(|v|^2). + # + # Normalizing ensures vectors that are positive integer multiples of + # each other get classified into the same equivalence class of + # rotations. + vec //= self.vec_gcd(vec) + + # Compute the rotation angle for the vector. + norm = np.linalg.norm(vec) + assert norm != 0 + angle = np.arccos(vec[-1] / norm) + + # Find the rotation class. + if angle in angle_to_rot_class: + rot_class = angle_to_rot_class[angle] + else: + rot_class = len(angles) + angle_to_rot_class[angle] = rot_class + angles.append(angle) + + translation_class_to_rot_class[cls] = rot_class + + return translation_class_to_rot_class, angles + + @staticmethod + def ntranslation_classes(well_sep_is_n_away, dimensions): + return (4 * well_sep_is_n_away + 3) ** dimensions + + @staticmethod + def translation_class_to_vector(well_sep_is_n_away, dimensions, cls): + # This computes the vector for the translation class, using the inverse + # of the formula found in get_translation_class() defined in + # TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE. + result = np.zeros(dimensions, dtype=np.int32) + shift = 2 * well_sep_is_n_away + 1 + base = 4 * well_sep_is_n_away + 3 + for i in range(dimensions): + result[i] = cls % base - shift + cls //= base + return result @log_process(logger, "build rotation classes") def __call__(self, queue, trav, tree, wait_for=None): """Returns a pair *info*, *evt* where info is a :class:`RotationClassesInfo`. """ - rotation_cosines = cl.array.empty( - queue, len(trav.from_sep_siblings_lists), dtype=self.coord_dtype) - - # This computes the cosine of the rotation angles using the formula - # - # cos(theta) = a . b / (|a| * |b|). - # - # acos() is not supported universally in double precision on GPUs, so we - # don't invert the cosines to get the rotation angles on the device. - evt = self.rotation_cosine_finder( + + # {{{ compute translation classes for list 2 + + well_sep_is_n_away = trav.well_sep_is_n_away + dimensions = tree.dimensions + coord_dtype = tree.coord_dtype + + knl_info = self.get_kernel_info( + dimensions, well_sep_is_n_away, tree.box_id_dtype, + tree.box_level_dtype, coord_dtype) + + ntranslation_classes = ( + self.ntranslation_classes(well_sep_is_n_away, dimensions)) + + translation_classes_lists = cl.array.empty( + queue, len(trav.from_sep_siblings_lists), dtype=np.int32) + + translation_class_is_used = cl.array.zeros( + queue, ntranslation_classes, dtype=np.int32) + + evt = knl_info.translation_class_finder( trav.from_sep_siblings_lists, trav.from_sep_siblings_starts, trav.target_or_target_parent_boxes, @@ -2579,31 +2632,41 @@ class RotationClassesBuilder(object): tree.aligned_nboxes, tree.root_extent, tree.box_levels, - rotation_cosines, + well_sep_is_n_away, + translation_classes_lists, + translation_class_is_used, queue=queue, wait_for=wait_for) - rotation_classes, rotation_class_to_cosine, evt = ( - self.list_renumberer( - rotation_cosines.view(self.equiv_int_dtype_for_coord_dtype), - queue=queue, - wait_for=[evt])) + # }}} - # Compute the rotation angles. - rotation_class_to_cosine = ( - rotation_class_to_cosine.view(self.coord_dtype).get()) - rotation_class_to_angle = np.arccos(rotation_class_to_cosine) - rotation_class_to_angle = cl.array.to_device(queue, rotation_class_to_angle) + # {{{ convert translation classes to rotation classes + + used_translation_classes = ( + np.flatnonzero(translation_class_is_used.get())) + + translation_class_to_rotation_class, rotation_angles = ( + self.compute_rotation_classes( + well_sep_is_n_away, dimensions, used_translation_classes)) # There should be no more than 2^(d-1) * (2n+1)^d distinct rotation - # classes, since that is an upper bound on the number of distinct list 2 - # boxes. - d = self.dimensions - n = self.well_sep_is_n_away - assert len(rotation_class_to_angle) <= 2**(d-1) * (2*n+1)**d + # classes, since that is an upper bound on the number of distinct + # positions for list 2 boxes. + d = dimensions + n = well_sep_is_n_away + assert len(rotation_angles) <= 2**(d-1) * (2*n+1)**d + + rotation_classes_lists = ( + cl.array.take( + cl.array.to_device(queue, translation_class_to_rotation_class), + translation_classes_lists)) + + rotation_angles = cl.array.to_device(queue, np.array(rotation_angles)) + + # }}} return RotationClassesInfo( - from_sep_siblings_rotation_classes=rotation_classes, - from_sep_siblings_rotation_class_to_angle=rotation_class_to_angle, + from_sep_siblings_rotation_classes=rotation_classes_lists, + from_sep_siblings_rotation_class_to_angle=rotation_angles, ).with_queue(None), evt # }}} diff --git a/test/test_tools.py b/test/test_tools.py index 6e9827d..2da427e 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -63,23 +63,6 @@ def test_device_record(): assert np.array_equal(record_host.obj_array[i], record.obj_array[i]) -def test_list_renumberer(ctx_factory): - ctx = ctx_factory() - queue = cl.CommandQueue(ctx) - - from boxtree.tools import ListRenumberer - - arr = cl.array.to_device( - queue, - np.array([9., 4., 1., 4., 5., 6., 9.], dtype=np.float64)) - - lr = ListRenumberer(ctx, np.uint64, np.int) - - renumbered_arr, new_to_old, _ = lr(arr.view(np.uint64)) - assert (renumbered_arr.get() == np.array([4, 1, 0, 1, 2, 3, 4])).all() - assert (new_to_old.view(np.float).get() == np.array([1., 4., 5., 6., 9.])).all() - - @pytest.mark.parametrize("array_factory", (p_normal, p_surface, p_uniform)) @pytest.mark.parametrize("dim", (2, 3)) @pytest.mark.parametrize("dtype", (np.float32, np.float64)) diff --git a/test/test_traversal.py b/test/test_traversal.py index 54e97d6..b0855aa 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -382,9 +382,7 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away) trav, _ = tg(queue, tree) - rb = RotationClassesBuilder(ctx, well_sep_is_n_away, tree.dimensions, - tree.box_id_dtype, tree.box_level_dtype, tree.coord_dtype) - + rb = RotationClassesBuilder(ctx) result, _ = rb(queue, trav, tree) rot_classes = result.from_sep_siblings_rotation_classes.get(queue) -- GitLab From ede5bda66d9b77809457e0a4f89a635ae35659d3 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 1 Jun 2019 18:56:39 -0500 Subject: [PATCH 36/57] Change description --- boxtree/traversal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index ccde8b1..1525f23 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -2599,7 +2599,7 @@ class RotationClassesBuilder(object): cls //= base return result - @log_process(logger, "build rotation classes") + @log_process(logger, "build m2l rotation classes") def __call__(self, queue, trav, tree, wait_for=None): """Returns a pair *info*, *evt* where info is a :class:`RotationClassesInfo`. """ -- GitLab From ac464014032eea5748ecea75406e43bc1fa46540 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 1 Jun 2019 19:46:13 -0500 Subject: [PATCH 37/57] Remove debugging print statement --- boxtree/pyfmmlib_integration.py | 1 - 1 file changed, 1 deletion(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 6f09aeb..f1a736e 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -712,7 +712,6 @@ class FMMLibExpansionWrangler(object): # {{{ set up optimized m2l, if applicable if self.level_nterms[lev] <= rotmat_order: - print("using optimized with order", rotmat_order) m2l_rotation_lists = self.geo_data.m2l_rotation_lists() assert len(m2l_rotation_lists) == len(lists) -- GitLab From e497c74a1326698cc6e329686a6c91f6c9747b67 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 1 Jun 2019 23:29:39 -0500 Subject: [PATCH 38/57] Prettify ASCII art --- boxtree/traversal.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 1525f23..12430f4 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from __future__ import division __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -2393,7 +2394,7 @@ TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// * * The formula is: * - * \== d k-1 + * \‾‾ d k-1 * cls(a ,a ,...,a ) = > (2n+1+a ) (4n+3). * 1 2 d /__ k=1 k * -- GitLab From a29bb8287194762fa6e5db7436a510efc5d84d09 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 1 Jun 2019 23:41:23 -0500 Subject: [PATCH 39/57] Don't use UTF-8 --- boxtree/traversal.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 12430f4..743a6c5 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from __future__ import division __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -2394,9 +2393,9 @@ TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// * * The formula is: * - * \‾‾ d k-1 - * cls(a ,a ,...,a ) = > (2n+1+a ) (4n+3). - * 1 2 d /__ k=1 k + * \~~ d k-1 + * cls(a ,a ,...,a ) = > (2n+1+a ) (4n+3) + * 1 2 d /__ k=1 k * */ inline int get_translation_class(int_coord_vec_t vec, int well_sep_is_n_away) -- GitLab From a94557fceee0fbd259b30c2035ace85d06eb3a58 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 3 Jun 2019 10:25:08 -0500 Subject: [PATCH 40/57] Translation class computation: be more defensive, avoid potential out of bounds writes and overflow --- boxtree/traversal.py | 81 ++++++++++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 743a6c5..c0e9567 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -2364,10 +2364,8 @@ TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// #define LEVEL_TO_RAD(level) \ (root_extent * 1 / (coord_t) (1 << (level + 1))) - /* - * Return an integer vector indicating the a translation direction - * as a multiple of the box diameter. - */ + // Return an integer vector indicating the a translation direction + // as a multiple of the box diameter. inline int_coord_vec_t get_translation_vector( coord_t root_extent, int level, @@ -2382,24 +2380,31 @@ TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// return result; } - /* - * Compute the translation class for the given translation vector. The - * translation class maps a translation vector (a_1, a_2, ..., a_d) into - * a dense range of integers [0, ..., (4*n+3)^d - 1], where - * d is the dimension and n is well_sep_is_n_away. - * - * This relies on the fact that the entries of the vector will - * always be in the range [-2n-1,...,2n+1]. - * - * The formula is: - * - * \~~ d k-1 - * cls(a ,a ,...,a ) = > (2n+1+a ) (4n+3) - * 1 2 d /__ k=1 k - * - */ + // Compute the translation class for the given translation vector. The + // translation class maps a translation vector (a_1, a_2, ..., a_d) into + // a dense range of integers [0, ..., (4*n+3)^d - 1], where + // d is the dimension and n is well_sep_is_n_away. + // + // This relies on the fact that the entries of the vector will + // always be in the range [-2n-1,...,2n+1]. + // + // The mapping from vector to class is: + // + // \~~ d k-1 + // cls(a ,a ,...,a ) = > (2n+1+a ) (4n+3) + // 1 2 d /__ k=1 k + // + // Returns -1 on error. inline int get_translation_class(int_coord_vec_t vec, int well_sep_is_n_away) { + int dim_bound = 2 * well_sep_is_n_away + 1; + %for i in range(dimensions): + if (!(-dim_bound <= vec.s${i} && vec.s${i} <= dim_bound)) + { + return -1; + } + %endfor + int result = 0; int base = 4 * well_sep_is_n_away + 3; int mult = 1; @@ -2429,12 +2434,11 @@ TRANSLATION_CLASS_FINDER_TEMPLATE = ElementwiseTemplate( /* output: */ int *translation_classes, int *translation_class_is_used, + int *error_flag, """, operation=TRAVERSAL_PREAMBLE_MAKO_DEFS + r"""//CL:mako// - /* - * Find the target box for this source box. - */ + // Find the target box for this source box. box_id_t source_box_id = from_sep_siblings_lists[i]; size_t itarget_box = bsearch( @@ -2442,9 +2446,14 @@ TRANSLATION_CLASS_FINDER_TEMPLATE = ElementwiseTemplate( box_id_t target_box_id = target_or_target_parent_boxes[itarget_box]; - /* - * Compute the translation vector and translation class. - */ + // Ensure levels are the same. + if (box_levels[source_box_id] != box_levels[target_box_id]) + { + *error_flag = 1; + PYOPENCL_ELWISE_CONTINUE; + } + + // Compute the translation vector and translation class. ${load_center("source_center", "source_box_id")} ${load_center("target_center", "target_box_id")} @@ -2453,6 +2462,13 @@ TRANSLATION_CLASS_FINDER_TEMPLATE = ElementwiseTemplate( int translation_class = get_translation_class(vec, well_sep_is_n_away); + // Ensure valid translation class. + if (translation_class == -1) + { + *error_flag = 1; + PYOPENCL_ELWISE_CONTINUE; + } + translation_classes[i] = translation_class; translation_class_is_used[translation_class] = 1; """) @@ -2501,6 +2517,13 @@ class RotationClassesBuilder(object): coord_vec_dtype = cl.cltypes.vec_types[coord_dtype, dimensions] int_coord_vec_dtype = cl.cltypes.vec_types[np.dtype(np.int32), dimensions] + # Make sure translation classes can fit inside a 32 bit integer. + if (not + ( + self.ntranslation_classes(well_sep_is_n_away, dimensions) + <= np.iinfo(np.int32).max)): + raise ValueError("would overflow") + preamble = TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE.render( dimensions=dimensions) @@ -2623,6 +2646,8 @@ class RotationClassesBuilder(object): translation_class_is_used = cl.array.zeros( queue, ntranslation_classes, dtype=np.int32) + error_flag = cl.array.zeros(queue, 1, dtype=np.int32) + evt = knl_info.translation_class_finder( trav.from_sep_siblings_lists, trav.from_sep_siblings_starts, @@ -2635,8 +2660,12 @@ class RotationClassesBuilder(object): well_sep_is_n_away, translation_classes_lists, translation_class_is_used, + error_flag, queue=queue, wait_for=wait_for) + if (error_flag.get()): + raise ValueError("could not compute translation classes") + # }}} # {{{ convert translation classes to rotation classes -- GitLab From 8f0b72855be15969d30b9e8de83759a961beafe7 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 3 Jun 2019 10:33:58 -0500 Subject: [PATCH 41/57] Off-by-one error --- boxtree/traversal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index c0e9567..1c5ee27 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -2521,7 +2521,7 @@ class RotationClassesBuilder(object): if (not ( self.ntranslation_classes(well_sep_is_n_away, dimensions) - <= np.iinfo(np.int32).max)): + <= 1 + np.iinfo(np.int32).max)): raise ValueError("would overflow") preamble = TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE.render( -- GitLab From 436058672458bb2a14acce60ae2d29b9b65517a0 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 8 Jun 2019 15:57:19 -0500 Subject: [PATCH 42/57] Version bump --- boxtree/version.py | 2 +- doc/misc.rst | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/boxtree/version.py b/boxtree/version.py index f5c7ef5..78a7f0b 100644 --- a/boxtree/version.py +++ b/boxtree/version.py @@ -1,2 +1,2 @@ -VERSION = (2018, 2) +VERSION = (2019, 1) VERSION_TEXT = ".".join(str(i) for i in VERSION) diff --git a/doc/misc.rst b/doc/misc.rst index a4c008f..717d0c1 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -24,13 +24,21 @@ for instructions. User-visible Changes ==================== -Version 2018.2 +Version 2019.1 -------------- + .. note:: This version is currently under development. You can get snapshots from boxtree's `git repository `_ +* Faster M2Ls in the FMMLIB backend using precomputed rotation matrices. This + change adds a *geo_data* parameter to the FMMLIB geometry wrangler + constructor. + +Version 2018.2 +-------------- + * Changed index style of the *from_sep_close_bigger_starts* interaction list. Version 2018.1 -- GitLab From c794103905a15a0bac62da4931306eca015a19ac Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 12 Jun 2019 23:46:42 -0500 Subject: [PATCH 43/57] Rename geo_data to _geo_data to avoid accidentally setting pytential subclass's geo_data to None --- boxtree/pyfmmlib_integration.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index f1a736e..ea03be9 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -131,7 +131,7 @@ class FMMLibExpansionWrangler(object): return nterms self.tree = tree - self.geo_data = geo_data + self._geo_data = geo_data self.rotmat_cutoff_bytes = optimized_m2l_precomputation_memory_cutoff_bytes if helmholtz_k == 0: @@ -650,7 +650,7 @@ class FMMLibExpansionWrangler(object): if not self.supports_optimized_m2l: return (rotmatf, rotmatb, rotmat_order) - m2l_rotation_angles = self.geo_data.m2l_rotation_angles() + m2l_rotation_angles = self._geo_data.m2l_rotation_angles() def mem_estimate(order): # Rotation matrix memory cost estimate. @@ -712,7 +712,7 @@ class FMMLibExpansionWrangler(object): # {{{ set up optimized m2l, if applicable if self.level_nterms[lev] <= rotmat_order: - m2l_rotation_lists = self.geo_data.m2l_rotation_lists() + m2l_rotation_lists = self._geo_data.m2l_rotation_lists() assert len(m2l_rotation_lists) == len(lists) mploc = self.get_translation_routine( -- GitLab From ad53d10a7ecf98f2473b565e59dc309e878b9e11 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 12 Jun 2019 23:51:01 -0500 Subject: [PATCH 44/57] Revert "Rename geo_data to _geo_data to avoid accidentally setting pytential" This reverts commit c794103905a15a0bac62da4931306eca015a19ac. --- boxtree/pyfmmlib_integration.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index ea03be9..f1a736e 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -131,7 +131,7 @@ class FMMLibExpansionWrangler(object): return nterms self.tree = tree - self._geo_data = geo_data + self.geo_data = geo_data self.rotmat_cutoff_bytes = optimized_m2l_precomputation_memory_cutoff_bytes if helmholtz_k == 0: @@ -650,7 +650,7 @@ class FMMLibExpansionWrangler(object): if not self.supports_optimized_m2l: return (rotmatf, rotmatb, rotmat_order) - m2l_rotation_angles = self._geo_data.m2l_rotation_angles() + m2l_rotation_angles = self.geo_data.m2l_rotation_angles() def mem_estimate(order): # Rotation matrix memory cost estimate. @@ -712,7 +712,7 @@ class FMMLibExpansionWrangler(object): # {{{ set up optimized m2l, if applicable if self.level_nterms[lev] <= rotmat_order: - m2l_rotation_lists = self._geo_data.m2l_rotation_lists() + m2l_rotation_lists = self.geo_data.m2l_rotation_lists() assert len(m2l_rotation_lists) == len(lists) mploc = self.get_translation_routine( -- GitLab From c5e7bf2b321457b4021366e79bf7cded5aad3d1f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 13 Jun 2019 06:52:37 +0200 Subject: [PATCH 45/57] Apply suggestion to boxtree/pyfmmlib_integration.py --- boxtree/pyfmmlib_integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index f1a736e..9a144aa 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -50,7 +50,7 @@ class FMMLibGeometryDataInterface(object): """ def m2l_rotation_lists(self): - """Return a NumPy array mapping entries of List 2 to rotation classes. + """Return a :mod:`numpy` array mapping entries of List 2 to rotation classes. """ raise NotImplementedError -- GitLab From 7dec6f2afe97c5cbcde683260fc211b3fc542896 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 13 Jun 2019 06:52:41 +0200 Subject: [PATCH 46/57] Apply suggestion to boxtree/pyfmmlib_integration.py --- boxtree/pyfmmlib_integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 9a144aa..ae4cb62 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -55,7 +55,7 @@ class FMMLibGeometryDataInterface(object): raise NotImplementedError def m2l_rotation_angles(self): - """Return a NumPy array mapping List 2 rotation classes to rotation angles. + """Return a :mod:`numpy` array mapping List 2 rotation classes to rotation angles. """ raise NotImplementedError -- GitLab From 91502b29209b7770077070ef1d872e9e83753e13 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 12 Jun 2019 23:53:08 -0500 Subject: [PATCH 47/57] Change comma to period --- boxtree/traversal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 1c5ee27..6bbc7a5 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -2476,7 +2476,7 @@ TRANSLATION_CLASS_FINDER_TEMPLATE = ElementwiseTemplate( class RotationClassesInfo(DeviceDataRecord): r"""Interaction lists to help with matrix precomputations for rotation-based - translations ("point and shoot"), + translations ("point and shoot"). .. attribute:: nfrom_sep_siblings_rotation_classes -- GitLab From af6817d9e62ff8f8f1b91442208fe7114af46ed8 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 13 Jun 2019 00:01:47 -0500 Subject: [PATCH 48/57] Move rotation classes code to separate file --- boxtree/pyfmmlib_integration.py | 2 +- boxtree/rotation_classes.py | 387 ++++++++++++++++++++++++++++++++ boxtree/traversal.py | 343 ---------------------------- test/test_traversal.py | 3 +- 4 files changed, 390 insertions(+), 345 deletions(-) create mode 100644 boxtree/rotation_classes.py diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index ae4cb62..94cfe45 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -71,7 +71,7 @@ class FMMLibGeometryData(FMMLibGeometryDataInterface): @property @memoize_method def rotation_classes_builder(self): - from boxtree.traversal import RotationClassesBuilder + from boxtree.rotation_classes import RotationClassesBuilder return RotationClassesBuilder(self.queue.context) @memoize_method diff --git a/boxtree/rotation_classes.py b/boxtree/rotation_classes.py new file mode 100644 index 0000000..7444abd --- /dev/null +++ b/boxtree/rotation_classes.py @@ -0,0 +1,387 @@ +from __future__ import division + +__copyright__ = "Copyright (C) 2019 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import numpy as np +from pytools import Record, memoize_method +import pyopencl as cl +import pyopencl.array # noqa +import pyopencl.cltypes # noqa +from pyopencl.elementwise import ElementwiseTemplate +from mako.template import Template +from boxtree.tools import DeviceDataRecord, InlineBinarySearch +from boxtree.traversal import TRAVERSAL_PREAMBLE_MAKO_DEFS + +import logging +logger = logging.getLogger(__name__) + +from pytools import log_process + + +# {{{ rotation classes builder + +TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// + #define LEVEL_TO_RAD(level) \ + (root_extent * 1 / (coord_t) (1 << (level + 1))) + + // Return an integer vector indicating the a translation direction + // as a multiple of the box diameter. + inline int_coord_vec_t get_translation_vector( + coord_t root_extent, + int level, + coord_vec_t source_center, + coord_vec_t target_center) + { + int_coord_vec_t result = (int_coord_vec_t) 0; + coord_t diam = 2 * LEVEL_TO_RAD(level); + %for i in range(dimensions): + result.s${i} = rint((target_center.s${i} - source_center.s${i}) / diam); + %endfor + return result; + } + + // Compute the translation class for the given translation vector. The + // translation class maps a translation vector (a_1, a_2, ..., a_d) into + // a dense range of integers [0, ..., (4*n+3)^d - 1], where + // d is the dimension and n is well_sep_is_n_away. + // + // This relies on the fact that the entries of the vector will + // always be in the range [-2n-1,...,2n+1]. + // + // The mapping from vector to class is: + // + // \~~ d k-1 + // cls(a ,a ,...,a ) = > (2n+1+a ) (4n+3) + // 1 2 d /__ k=1 k + // + // Returns -1 on error. + inline int get_translation_class(int_coord_vec_t vec, int well_sep_is_n_away) + { + int dim_bound = 2 * well_sep_is_n_away + 1; + %for i in range(dimensions): + if (!(-dim_bound <= vec.s${i} && vec.s${i} <= dim_bound)) + { + return -1; + } + %endfor + + int result = 0; + int base = 4 * well_sep_is_n_away + 3; + int mult = 1; + %for i in range(dimensions): + result += (2 * well_sep_is_n_away + 1 + vec.s${i}) * mult; + mult *= base; + %endfor + return result; + } + """ + str(InlineBinarySearch("box_id_t")), + strict_undefined=True) + + +TRANSLATION_CLASS_FINDER_TEMPLATE = ElementwiseTemplate( + arguments=r"""//CL:mako// + /* input: */ + box_id_t *from_sep_siblings_lists, + box_id_t *from_sep_siblings_starts, + box_id_t *target_or_target_parent_boxes, + int ntarget_or_target_parent_boxes, + coord_t *box_centers, + int aligned_nboxes, + coord_t root_extent, + box_level_t *box_levels, + int well_sep_is_n_away, + + /* output: */ + int *translation_classes, + int *translation_class_is_used, + int *error_flag, + """, + + operation=TRAVERSAL_PREAMBLE_MAKO_DEFS + r"""//CL:mako// + // Find the target box for this source box. + box_id_t source_box_id = from_sep_siblings_lists[i]; + + size_t itarget_box = bsearch( + from_sep_siblings_starts, 1 + ntarget_or_target_parent_boxes, i); + + box_id_t target_box_id = target_or_target_parent_boxes[itarget_box]; + + // Ensure levels are the same. + if (box_levels[source_box_id] != box_levels[target_box_id]) + { + *error_flag = 1; + PYOPENCL_ELWISE_CONTINUE; + } + + // Compute the translation vector and translation class. + ${load_center("source_center", "source_box_id")} + ${load_center("target_center", "target_box_id")} + + int_coord_vec_t vec = get_translation_vector( + root_extent, box_levels[source_box_id], source_center, target_center); + + int translation_class = get_translation_class(vec, well_sep_is_n_away); + + // Ensure valid translation class. + if (translation_class == -1) + { + *error_flag = 1; + PYOPENCL_ELWISE_CONTINUE; + } + + translation_classes[i] = translation_class; + translation_class_is_used[translation_class] = 1; + """) + + +class _KernelInfo(Record): + pass + + +class RotationClassesInfo(DeviceDataRecord): + r"""Interaction lists to help with matrix precomputations for rotation-based + translations ("point and shoot"). + + .. attribute:: nfrom_sep_siblings_rotation_classes + + The number of distinct rotation classes. + + .. attribute:: from_sep_siblings_rotation_classes + + ``int32 [*]`` + + A list, corresponding to *from_sep_siblings_lists* of *trav*, of + the rotation class of each box pair. + + .. attribute:: from_sep_siblings_rotation_class_to_angle + + ``coord_t [nfrom_sep_siblings_rotation_classes]`` + + Maps rotation classes in *from_sep_siblings_rotation_classes* to + rotation angles. This represents the angle between box translation + pairs and the *z*-axis. + + """ + + @property + def nfrom_sep_siblings_rotation_classes(self): + return len(self.from_sep_siblings_rotation_class_to_angle) + + +class RotationClassesBuilder(object): + """Build rotation classes for List 2 translations. + """ + + def __init__(self, context): + self.context = context + + @memoize_method + def get_kernel_info(self, dimensions, well_sep_is_n_away, + box_id_dtype, box_level_dtype, coord_dtype): + coord_vec_dtype = cl.cltypes.vec_types[coord_dtype, dimensions] + int_coord_vec_dtype = cl.cltypes.vec_types[np.dtype(np.int32), dimensions] + + # Make sure translation classes can fit inside a 32 bit integer. + if (not + ( + self.ntranslation_classes(well_sep_is_n_away, dimensions) + <= 1 + np.iinfo(np.int32).max)): + raise ValueError("would overflow") + + preamble = TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE.render( + dimensions=dimensions) + + translation_class_finder = ( + TRANSLATION_CLASS_FINDER_TEMPLATE.build( + self.context, + type_aliases=( + ("int_coord_vec_t", int_coord_vec_dtype), + ("coord_vec_t", coord_vec_dtype), + ("coord_t", coord_dtype), + ("box_id_t", box_id_dtype), + ("box_level_t", box_level_dtype), + ), + var_values=( + ("dimensions", dimensions), + ), + more_preamble=preamble)) + + return _KernelInfo(translation_class_finder=translation_class_finder) + + @staticmethod + def vec_gcd(vec): + """Return the GCD of a list of integers.""" + def gcd(a, b): + while b: + a, b = b, a % b + return a + + result = abs(vec[0]) + for elem in vec[1:]: + result = gcd(result, abs(elem)) + return result + + def compute_rotation_classes(self, + well_sep_is_n_away, dimensions, used_translation_classes): + """Convert translation classes to a list of rotation classes and angles.""" + angle_to_rot_class = {} + angles = [] + + ntranslation_classes = ( + self.ntranslation_classes(well_sep_is_n_away, dimensions)) + + translation_class_to_rot_class = ( + np.empty(ntranslation_classes, dtype=np.int32)) + + translation_class_to_rot_class[:] = -1 + + for cls in used_translation_classes: + vec = self.translation_class_to_vector( + well_sep_is_n_away, dimensions, cls) + + # Normalize the translation vector (by dividing by its GCD). + # + # We need this before computing the cosine of the rotation angle, + # because generally in in floating point arithmetic, if k is a + # positive scalar and v is a vector, we can't assume + # + # kv[-1] / sqrt(|kv|^2) == v[-1] / sqrt(|v|^2). + # + # Normalizing ensures vectors that are positive integer multiples of + # each other get classified into the same equivalence class of + # rotations. + vec //= self.vec_gcd(vec) + + # Compute the rotation angle for the vector. + norm = np.linalg.norm(vec) + assert norm != 0 + angle = np.arccos(vec[-1] / norm) + + # Find the rotation class. + if angle in angle_to_rot_class: + rot_class = angle_to_rot_class[angle] + else: + rot_class = len(angles) + angle_to_rot_class[angle] = rot_class + angles.append(angle) + + translation_class_to_rot_class[cls] = rot_class + + return translation_class_to_rot_class, angles + + @staticmethod + def ntranslation_classes(well_sep_is_n_away, dimensions): + return (4 * well_sep_is_n_away + 3) ** dimensions + + @staticmethod + def translation_class_to_vector(well_sep_is_n_away, dimensions, cls): + # This computes the vector for the translation class, using the inverse + # of the formula found in get_translation_class() defined in + # TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE. + result = np.zeros(dimensions, dtype=np.int32) + shift = 2 * well_sep_is_n_away + 1 + base = 4 * well_sep_is_n_away + 3 + for i in range(dimensions): + result[i] = cls % base - shift + cls //= base + return result + + @log_process(logger, "build m2l rotation classes") + def __call__(self, queue, trav, tree, wait_for=None): + """Returns a pair *info*, *evt* where info is a :class:`RotationClassesInfo`. + """ + + # {{{ compute translation classes for list 2 + + well_sep_is_n_away = trav.well_sep_is_n_away + dimensions = tree.dimensions + coord_dtype = tree.coord_dtype + + knl_info = self.get_kernel_info( + dimensions, well_sep_is_n_away, tree.box_id_dtype, + tree.box_level_dtype, coord_dtype) + + ntranslation_classes = ( + self.ntranslation_classes(well_sep_is_n_away, dimensions)) + + translation_classes_lists = cl.array.empty( + queue, len(trav.from_sep_siblings_lists), dtype=np.int32) + + translation_class_is_used = cl.array.zeros( + queue, ntranslation_classes, dtype=np.int32) + + error_flag = cl.array.zeros(queue, 1, dtype=np.int32) + + evt = knl_info.translation_class_finder( + trav.from_sep_siblings_lists, + trav.from_sep_siblings_starts, + trav.target_or_target_parent_boxes, + trav.ntarget_or_target_parent_boxes, + tree.box_centers, + tree.aligned_nboxes, + tree.root_extent, + tree.box_levels, + well_sep_is_n_away, + translation_classes_lists, + translation_class_is_used, + error_flag, + queue=queue, wait_for=wait_for) + + if (error_flag.get()): + raise ValueError("could not compute translation classes") + + # }}} + + # {{{ convert translation classes to rotation classes + + used_translation_classes = ( + np.flatnonzero(translation_class_is_used.get())) + + translation_class_to_rotation_class, rotation_angles = ( + self.compute_rotation_classes( + well_sep_is_n_away, dimensions, used_translation_classes)) + + # There should be no more than 2^(d-1) * (2n+1)^d distinct rotation + # classes, since that is an upper bound on the number of distinct + # positions for list 2 boxes. + d = dimensions + n = well_sep_is_n_away + assert len(rotation_angles) <= 2**(d-1) * (2*n+1)**d + + rotation_classes_lists = ( + cl.array.take( + cl.array.to_device(queue, translation_class_to_rotation_class), + translation_classes_lists)) + + rotation_angles = cl.array.to_device(queue, np.array(rotation_angles)) + + # }}} + + return RotationClassesInfo( + from_sep_siblings_rotation_classes=rotation_classes_lists, + from_sep_siblings_rotation_class_to_angle=rotation_angles, + ).with_queue(None), evt + +# }}} + +# vim: filetype=pyopencl:fdm=marker diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 6bbc7a5..6f59b03 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -2357,347 +2357,4 @@ class FMMTraversalBuilder: # }}} - -# {{{ rotation classes builder - -TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// - #define LEVEL_TO_RAD(level) \ - (root_extent * 1 / (coord_t) (1 << (level + 1))) - - // Return an integer vector indicating the a translation direction - // as a multiple of the box diameter. - inline int_coord_vec_t get_translation_vector( - coord_t root_extent, - int level, - coord_vec_t source_center, - coord_vec_t target_center) - { - int_coord_vec_t result = (int_coord_vec_t) 0; - coord_t diam = 2 * LEVEL_TO_RAD(level); - %for i in range(dimensions): - result.s${i} = rint((target_center.s${i} - source_center.s${i}) / diam); - %endfor - return result; - } - - // Compute the translation class for the given translation vector. The - // translation class maps a translation vector (a_1, a_2, ..., a_d) into - // a dense range of integers [0, ..., (4*n+3)^d - 1], where - // d is the dimension and n is well_sep_is_n_away. - // - // This relies on the fact that the entries of the vector will - // always be in the range [-2n-1,...,2n+1]. - // - // The mapping from vector to class is: - // - // \~~ d k-1 - // cls(a ,a ,...,a ) = > (2n+1+a ) (4n+3) - // 1 2 d /__ k=1 k - // - // Returns -1 on error. - inline int get_translation_class(int_coord_vec_t vec, int well_sep_is_n_away) - { - int dim_bound = 2 * well_sep_is_n_away + 1; - %for i in range(dimensions): - if (!(-dim_bound <= vec.s${i} && vec.s${i} <= dim_bound)) - { - return -1; - } - %endfor - - int result = 0; - int base = 4 * well_sep_is_n_away + 3; - int mult = 1; - %for i in range(dimensions): - result += (2 * well_sep_is_n_away + 1 + vec.s${i}) * mult; - mult *= base; - %endfor - return result; - } - """ + str(InlineBinarySearch("box_id_t")), - strict_undefined=True) - - -TRANSLATION_CLASS_FINDER_TEMPLATE = ElementwiseTemplate( - arguments=r"""//CL:mako// - /* input: */ - box_id_t *from_sep_siblings_lists, - box_id_t *from_sep_siblings_starts, - box_id_t *target_or_target_parent_boxes, - int ntarget_or_target_parent_boxes, - coord_t *box_centers, - int aligned_nboxes, - coord_t root_extent, - box_level_t *box_levels, - int well_sep_is_n_away, - - /* output: */ - int *translation_classes, - int *translation_class_is_used, - int *error_flag, - """, - - operation=TRAVERSAL_PREAMBLE_MAKO_DEFS + r"""//CL:mako// - // Find the target box for this source box. - box_id_t source_box_id = from_sep_siblings_lists[i]; - - size_t itarget_box = bsearch( - from_sep_siblings_starts, 1 + ntarget_or_target_parent_boxes, i); - - box_id_t target_box_id = target_or_target_parent_boxes[itarget_box]; - - // Ensure levels are the same. - if (box_levels[source_box_id] != box_levels[target_box_id]) - { - *error_flag = 1; - PYOPENCL_ELWISE_CONTINUE; - } - - // Compute the translation vector and translation class. - ${load_center("source_center", "source_box_id")} - ${load_center("target_center", "target_box_id")} - - int_coord_vec_t vec = get_translation_vector( - root_extent, box_levels[source_box_id], source_center, target_center); - - int translation_class = get_translation_class(vec, well_sep_is_n_away); - - // Ensure valid translation class. - if (translation_class == -1) - { - *error_flag = 1; - PYOPENCL_ELWISE_CONTINUE; - } - - translation_classes[i] = translation_class; - translation_class_is_used[translation_class] = 1; - """) - - -class RotationClassesInfo(DeviceDataRecord): - r"""Interaction lists to help with matrix precomputations for rotation-based - translations ("point and shoot"). - - .. attribute:: nfrom_sep_siblings_rotation_classes - - The number of distinct rotation classes. - - .. attribute:: from_sep_siblings_rotation_classes - - ``int32 [*]`` - - A list, corresponding to *from_sep_siblings_lists* of *trav*, of - the rotation class of each box pair. - - .. attribute:: from_sep_siblings_rotation_class_to_angle - - ``coord_t [nfrom_sep_siblings_rotation_classes]`` - - Maps rotation classes in *from_sep_siblings_rotation_classes* to - rotation angles. This represents the angle between box translation - pairs and the *z*-axis. - - """ - - @property - def nfrom_sep_siblings_rotation_classes(self): - return len(self.from_sep_siblings_rotation_class_to_angle) - - -class RotationClassesBuilder(object): - """Build rotation classes for List 2 translations. - """ - - def __init__(self, context): - self.context = context - - @memoize_method - def get_kernel_info(self, dimensions, well_sep_is_n_away, - box_id_dtype, box_level_dtype, coord_dtype): - coord_vec_dtype = cl.cltypes.vec_types[coord_dtype, dimensions] - int_coord_vec_dtype = cl.cltypes.vec_types[np.dtype(np.int32), dimensions] - - # Make sure translation classes can fit inside a 32 bit integer. - if (not - ( - self.ntranslation_classes(well_sep_is_n_away, dimensions) - <= 1 + np.iinfo(np.int32).max)): - raise ValueError("would overflow") - - preamble = TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE.render( - dimensions=dimensions) - - translation_class_finder = ( - TRANSLATION_CLASS_FINDER_TEMPLATE.build( - self.context, - type_aliases=( - ("int_coord_vec_t", int_coord_vec_dtype), - ("coord_vec_t", coord_vec_dtype), - ("coord_t", coord_dtype), - ("box_id_t", box_id_dtype), - ("box_level_t", box_level_dtype), - ), - var_values=( - ("dimensions", dimensions), - ), - more_preamble=preamble)) - - return _KernelInfo(translation_class_finder=translation_class_finder) - - @staticmethod - def vec_gcd(vec): - """Return the GCD of a list of integers.""" - def gcd(a, b): - while b: - a, b = b, a % b - return a - - result = abs(vec[0]) - for elem in vec[1:]: - result = gcd(result, abs(elem)) - return result - - def compute_rotation_classes(self, - well_sep_is_n_away, dimensions, used_translation_classes): - """Convert translation classes to a list of rotation classes and angles.""" - angle_to_rot_class = {} - angles = [] - - ntranslation_classes = ( - self.ntranslation_classes(well_sep_is_n_away, dimensions)) - - translation_class_to_rot_class = ( - np.empty(ntranslation_classes, dtype=np.int32)) - - translation_class_to_rot_class[:] = -1 - - for cls in used_translation_classes: - vec = self.translation_class_to_vector( - well_sep_is_n_away, dimensions, cls) - - # Normalize the translation vector (by dividing by its GCD). - # - # We need this before computing the cosine of the rotation angle, - # because generally in in floating point arithmetic, if k is a - # positive scalar and v is a vector, we can't assume - # - # kv[-1] / sqrt(|kv|^2) == v[-1] / sqrt(|v|^2). - # - # Normalizing ensures vectors that are positive integer multiples of - # each other get classified into the same equivalence class of - # rotations. - vec //= self.vec_gcd(vec) - - # Compute the rotation angle for the vector. - norm = np.linalg.norm(vec) - assert norm != 0 - angle = np.arccos(vec[-1] / norm) - - # Find the rotation class. - if angle in angle_to_rot_class: - rot_class = angle_to_rot_class[angle] - else: - rot_class = len(angles) - angle_to_rot_class[angle] = rot_class - angles.append(angle) - - translation_class_to_rot_class[cls] = rot_class - - return translation_class_to_rot_class, angles - - @staticmethod - def ntranslation_classes(well_sep_is_n_away, dimensions): - return (4 * well_sep_is_n_away + 3) ** dimensions - - @staticmethod - def translation_class_to_vector(well_sep_is_n_away, dimensions, cls): - # This computes the vector for the translation class, using the inverse - # of the formula found in get_translation_class() defined in - # TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE. - result = np.zeros(dimensions, dtype=np.int32) - shift = 2 * well_sep_is_n_away + 1 - base = 4 * well_sep_is_n_away + 3 - for i in range(dimensions): - result[i] = cls % base - shift - cls //= base - return result - - @log_process(logger, "build m2l rotation classes") - def __call__(self, queue, trav, tree, wait_for=None): - """Returns a pair *info*, *evt* where info is a :class:`RotationClassesInfo`. - """ - - # {{{ compute translation classes for list 2 - - well_sep_is_n_away = trav.well_sep_is_n_away - dimensions = tree.dimensions - coord_dtype = tree.coord_dtype - - knl_info = self.get_kernel_info( - dimensions, well_sep_is_n_away, tree.box_id_dtype, - tree.box_level_dtype, coord_dtype) - - ntranslation_classes = ( - self.ntranslation_classes(well_sep_is_n_away, dimensions)) - - translation_classes_lists = cl.array.empty( - queue, len(trav.from_sep_siblings_lists), dtype=np.int32) - - translation_class_is_used = cl.array.zeros( - queue, ntranslation_classes, dtype=np.int32) - - error_flag = cl.array.zeros(queue, 1, dtype=np.int32) - - evt = knl_info.translation_class_finder( - trav.from_sep_siblings_lists, - trav.from_sep_siblings_starts, - trav.target_or_target_parent_boxes, - trav.ntarget_or_target_parent_boxes, - tree.box_centers, - tree.aligned_nboxes, - tree.root_extent, - tree.box_levels, - well_sep_is_n_away, - translation_classes_lists, - translation_class_is_used, - error_flag, - queue=queue, wait_for=wait_for) - - if (error_flag.get()): - raise ValueError("could not compute translation classes") - - # }}} - - # {{{ convert translation classes to rotation classes - - used_translation_classes = ( - np.flatnonzero(translation_class_is_used.get())) - - translation_class_to_rotation_class, rotation_angles = ( - self.compute_rotation_classes( - well_sep_is_n_away, dimensions, used_translation_classes)) - - # There should be no more than 2^(d-1) * (2n+1)^d distinct rotation - # classes, since that is an upper bound on the number of distinct - # positions for list 2 boxes. - d = dimensions - n = well_sep_is_n_away - assert len(rotation_angles) <= 2**(d-1) * (2*n+1)**d - - rotation_classes_lists = ( - cl.array.take( - cl.array.to_device(queue, translation_class_to_rotation_class), - translation_classes_lists)) - - rotation_angles = cl.array.to_device(queue, np.array(rotation_angles)) - - # }}} - - return RotationClassesInfo( - from_sep_siblings_rotation_classes=rotation_classes_lists, - from_sep_siblings_rotation_class_to_angle=rotation_angles, - ).with_queue(None), evt - -# }}} - # vim: filetype=pyopencl:fdm=marker diff --git a/test/test_traversal.py b/test/test_traversal.py index b0855aa..52e1a50 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -377,7 +377,8 @@ def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): # {{{ build traversal - from boxtree.traversal import FMMTraversalBuilder, RotationClassesBuilder + from boxtree.traversal import FMMTraversalBuilder + from boxtree.rotation_classes import RotationClassesBuilder tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away) trav, _ = tg(queue, tree) -- GitLab From f92ba8098b4c394297831724699f49ca72396390 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 13 Jun 2019 00:45:19 -0500 Subject: [PATCH 49/57] flake8 fix --- boxtree/traversal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/traversal.py b/boxtree/traversal.py index 6f59b03..2d60f68 100644 --- a/boxtree/traversal.py +++ b/boxtree/traversal.py @@ -29,7 +29,7 @@ import pyopencl.array # noqa import pyopencl.cltypes # noqa from pyopencl.elementwise import ElementwiseTemplate from mako.template import Template -from boxtree.tools import AXIS_NAMES, DeviceDataRecord, InlineBinarySearch +from boxtree.tools import AXIS_NAMES, DeviceDataRecord import logging logger = logging.getLogger(__name__) -- GitLab From 31ebd92e0ede996f5d3ae79de751b509b41937f4 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 13 Jun 2019 00:46:43 -0500 Subject: [PATCH 50/57] Rename geo_data to optional_geo_data --- boxtree/pyfmmlib_integration.py | 10 +++++----- doc/misc.rst | 2 +- test/test_fmm.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 94cfe45..f7c3f59 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -111,7 +111,7 @@ class FMMLibExpansionWrangler(object): def __init__(self, tree, helmholtz_k, fmm_level_to_nterms=None, ifgrad=False, dipole_vec=None, dipoles_already_reordered=False, nterms=None, optimized_m2l_precomputation_memory_cutoff_bytes=10**8, - geo_data=None): + optional_geo_data=None): """ :arg fmm_level_to_nterms: a callable that, upon being passed the tree and the tree level as an integer, returns the value of *nterms* for the @@ -131,7 +131,7 @@ class FMMLibExpansionWrangler(object): return nterms self.tree = tree - self.geo_data = geo_data + self.optional_geo_data = optional_geo_data self.rotmat_cutoff_bytes = optimized_m2l_precomputation_memory_cutoff_bytes if helmholtz_k == 0: @@ -157,7 +157,7 @@ class FMMLibExpansionWrangler(object): self.dim = tree.dimensions - self.supports_optimized_m2l = self.dim == 3 and geo_data is not None + self.supports_optimized_m2l = self.dim == 3 and optional_geo_data is not None if dipole_vec is not None: assert dipole_vec.shape == (self.dim, self.tree.nsources) @@ -650,7 +650,7 @@ class FMMLibExpansionWrangler(object): if not self.supports_optimized_m2l: return (rotmatf, rotmatb, rotmat_order) - m2l_rotation_angles = self.geo_data.m2l_rotation_angles() + m2l_rotation_angles = self.optional_geo_data.m2l_rotation_angles() def mem_estimate(order): # Rotation matrix memory cost estimate. @@ -712,7 +712,7 @@ class FMMLibExpansionWrangler(object): # {{{ set up optimized m2l, if applicable if self.level_nterms[lev] <= rotmat_order: - m2l_rotation_lists = self.geo_data.m2l_rotation_lists() + m2l_rotation_lists = self.optional_geo_data.m2l_rotation_lists() assert len(m2l_rotation_lists) == len(lists) mploc = self.get_translation_routine( diff --git a/doc/misc.rst b/doc/misc.rst index 717d0c1..44ceb38 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -33,7 +33,7 @@ Version 2019.1 boxtree's `git repository `_ * Faster M2Ls in the FMMLIB backend using precomputed rotation matrices. This - change adds a *geo_data* parameter to the FMMLIB geometry wrangler + change adds an *optional_geo_data* parameter to the FMMLIB geometry wrangler constructor. Version 2018.2 diff --git a/test/test_fmm.py b/test/test_fmm.py index b26ee5f..b5e0f9d 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -681,7 +681,7 @@ def test_fmm_with_optimized_3d_m2l(ctx_factory, helmholtz_k, well_sep_is_n_away) optimized_wrangler = FMMLibExpansionWrangler( trav.tree, helmholtz_k, fmm_level_to_nterms=fmm_level_to_nterms, - geo_data=FMMLibGeometryData(queue, trav)) + optional_geo_data=FMMLibGeometryData(queue, trav)) from boxtree.fmm import drive_fmm -- GitLab From c4b140dd556be03b1744b7e89e36ff1db7d8a3f1 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 13 Jun 2019 13:16:59 -0500 Subject: [PATCH 51/57] Rename optional_geo_data to rotation_data --- boxtree/pyfmmlib_integration.py | 19 ++++++++++--------- doc/misc.rst | 2 +- test/test_fmm.py | 4 ++-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index f7c3f59..f25ba45 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -40,13 +40,14 @@ __doc__ = """Integrates :mod:`boxtree` with # {{{ geometry data interface -class FMMLibGeometryDataInterface(object): - """Abstract interface for additional, optional geometry data passed to the - expansion wrangler. +class FMMLibRotationDataInterface(object): + """Abstract interface for additional, optional data for precomputation of + rotation matrices passed to the expansion wrangler. .. automethod:: m2l_rotation_lists .. automethod:: m2l_rotation_angles + """ def m2l_rotation_lists(self): @@ -60,7 +61,7 @@ class FMMLibGeometryDataInterface(object): raise NotImplementedError -class FMMLibGeometryData(FMMLibGeometryDataInterface): +class FMMLibRotationData(FMMLibRotationDataInterface): """An implementation of the :class:`FMMLibGeometryDataInterface`.""" def __init__(self, queue, trav): @@ -111,7 +112,7 @@ class FMMLibExpansionWrangler(object): def __init__(self, tree, helmholtz_k, fmm_level_to_nterms=None, ifgrad=False, dipole_vec=None, dipoles_already_reordered=False, nterms=None, optimized_m2l_precomputation_memory_cutoff_bytes=10**8, - optional_geo_data=None): + rotation_data=None): """ :arg fmm_level_to_nterms: a callable that, upon being passed the tree and the tree level as an integer, returns the value of *nterms* for the @@ -131,7 +132,7 @@ class FMMLibExpansionWrangler(object): return nterms self.tree = tree - self.optional_geo_data = optional_geo_data + self.rotation_data = rotation_data self.rotmat_cutoff_bytes = optimized_m2l_precomputation_memory_cutoff_bytes if helmholtz_k == 0: @@ -157,7 +158,7 @@ class FMMLibExpansionWrangler(object): self.dim = tree.dimensions - self.supports_optimized_m2l = self.dim == 3 and optional_geo_data is not None + self.supports_optimized_m2l = self.dim == 3 and rotation_data is not None if dipole_vec is not None: assert dipole_vec.shape == (self.dim, self.tree.nsources) @@ -650,7 +651,7 @@ class FMMLibExpansionWrangler(object): if not self.supports_optimized_m2l: return (rotmatf, rotmatb, rotmat_order) - m2l_rotation_angles = self.optional_geo_data.m2l_rotation_angles() + m2l_rotation_angles = self.rotation_data.m2l_rotation_angles() def mem_estimate(order): # Rotation matrix memory cost estimate. @@ -712,7 +713,7 @@ class FMMLibExpansionWrangler(object): # {{{ set up optimized m2l, if applicable if self.level_nterms[lev] <= rotmat_order: - m2l_rotation_lists = self.optional_geo_data.m2l_rotation_lists() + m2l_rotation_lists = self.rotation_data.m2l_rotation_lists() assert len(m2l_rotation_lists) == len(lists) mploc = self.get_translation_routine( diff --git a/doc/misc.rst b/doc/misc.rst index 44ceb38..2c9f7a8 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -33,7 +33,7 @@ Version 2019.1 boxtree's `git repository `_ * Faster M2Ls in the FMMLIB backend using precomputed rotation matrices. This - change adds an *optional_geo_data* parameter to the FMMLIB geometry wrangler + change adds an optional *rotation_data* parameter to the FMMLIB geometry wrangler constructor. Version 2018.2 diff --git a/test/test_fmm.py b/test/test_fmm.py index b5e0f9d..2a36c00 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -672,7 +672,7 @@ def test_fmm_with_optimized_3d_m2l(ctx_factory, helmholtz_k, well_sep_is_n_away) return result from boxtree.pyfmmlib_integration import ( - FMMLibExpansionWrangler, FMMLibGeometryData) + FMMLibExpansionWrangler, FMMLibRotationData) baseline_wrangler = FMMLibExpansionWrangler( trav.tree, helmholtz_k, @@ -681,7 +681,7 @@ def test_fmm_with_optimized_3d_m2l(ctx_factory, helmholtz_k, well_sep_is_n_away) optimized_wrangler = FMMLibExpansionWrangler( trav.tree, helmholtz_k, fmm_level_to_nterms=fmm_level_to_nterms, - optional_geo_data=FMMLibGeometryData(queue, trav)) + rotation_data=FMMLibRotationData(queue, trav)) from boxtree.fmm import drive_fmm -- GitLab From 3d4855c9c25df3b36c6a90d2adb0505dd8d3bc15 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 13 Jun 2019 13:19:55 -0500 Subject: [PATCH 52/57] Renaming cleanup --- boxtree/pyfmmlib_integration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index f25ba45..490a19b 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -38,7 +38,7 @@ __doc__ = """Integrates :mod:`boxtree` with """ -# {{{ geometry data interface +# {{{ rotation data interface class FMMLibRotationDataInterface(object): """Abstract interface for additional, optional data for precomputation of @@ -62,7 +62,7 @@ class FMMLibRotationDataInterface(object): class FMMLibRotationData(FMMLibRotationDataInterface): - """An implementation of the :class:`FMMLibGeometryDataInterface`.""" + """An implementation of the :class:`FMMLibRotationDataInterface`.""" def __init__(self, queue, trav): self.queue = queue -- GitLab From 7712dfee0e59dab820050c584adac26b2f65d81e Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 26 Jun 2019 16:11:42 -0500 Subject: [PATCH 53/57] Add comment concerning translation class building --- boxtree/rotation_classes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/boxtree/rotation_classes.py b/boxtree/rotation_classes.py index 7444abd..97df572 100644 --- a/boxtree/rotation_classes.py +++ b/boxtree/rotation_classes.py @@ -40,6 +40,9 @@ from pytools import log_process # {{{ rotation classes builder +# Note that these kernels compute translation classes first, and +# these get converted to rotation classes in a second step, from Python. + TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// #define LEVEL_TO_RAD(level) \ (root_extent * 1 / (coord_t) (1 << (level + 1))) -- GitLab From b38f17445624d46f7603b0ce893015c47a1307eb Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 26 Jun 2019 16:24:19 -0500 Subject: [PATCH 54/57] Add documentation for rotation data constructor argument --- boxtree/pyfmmlib_integration.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 490a19b..1426156 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -114,9 +114,16 @@ class FMMLibExpansionWrangler(object): optimized_m2l_precomputation_memory_cutoff_bytes=10**8, rotation_data=None): """ - :arg fmm_level_to_nterms: a callable that, upon being passed the tree + :arg fmm_level_to_nterms: A callable that, upon being passed the tree and the tree level as an integer, returns the value of *nterms* for the multipole and local expansions on that level. + :arg rotation_data: Either *None* or an instance of the + :class:`FMMLibRotationDataInterface`. In three dimensions, passing + *rotation_data* enables optimized M2L (List 2) translations. + In two dimensions, this does nothing. + :arg optimized_m2l_precomputation_memory_cutoff_bytes: When using + optimized List 2 translations, an upper bound in bytes on the + amount of storage to use for a precomputed rotation matrix. """ if nterms is not None and fmm_level_to_nterms is not None: -- GitLab From 79c9cd78f5eda03e8bc941dd56a4ef5588a9432c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 26 Jun 2019 16:31:24 -0500 Subject: [PATCH 55/57] Warn if rotation_data not supplied. Ignore warning in current tests. --- boxtree/pyfmmlib_integration.py | 18 +++++++++++++++++- test/test_fmm.py | 4 ++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 1426156..843085a 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -95,6 +95,10 @@ class FMMLibRotationData(FMMLibRotationDataInterface): .from_sep_siblings_rotation_class_to_angle .get(self.queue)) + +class FMMLibRotationDataNotSuppliedWarning(UserWarning): + pass + # }}} @@ -165,7 +169,19 @@ class FMMLibExpansionWrangler(object): self.dim = tree.dimensions - self.supports_optimized_m2l = self.dim == 3 and rotation_data is not None + if self.dim == 3: + if rotation_data is None: + from warnings import warn + warn( + "List 2 (multipole-to-local) translations will be " + "unoptimized. Supply a rotation_data argument to " + "the wrangler for optimized List 2.", + FMMLibRotationDataNotSuppliedWarning, + stacklevel=2) + + self.supports_optimized_m2l = rotation_data is not None + else: + self.supports_optimized_m2l = False if dipole_vec is not None: assert dipole_vec.shape == (self.dim, self.tree.nsources) diff --git a/test/test_fmm.py b/test/test_fmm.py index 2a36c00..e673ce9 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -27,11 +27,13 @@ from six.moves import range import numpy as np import numpy.linalg as la import pyopencl as cl +import warnings import pytest from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) +from boxtree.pyfmmlib_integration import FMMLibRotationDataNotSuppliedWarning from boxtree.tools import ( # noqa: F401 make_normal_particle_array as p_normal, make_surface_particle_array as p_surface, @@ -42,6 +44,8 @@ from boxtree.tools import ( # noqa: F401 import logging logger = logging.getLogger(__name__) +warnings.simplefilter("ignore", FMMLibRotationDataNotSuppliedWarning) + # {{{ fmm interaction completeness test -- GitLab From 4a7eb780710a4ed6d50a8ecda8c16b6cb979af3a Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 26 Jun 2019 16:42:36 -0500 Subject: [PATCH 56/57] Update FMM docs --- doc/fmm.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/fmm.rst b/doc/fmm.rst index 3d4656b..9ae7034 100644 --- a/doc/fmm.rst +++ b/doc/fmm.rst @@ -19,9 +19,11 @@ Integration with PyFMMLib .. automodule:: boxtree.pyfmmlib_integration -.. autoclass:: FMMLibGeometryDataInterface +.. autoclass:: FMMLibRotationDataInterface -.. autoclass:: FMMLibGeometryData +.. autoclass:: FMMLibRotationData + +.. autoclass:: FMMLibRotationDataNotSuppliedWarning .. autoclass:: FMMLibExpansionWrangler -- GitLab From 9745577b1ecc45c41525c6410d7d409367b573f2 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Wed, 26 Jun 2019 20:49:30 -0500 Subject: [PATCH 57/57] Make setting of self.rotation_data closer to rotation_data check --- boxtree/pyfmmlib_integration.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 843085a..2ca27e3 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -143,8 +143,6 @@ class FMMLibExpansionWrangler(object): return nterms self.tree = tree - self.rotation_data = rotation_data - self.rotmat_cutoff_bytes = optimized_m2l_precomputation_memory_cutoff_bytes if helmholtz_k == 0: self.eqn_letter = "l" @@ -169,6 +167,9 @@ class FMMLibExpansionWrangler(object): self.dim = tree.dimensions + self.rotation_data = rotation_data + self.rotmat_cutoff_bytes = optimized_m2l_precomputation_memory_cutoff_bytes + if self.dim == 3: if rotation_data is None: from warnings import warn -- GitLab