diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index a32d1e6ef2dca82941efb6c0b49aa35fd4c614f3..2ca27e356fa1f7ecbe11368fe86d57c7b34ff2d1 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -38,6 +38,70 @@ __doc__ = """Integrates :mod:`boxtree` with """ +# {{{ rotation data interface + +class FMMLibRotationDataInterface(object): + """Abstract interface for additional, optional data for precomputation of + rotation matrices passed to the expansion wrangler. + + .. automethod:: m2l_rotation_lists + + .. automethod:: m2l_rotation_angles + + """ + + def m2l_rotation_lists(self): + """Return a :mod:`numpy` array mapping entries of List 2 to rotation classes. + """ + raise NotImplementedError + + def m2l_rotation_angles(self): + """Return a :mod:`numpy` array mapping List 2 rotation classes to rotation angles. + """ + raise NotImplementedError + + +class FMMLibRotationData(FMMLibRotationDataInterface): + """An implementation of the :class:`FMMLibRotationDataInterface`.""" + + def __init__(self, queue, trav): + self.queue = queue + self.trav = trav + self.tree = trav.tree + + @property + @memoize_method + def rotation_classes_builder(self): + from boxtree.rotation_classes import RotationClassesBuilder + return RotationClassesBuilder(self.queue.context) + + @memoize_method + def build_rotation_classes_lists(self): + trav = self.trav.to_device(self.queue) + tree = self.tree.to_device(self.queue) + return self.rotation_classes_builder(self.queue, trav, tree)[0] + + @memoize_method + def m2l_rotation_lists(self): + return (self + .build_rotation_classes_lists() + .from_sep_siblings_rotation_classes + .get(self.queue)) + + @memoize_method + def m2l_rotation_angles(self): + return (self + .build_rotation_classes_lists() + .from_sep_siblings_rotation_class_to_angle + .get(self.queue)) + + +class FMMLibRotationDataNotSuppliedWarning(UserWarning): + pass + +# }}} + + class FMMLibExpansionWrangler(object): """Implements the :class:`boxtree.fmm.ExpansionWranglerInterface` by using pyfmmlib. @@ -50,11 +114,20 @@ class FMMLibExpansionWrangler(object): # {{{ constructor def __init__(self, tree, helmholtz_k, fmm_level_to_nterms=None, ifgrad=False, - dipole_vec=None, dipoles_already_reordered=False, nterms=None): + dipole_vec=None, dipoles_already_reordered=False, nterms=None, + optimized_m2l_precomputation_memory_cutoff_bytes=10**8, + rotation_data=None): """ - :arg fmm_level_to_nterms: a callable that, upon being passed the tree + :arg fmm_level_to_nterms: A callable that, upon being passed the tree and the tree level as an integer, returns the value of *nterms* for the multipole and local expansions on that level. + :arg rotation_data: Either *None* or an instance of the + :class:`FMMLibRotationDataInterface`. In three dimensions, passing + *rotation_data* enables optimized M2L (List 2) translations. + In two dimensions, this does nothing. + :arg optimized_m2l_precomputation_memory_cutoff_bytes: When using + optimized List 2 translations, an upper bound in bytes on the + amount of storage to use for a precomputed rotation matrix. """ if nterms is not None and fmm_level_to_nterms is not None: @@ -94,6 +167,23 @@ class FMMLibExpansionWrangler(object): self.dim = tree.dimensions + self.rotation_data = rotation_data + self.rotmat_cutoff_bytes = optimized_m2l_precomputation_memory_cutoff_bytes + + if self.dim == 3: + if rotation_data is None: + from warnings import warn + warn( + "List 2 (multipole-to-local) translations will be " + "unoptimized. Supply a rotation_data argument to " + "the wrangler for optimized List 2.", + FMMLibRotationDataNotSuppliedWarning, + stacklevel=2) + + self.supports_optimized_m2l = rotation_data is not None + else: + self.supports_optimized_m2l = False + if dipole_vec is not None: assert dipole_vec.shape == (self.dim, self.tree.nsources) @@ -569,6 +659,57 @@ class FMMLibExpansionWrangler(object): return output + # {{{ precompute rotation matrices for optimized m2l + + @memoize_method + def m2l_rotation_matrices(self): + # Returns a tuple (rotmatf, rotmatb, rotmat_order), consisting of the + # forward rotation matrices, backward rotation matrices, and the + # translation order of the matrices. rotmat_order is -1 if not + # supported. + + rotmatf = None + rotmatb = None + rotmat_order = -1 + + if not self.supports_optimized_m2l: + return (rotmatf, rotmatb, rotmat_order) + + m2l_rotation_angles = self.rotation_data.m2l_rotation_angles() + + def mem_estimate(order): + # Rotation matrix memory cost estimate. + return (8 + * (order + 1)**2 + * (2*order + 1) + * len(m2l_rotation_angles)) + + # Find the largest order we can use. Because the memory cost of the + # matrices could be large, only precompute them if the cost estimate + # for the order does not exceed the cutoff. + for order in sorted(self.level_nterms, key=lambda x: -x): + if mem_estimate(order) < self.rotmat_cutoff_bytes: + rotmat_order = order + break + + if rotmat_order == -1: + return (rotmatf, rotmatb, rotmat_order) + + # Compute the rotation matrices. + from pyfmmlib import rotviarecur3p_init_vec as rotmat_builder + + ier, rotmatf = ( + rotmat_builder(rotmat_order, m2l_rotation_angles)) + assert (0 == ier).all() + + ier, rotmatb = ( + rotmat_builder(rotmat_order, -m2l_rotation_angles)) + assert (0 == ier).all() + + return (rotmatf, rotmatb, rotmat_order) + + # }}} + @log_process(logger) @return_timing_data def multipole_to_local(self, @@ -578,7 +719,9 @@ class FMMLibExpansionWrangler(object): tree = self.tree local_exps = self.local_expansion_zeros() - mploc = self.get_translation_routine("%ddmploc", vec_suffix="_imany") + # Precomputed rotation matrices (matrices of larger order can be used + # for translations of smaller order) + rotmatf, rotmatb, rotmat_order = self.m2l_rotation_matrices() for lev in range(self.tree.nlevels): lstart, lstop = level_start_target_or_target_parent_box_nrs[lev:lev+2] @@ -587,6 +730,33 @@ class FMMLibExpansionWrangler(object): starts_on_lvl = starts[lstart:lstop+1] + mploc = self.get_translation_routine("%ddmploc", vec_suffix="_imany") + + kwargs = {} + + # {{{ set up optimized m2l, if applicable + + if self.level_nterms[lev] <= rotmat_order: + m2l_rotation_lists = self.rotation_data.m2l_rotation_lists() + assert len(m2l_rotation_lists) == len(lists) + + mploc = self.get_translation_routine( + "%ddmploc", vec_suffix="2_trunc_imany") + + kwargs["ldm"] = rotmat_order + kwargs["nterms"] = self.level_nterms[lev] + kwargs["nterms1"] = self.level_nterms[lev] + + kwargs["rotmatf"] = rotmatf + kwargs["rotmatf_offsets"] = m2l_rotation_lists + kwargs["rotmatf_starts"] = starts_on_lvl + + kwargs["rotmatb"] = rotmatb + kwargs["rotmatb_offsets"] = m2l_rotation_lists + kwargs["rotmatb_starts"] = starts_on_lvl + + # }}} + source_level_start_ibox, source_mpoles_view = \ self.multipole_expansions_view(mpole_exps, lev) target_level_start_ibox, target_local_exps_view = \ @@ -610,7 +780,6 @@ class FMMLibExpansionWrangler(object): rscale1 = np.ones(nsrc_boxes) * rscale rscale1_offsets = np.arange(nsrc_boxes) - kwargs = {} if self.dim == 3 and self.eqn_letter == "h": kwargs["radius"] = ( tree.root_extent * 2**(-lev) diff --git a/boxtree/rotation_classes.py b/boxtree/rotation_classes.py new file mode 100644 index 0000000000000000000000000000000000000000..97df572cc3994bf7bf1221c087563631337a53fe --- /dev/null +++ b/boxtree/rotation_classes.py @@ -0,0 +1,390 @@ +from __future__ import division + +__copyright__ = "Copyright (C) 2019 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import numpy as np +from pytools import Record, memoize_method +import pyopencl as cl +import pyopencl.array # noqa +import pyopencl.cltypes # noqa +from pyopencl.elementwise import ElementwiseTemplate +from mako.template import Template +from boxtree.tools import DeviceDataRecord, InlineBinarySearch +from boxtree.traversal import TRAVERSAL_PREAMBLE_MAKO_DEFS + +import logging +logger = logging.getLogger(__name__) + +from pytools import log_process + + +# {{{ rotation classes builder + +# Note that these kernels compute translation classes first, and +# these get converted to rotation classes in a second step, from Python. + +TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE = Template(r"""//CL:mako// + #define LEVEL_TO_RAD(level) \ + (root_extent * 1 / (coord_t) (1 << (level + 1))) + + // Return an integer vector indicating the a translation direction + // as a multiple of the box diameter. + inline int_coord_vec_t get_translation_vector( + coord_t root_extent, + int level, + coord_vec_t source_center, + coord_vec_t target_center) + { + int_coord_vec_t result = (int_coord_vec_t) 0; + coord_t diam = 2 * LEVEL_TO_RAD(level); + %for i in range(dimensions): + result.s${i} = rint((target_center.s${i} - source_center.s${i}) / diam); + %endfor + return result; + } + + // Compute the translation class for the given translation vector. The + // translation class maps a translation vector (a_1, a_2, ..., a_d) into + // a dense range of integers [0, ..., (4*n+3)^d - 1], where + // d is the dimension and n is well_sep_is_n_away. + // + // This relies on the fact that the entries of the vector will + // always be in the range [-2n-1,...,2n+1]. + // + // The mapping from vector to class is: + // + // \~~ d k-1 + // cls(a ,a ,...,a ) = > (2n+1+a ) (4n+3) + // 1 2 d /__ k=1 k + // + // Returns -1 on error. + inline int get_translation_class(int_coord_vec_t vec, int well_sep_is_n_away) + { + int dim_bound = 2 * well_sep_is_n_away + 1; + %for i in range(dimensions): + if (!(-dim_bound <= vec.s${i} && vec.s${i} <= dim_bound)) + { + return -1; + } + %endfor + + int result = 0; + int base = 4 * well_sep_is_n_away + 3; + int mult = 1; + %for i in range(dimensions): + result += (2 * well_sep_is_n_away + 1 + vec.s${i}) * mult; + mult *= base; + %endfor + return result; + } + """ + str(InlineBinarySearch("box_id_t")), + strict_undefined=True) + + +TRANSLATION_CLASS_FINDER_TEMPLATE = ElementwiseTemplate( + arguments=r"""//CL:mako// + /* input: */ + box_id_t *from_sep_siblings_lists, + box_id_t *from_sep_siblings_starts, + box_id_t *target_or_target_parent_boxes, + int ntarget_or_target_parent_boxes, + coord_t *box_centers, + int aligned_nboxes, + coord_t root_extent, + box_level_t *box_levels, + int well_sep_is_n_away, + + /* output: */ + int *translation_classes, + int *translation_class_is_used, + int *error_flag, + """, + + operation=TRAVERSAL_PREAMBLE_MAKO_DEFS + r"""//CL:mako// + // Find the target box for this source box. + box_id_t source_box_id = from_sep_siblings_lists[i]; + + size_t itarget_box = bsearch( + from_sep_siblings_starts, 1 + ntarget_or_target_parent_boxes, i); + + box_id_t target_box_id = target_or_target_parent_boxes[itarget_box]; + + // Ensure levels are the same. + if (box_levels[source_box_id] != box_levels[target_box_id]) + { + *error_flag = 1; + PYOPENCL_ELWISE_CONTINUE; + } + + // Compute the translation vector and translation class. + ${load_center("source_center", "source_box_id")} + ${load_center("target_center", "target_box_id")} + + int_coord_vec_t vec = get_translation_vector( + root_extent, box_levels[source_box_id], source_center, target_center); + + int translation_class = get_translation_class(vec, well_sep_is_n_away); + + // Ensure valid translation class. + if (translation_class == -1) + { + *error_flag = 1; + PYOPENCL_ELWISE_CONTINUE; + } + + translation_classes[i] = translation_class; + translation_class_is_used[translation_class] = 1; + """) + + +class _KernelInfo(Record): + pass + + +class RotationClassesInfo(DeviceDataRecord): + r"""Interaction lists to help with matrix precomputations for rotation-based + translations ("point and shoot"). + + .. attribute:: nfrom_sep_siblings_rotation_classes + + The number of distinct rotation classes. + + .. attribute:: from_sep_siblings_rotation_classes + + ``int32 [*]`` + + A list, corresponding to *from_sep_siblings_lists* of *trav*, of + the rotation class of each box pair. + + .. attribute:: from_sep_siblings_rotation_class_to_angle + + ``coord_t [nfrom_sep_siblings_rotation_classes]`` + + Maps rotation classes in *from_sep_siblings_rotation_classes* to + rotation angles. This represents the angle between box translation + pairs and the *z*-axis. + + """ + + @property + def nfrom_sep_siblings_rotation_classes(self): + return len(self.from_sep_siblings_rotation_class_to_angle) + + +class RotationClassesBuilder(object): + """Build rotation classes for List 2 translations. + """ + + def __init__(self, context): + self.context = context + + @memoize_method + def get_kernel_info(self, dimensions, well_sep_is_n_away, + box_id_dtype, box_level_dtype, coord_dtype): + coord_vec_dtype = cl.cltypes.vec_types[coord_dtype, dimensions] + int_coord_vec_dtype = cl.cltypes.vec_types[np.dtype(np.int32), dimensions] + + # Make sure translation classes can fit inside a 32 bit integer. + if (not + ( + self.ntranslation_classes(well_sep_is_n_away, dimensions) + <= 1 + np.iinfo(np.int32).max)): + raise ValueError("would overflow") + + preamble = TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE.render( + dimensions=dimensions) + + translation_class_finder = ( + TRANSLATION_CLASS_FINDER_TEMPLATE.build( + self.context, + type_aliases=( + ("int_coord_vec_t", int_coord_vec_dtype), + ("coord_vec_t", coord_vec_dtype), + ("coord_t", coord_dtype), + ("box_id_t", box_id_dtype), + ("box_level_t", box_level_dtype), + ), + var_values=( + ("dimensions", dimensions), + ), + more_preamble=preamble)) + + return _KernelInfo(translation_class_finder=translation_class_finder) + + @staticmethod + def vec_gcd(vec): + """Return the GCD of a list of integers.""" + def gcd(a, b): + while b: + a, b = b, a % b + return a + + result = abs(vec[0]) + for elem in vec[1:]: + result = gcd(result, abs(elem)) + return result + + def compute_rotation_classes(self, + well_sep_is_n_away, dimensions, used_translation_classes): + """Convert translation classes to a list of rotation classes and angles.""" + angle_to_rot_class = {} + angles = [] + + ntranslation_classes = ( + self.ntranslation_classes(well_sep_is_n_away, dimensions)) + + translation_class_to_rot_class = ( + np.empty(ntranslation_classes, dtype=np.int32)) + + translation_class_to_rot_class[:] = -1 + + for cls in used_translation_classes: + vec = self.translation_class_to_vector( + well_sep_is_n_away, dimensions, cls) + + # Normalize the translation vector (by dividing by its GCD). + # + # We need this before computing the cosine of the rotation angle, + # because generally in in floating point arithmetic, if k is a + # positive scalar and v is a vector, we can't assume + # + # kv[-1] / sqrt(|kv|^2) == v[-1] / sqrt(|v|^2). + # + # Normalizing ensures vectors that are positive integer multiples of + # each other get classified into the same equivalence class of + # rotations. + vec //= self.vec_gcd(vec) + + # Compute the rotation angle for the vector. + norm = np.linalg.norm(vec) + assert norm != 0 + angle = np.arccos(vec[-1] / norm) + + # Find the rotation class. + if angle in angle_to_rot_class: + rot_class = angle_to_rot_class[angle] + else: + rot_class = len(angles) + angle_to_rot_class[angle] = rot_class + angles.append(angle) + + translation_class_to_rot_class[cls] = rot_class + + return translation_class_to_rot_class, angles + + @staticmethod + def ntranslation_classes(well_sep_is_n_away, dimensions): + return (4 * well_sep_is_n_away + 3) ** dimensions + + @staticmethod + def translation_class_to_vector(well_sep_is_n_away, dimensions, cls): + # This computes the vector for the translation class, using the inverse + # of the formula found in get_translation_class() defined in + # TRANSLATION_CLASS_FINDER_PREAMBLE_TEMPLATE. + result = np.zeros(dimensions, dtype=np.int32) + shift = 2 * well_sep_is_n_away + 1 + base = 4 * well_sep_is_n_away + 3 + for i in range(dimensions): + result[i] = cls % base - shift + cls //= base + return result + + @log_process(logger, "build m2l rotation classes") + def __call__(self, queue, trav, tree, wait_for=None): + """Returns a pair *info*, *evt* where info is a :class:`RotationClassesInfo`. + """ + + # {{{ compute translation classes for list 2 + + well_sep_is_n_away = trav.well_sep_is_n_away + dimensions = tree.dimensions + coord_dtype = tree.coord_dtype + + knl_info = self.get_kernel_info( + dimensions, well_sep_is_n_away, tree.box_id_dtype, + tree.box_level_dtype, coord_dtype) + + ntranslation_classes = ( + self.ntranslation_classes(well_sep_is_n_away, dimensions)) + + translation_classes_lists = cl.array.empty( + queue, len(trav.from_sep_siblings_lists), dtype=np.int32) + + translation_class_is_used = cl.array.zeros( + queue, ntranslation_classes, dtype=np.int32) + + error_flag = cl.array.zeros(queue, 1, dtype=np.int32) + + evt = knl_info.translation_class_finder( + trav.from_sep_siblings_lists, + trav.from_sep_siblings_starts, + trav.target_or_target_parent_boxes, + trav.ntarget_or_target_parent_boxes, + tree.box_centers, + tree.aligned_nboxes, + tree.root_extent, + tree.box_levels, + well_sep_is_n_away, + translation_classes_lists, + translation_class_is_used, + error_flag, + queue=queue, wait_for=wait_for) + + if (error_flag.get()): + raise ValueError("could not compute translation classes") + + # }}} + + # {{{ convert translation classes to rotation classes + + used_translation_classes = ( + np.flatnonzero(translation_class_is_used.get())) + + translation_class_to_rotation_class, rotation_angles = ( + self.compute_rotation_classes( + well_sep_is_n_away, dimensions, used_translation_classes)) + + # There should be no more than 2^(d-1) * (2n+1)^d distinct rotation + # classes, since that is an upper bound on the number of distinct + # positions for list 2 boxes. + d = dimensions + n = well_sep_is_n_away + assert len(rotation_angles) <= 2**(d-1) * (2*n+1)**d + + rotation_classes_lists = ( + cl.array.take( + cl.array.to_device(queue, translation_class_to_rotation_class), + translation_classes_lists)) + + rotation_angles = cl.array.to_device(queue, np.array(rotation_angles)) + + # }}} + + return RotationClassesInfo( + from_sep_siblings_rotation_classes=rotation_classes_lists, + from_sep_siblings_rotation_class_to_angle=rotation_angles, + ).with_queue(None), evt + +# }}} + +# vim: filetype=pyopencl:fdm=marker diff --git a/boxtree/tools.py b/boxtree/tools.py index 56180ac83ee91ebe9dedbeb4e6f155bef1bea84f..5dc58319420b02fee0d39927bc089de51fc44f7a 100644 --- a/boxtree/tools.py +++ b/boxtree/tools.py @@ -580,25 +580,38 @@ from mako.template import Template BINARY_SEARCH_TEMPLATE = Template(""" -inline size_t bsearch(__global ${idx_t} *starts, size_t len, ${idx_t} val) +/* + * Returns the largest value of i such that arr[i] <= val, or (size_t) -1 if val + * is less than all values. + */ +inline size_t bsearch( + __global const ${elem_t} *arr, + size_t len, + const ${elem_t} val) { - size_t l_idx = 0, r_idx = len - 1, my_idx; - for (;;) + if (val < arr[0]) { - my_idx = (l_idx + r_idx) / 2; + return -1; + } + + size_t l = 0, r = len, i; + + while (1) + { + i = l + (r - l) / 2; - if (starts[my_idx] <= val && val < starts[my_idx + 1]) + if (arr[i] <= val && (i == len - 1 || val < arr[i + 1])) { - return my_idx; + return i; } - if (starts[my_idx] > val) + if (arr[i] <= val) { - r_idx = my_idx - 1; + l = i; } else { - l_idx = my_idx + 1; + r = i; } } } @@ -607,12 +620,12 @@ inline size_t bsearch(__global ${idx_t} *starts, size_t len, ${idx_t} val) class InlineBinarySearch(object): - def __init__(self, idx_t): - self.idx_t = idx_t + def __init__(self, elem_type_name): + self.render_vars = {"elem_t": elem_type_name} @memoize_method def __str__(self): - return BINARY_SEARCH_TEMPLATE.render(idx_t=self.idx_t) + return BINARY_SEARCH_TEMPLATE.render(**self.render_vars) # }}} diff --git a/boxtree/version.py b/boxtree/version.py index f5c7ef59fd0b28718e7d3d4b38e0d3e91e2e1949..78a7f0b5f35e6402a55367e5a38d626b9aa881fd 100644 --- a/boxtree/version.py +++ b/boxtree/version.py @@ -1,2 +1,2 @@ -VERSION = (2018, 2) +VERSION = (2019, 1) VERSION_TEXT = ".".join(str(i) for i in VERSION) diff --git a/doc/fmm.rst b/doc/fmm.rst index 97ea4e06a88a75e0a9626a0a26aeef468b6d0246..9ae7034eb421d5e041ef0825d37999f5ce93e13e 100644 --- a/doc/fmm.rst +++ b/doc/fmm.rst @@ -19,6 +19,12 @@ Integration with PyFMMLib .. automodule:: boxtree.pyfmmlib_integration +.. autoclass:: FMMLibRotationDataInterface + +.. autoclass:: FMMLibRotationData + +.. autoclass:: FMMLibRotationDataNotSuppliedWarning + .. autoclass:: FMMLibExpansionWrangler .. vim: sw=4 diff --git a/doc/misc.rst b/doc/misc.rst index a4c008fd9a67cfeb62438bcd00e9f7fa825395e8..2c9f7a80de1977bec1751d3e5d06a18927878917 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -24,13 +24,21 @@ for instructions. User-visible Changes ==================== -Version 2018.2 +Version 2019.1 -------------- + .. note:: This version is currently under development. You can get snapshots from boxtree's `git repository `_ +* Faster M2Ls in the FMMLIB backend using precomputed rotation matrices. This + change adds an optional *rotation_data* parameter to the FMMLIB geometry wrangler + constructor. + +Version 2018.2 +-------------- + * Changed index style of the *from_sep_close_bigger_starts* interaction list. Version 2018.1 diff --git a/test/test_fmm.py b/test/test_fmm.py index fbd5564a63c7f2fa9cde7332deaaab361e485521..e673ce9141b95a395e97f04d0368187b421c7fca 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -27,11 +27,13 @@ from six.moves import range import numpy as np import numpy.linalg as la import pyopencl as cl +import warnings import pytest from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) +from boxtree.pyfmmlib_integration import FMMLibRotationDataNotSuppliedWarning from boxtree.tools import ( # noqa: F401 make_normal_particle_array as p_normal, make_surface_particle_array as p_surface, @@ -42,6 +44,8 @@ from boxtree.tools import ( # noqa: F401 import logging logger = logging.getLogger(__name__) +warnings.simplefilter("ignore", FMMLibRotationDataNotSuppliedWarning) + # {{{ fmm interaction completeness test @@ -619,6 +623,93 @@ def test_fmm_float32(ctx_factory, enable_extents): # }}} +# {{{ test with fmm optimized 3d m2l + +@pytest.mark.parametrize("well_sep_is_n_away", (1, 2)) +@pytest.mark.parametrize("helmholtz_k", (0, 2)) +def test_fmm_with_optimized_3d_m2l(ctx_factory, helmholtz_k, well_sep_is_n_away): + logging.basicConfig(level=logging.INFO) + + from pytest import importorskip + importorskip("pyfmmlib") + + dims = 3 + + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + nsources = 5000 + ntargets = 5000 + dtype = np.float64 + + sources = p_normal(queue, nsources, dims, dtype, seed=15) + targets = ( + p_normal(queue, ntargets, dims, dtype, seed=18) + + np.array([2, 0, 0])[:dims]) + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + tree, _ = tb(queue, sources, targets=targets, + max_particles_in_box=30, debug=True) + + from boxtree.traversal import FMMTraversalBuilder + tbuild = FMMTraversalBuilder(ctx) + trav, _ = tbuild(queue, tree, debug=True) + + trav = trav.get(queue=queue) + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(queue.context, seed=20) + + weights = rng.uniform(queue, nsources, dtype=np.float64).get() + + base_nterms = 10 + + def fmm_level_to_nterms(tree, lev): + result = base_nterms + + if lev < 3 and helmholtz_k: + # exercise order-varies-by-level capability + result += 5 + + return result + + from boxtree.pyfmmlib_integration import ( + FMMLibExpansionWrangler, FMMLibRotationData) + + baseline_wrangler = FMMLibExpansionWrangler( + trav.tree, helmholtz_k, + fmm_level_to_nterms=fmm_level_to_nterms) + + optimized_wrangler = FMMLibExpansionWrangler( + trav.tree, helmholtz_k, + fmm_level_to_nterms=fmm_level_to_nterms, + rotation_data=FMMLibRotationData(queue, trav)) + + from boxtree.fmm import drive_fmm + + baseline_timing_data = {} + baseline_pot = drive_fmm( + trav, baseline_wrangler, weights, timing_data=baseline_timing_data) + + optimized_timing_data = {} + optimized_pot = drive_fmm( + trav, optimized_wrangler, weights, timing_data=optimized_timing_data) + + baseline_time = baseline_timing_data["multipole_to_local"]["process_elapsed"] + if baseline_time is not None: + print("Baseline M2L time : %#.4g s" % baseline_time) + + opt_time = optimized_timing_data["multipole_to_local"]["process_elapsed"] + if opt_time is not None: + print("Optimized M2L time: %#.4g s" % opt_time) + + assert np.allclose(baseline_pot, optimized_pot, atol=1e-13, rtol=1e-13) + +# }}} + + # You can test individual routines by typing # $ python test_fmm.py 'test_routine(cl.create_some_context)' diff --git a/test/test_traversal.py b/test/test_traversal.py index 32645a2a708d0f4403a714fdccf69bd654cc7dbe..52e1a5007f907fc9b58fbd95e36ec7ab88416e4d 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -346,6 +346,77 @@ def plot_traversal(ctx_factory, do_plot=False, well_sep_is_n_away=1): # }}} +# {{{ test_from_sep_siblings_rotation_classes + +@pytest.mark.parametrize("well_sep_is_n_away", (1, 2)) +def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + dims = 3 + nparticles = 10**4 + dtype = np.float64 + + # {{{ build tree + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(queue.context, seed=15) + + from pytools.obj_array import make_obj_array + particles = make_obj_array([ + rng.normal(queue, nparticles, dtype=dtype) + for i in range(dims)]) + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + queue.finish() + tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True) + + # }}} + + # {{{ build traversal + + from boxtree.traversal import FMMTraversalBuilder + from boxtree.rotation_classes import RotationClassesBuilder + + tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away) + trav, _ = tg(queue, tree) + + rb = RotationClassesBuilder(ctx) + result, _ = rb(queue, trav, tree) + + rot_classes = result.from_sep_siblings_rotation_classes.get(queue) + rot_angles = result.from_sep_siblings_rotation_class_to_angle.get(queue) + + tree = tree.get(queue=queue) + trav = trav.get(queue=queue) + + centers = tree.box_centers.T + + # }}} + + # For each entry of from_sep_siblings, compute the source-target translation + # direction as a vector, and check that the from_sep_siblings rotation class + # in the traversal corresponds to the angle with the z-axis of the + # translation direction. + + for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes): + start, end = trav.from_sep_siblings_starts[itgt_box:itgt_box+2] + seps = trav.from_sep_siblings_lists[start:end] + level_rot_classes = rot_classes[start:end] + + translation_vecs = centers[tgt_ibox] - centers[seps] + theta = np.arctan2( + la.norm(translation_vecs[:, :dims - 1], axis=1), + translation_vecs[:, dims - 1]) + level_rot_angles = rot_angles[level_rot_classes] + + assert np.allclose(theta, level_rot_angles, atol=1e-13, rtol=1e-13) + +# }}} + + # You can test individual routines by typing # $ python test_traversal.py 'test_routine(cl.create_some_context)'