Skip to content
[flake8]
ignore = E126,E127,E128,E123,E226,E241,E242,E265,E402,W503
max-line-length=85
#!/usr/bin/env python
# -*- coding: latin1 -*-
def main():
from setuptools import setup
version_dict = {}
init_filename = "boxtree/version.py"
exec(compile(open(init_filename, "r").read(), init_filename, "exec"),
version_dict)
setup(name="boxtree",
version=version_dict["VERSION_TEXT"],
description="Quadtree/octree building in Python and OpenCL",
long_description=open("README.rst", "rt").read(),
author="Andreas Kloeckner",
author_email="inform@tiker.net",
license="MIT",
url="http://wiki.tiker.net/BoxTree",
classifiers=[
'Development Status :: 3 - Alpha',
'Intended Audience :: Developers',
'Intended Audience :: Other Audience',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: MIT License',
'Natural Language :: English',
'Programming Language :: Python',
# We use conditional expressions, so 2.5 is the bare minimum.
'Programming Language :: Python :: 2.5',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
# 3.x has not yet been tested.
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Information Analysis',
'Topic :: Scientific/Engineering :: Mathematics',
'Topic :: Scientific/Engineering :: Visualization',
'Topic :: Software Development :: Libraries',
'Topic :: Utilities',
],
packages=["boxtree"],
install_requires=[
"pytools>=2013.1",
"pyopencl>=2013.1",
"Mako>=0.7.3",
"pytest>=2.3",
"cgen>=2013.1.2",
"six",
])
if __name__ == '__main__':
main()
__copyright__ = """
Copyright (C) 2013 Andreas Kloeckner
Copyright (C) 2018 Matt Wala
Copyright (C) 2018 Hao Gao
"""
__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import logging
import sys
import time
import numpy as np
import pytest
from arraycontext import pytest_generate_tests_for_array_contexts
from boxtree.array_context import (
PytestPyOpenCLArrayContextFactory,
_acf, # noqa: F401
)
from boxtree.cost import (
FMMCostModel,
_PythonFMMCostModel,
make_pde_aware_translation_cost_model,
)
logger = logging.getLogger(__name__)
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
# {{{ test_compare_cl_and_py_cost_model
@pytest.mark.opencl
@pytest.mark.parametrize(
("nsources", "ntargets", "dims", "dtype"), [
(50000, 50000, 3, np.float64)
]
)
def test_compare_cl_and_py_cost_model(actx_factory, nsources, ntargets, dims, dtype):
actx = actx_factory()
# {{{ Generate sources, targets and target_radii
from boxtree.tools import make_normal_particle_array as p_normal
sources = p_normal(actx.queue, nsources, dims, dtype, seed=15)
targets = p_normal(actx.queue, ntargets, dims, dtype, seed=18)
rng = np.random.default_rng(22)
target_radii = rng.uniform(0.0, 0.05, (ntargets,)).astype(dtype)
# }}}
# {{{ Generate tree and traversal
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
tree, _ = tb(
actx.queue, sources, targets=targets, target_radii=target_radii,
stick_out_factor=0.15, max_particles_in_box=30, debug=True
)
from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(actx.context, well_sep_is_n_away=2)
trav_dev, _ = tg(actx.queue, tree, debug=True)
trav = trav_dev.get(queue=actx.queue)
# }}}
# {{{ Construct cost models
cl_cost_model = FMMCostModel(None)
python_cost_model = _PythonFMMCostModel(None)
constant_one_params = cl_cost_model.get_unit_calibration_params().copy()
for ilevel in range(trav.tree.nlevels):
constant_one_params[f"p_fmm_lev{ilevel}"] = 10
xlat_cost = make_pde_aware_translation_cost_model(dims, trav.tree.nlevels)
# }}}
# {{{ Test process_form_multipoles
from pymbolic import evaluate
nlevels = trav.tree.nlevels
p2m_cost = np.zeros(nlevels, dtype=np.float64)
for ilevel in range(nlevels):
p2m_cost[ilevel] = evaluate(
xlat_cost.p2m(ilevel),
context=constant_one_params
)
p2m_cost_dev = actx.from_numpy(p2m_cost)
actx.queue.finish()
start_time = time.time()
cl_form_multipoles = cl_cost_model.process_form_multipoles(
actx.queue, trav_dev, p2m_cost_dev
)
actx.queue.finish()
logger.info("OpenCL time for process_form_multipoles: %gs",
time.time() - start_time)
start_time = time.time()
python_form_multipoles = python_cost_model.process_form_multipoles(
actx.queue, trav, p2m_cost
)
logger.info("Python time for process_form_multipoles: %gs",
time.time() - start_time)
assert np.array_equal(actx.to_numpy(cl_form_multipoles), python_form_multipoles)
# }}}
# {{{ Test process_coarsen_multipoles
m2m_cost = np.zeros(nlevels - 1, dtype=np.float64)
for target_level in range(nlevels - 1):
m2m_cost[target_level] = evaluate(
xlat_cost.m2m(target_level + 1, target_level),
context=constant_one_params
)
m2m_cost_dev = actx.from_numpy(m2m_cost)
actx.queue.finish()
start_time = time.time()
cl_coarsen_multipoles = cl_cost_model.process_coarsen_multipoles(
actx.queue, trav_dev, m2m_cost_dev
)
actx.queue.finish()
logger.info("OpenCL time for coarsen_multipoles: %gs",
time.time() - start_time)
start_time = time.time()
python_coarsen_multipoles = python_cost_model.process_coarsen_multipoles(
actx.queue, trav, m2m_cost
)
logger.info("Python time for coarsen_multipoles: %gs",
time.time() - start_time)
assert cl_coarsen_multipoles == python_coarsen_multipoles
# }}}
# {{{ Test process_direct
actx.queue.finish()
start_time = time.time()
cl_ndirect_sources_per_target_box = \
cl_cost_model.get_ndirect_sources_per_target_box(actx.queue, trav_dev)
cl_direct = cl_cost_model.process_direct(
actx.queue, trav_dev, cl_ndirect_sources_per_target_box, 5.0
)
actx.queue.finish()
logger.info("OpenCL time for process_direct: %gs",
time.time() - start_time)
start_time = time.time()
python_ndirect_sources_per_target_box = \
python_cost_model.get_ndirect_sources_per_target_box(actx.queue, trav)
python_direct = python_cost_model.process_direct(
actx.queue, trav, python_ndirect_sources_per_target_box, 5.0
)
logger.info("Python time for process_direct: %gs",
time.time() - start_time)
assert np.array_equal(actx.to_numpy(cl_direct), python_direct)
# }}}
# {{{ Test aggregate_over_boxes
start_time = time.time()
cl_direct_aggregate = cl_cost_model.aggregate_over_boxes(cl_direct)
actx.queue.finish()
logger.info("OpenCL time for aggregate_over_boxes: %gs",
time.time() - start_time)
start_time = time.time()
python_direct_aggregate = python_cost_model.aggregate_over_boxes(python_direct)
logger.info("Python time for aggregate_over_boxes: %gs",
time.time() - start_time)
assert cl_direct_aggregate == python_direct_aggregate
# }}}
# {{{ Test process_list2
nlevels = trav.tree.nlevels
m2l_cost = np.zeros(nlevels, dtype=np.float64)
for ilevel in range(nlevels):
m2l_cost[ilevel] = evaluate(
xlat_cost.m2l(ilevel, ilevel),
context=constant_one_params
)
m2l_cost_dev = actx.from_numpy(m2l_cost)
actx.queue.finish()
start_time = time.time()
cl_m2l_cost = cl_cost_model.process_list2(actx.queue, trav_dev, m2l_cost_dev)
actx.queue.finish()
logger.info("OpenCL time for process_list2: %gs",
time.time() - start_time)
start_time = time.time()
python_m2l_cost = python_cost_model.process_list2(actx.queue, trav, m2l_cost)
logger.info("Python time for process_list2: %gs",
time.time() - start_time)
assert np.array_equal(actx.to_numpy(cl_m2l_cost), python_m2l_cost)
# }}}
# {{{ Test process_list 3
m2p_cost = np.zeros(nlevels, dtype=np.float64)
for ilevel in range(nlevels):
m2p_cost[ilevel] = evaluate(
xlat_cost.m2p(ilevel),
context=constant_one_params
)
m2p_cost_dev = actx.from_numpy(m2p_cost)
actx.queue.finish()
start_time = time.time()
cl_m2p_cost = cl_cost_model.process_list3(actx.queue, trav_dev, m2p_cost_dev)
actx.queue.finish()
logger.info("OpenCL time for process_list3: %gs",
time.time() - start_time)
start_time = time.time()
python_m2p_cost = python_cost_model.process_list3(actx.queue, trav, m2p_cost)
logger.info("Python time for process_list3: %gs",
time.time() - start_time)
assert np.array_equal(actx.to_numpy(cl_m2p_cost), python_m2p_cost)
# }}}
# {{{ Test process_list4
p2l_cost = np.zeros(nlevels, dtype=np.float64)
for ilevel in range(nlevels):
p2l_cost[ilevel] = evaluate(
xlat_cost.p2l(ilevel),
context=constant_one_params
)
p2l_cost_dev = actx.from_numpy(p2l_cost)
actx.queue.finish()
start_time = time.time()
cl_p2l_cost = cl_cost_model.process_list4(actx.queue, trav_dev, p2l_cost_dev)
actx.queue.finish()
logger.info("OpenCL time for process_list4: %gs",
time.time() - start_time)
start_time = time.time()
python_p2l_cost = python_cost_model.process_list4(actx.queue, trav, p2l_cost)
logger.info("Python time for process_list4: %gs",
time.time() - start_time)
assert np.array_equal(actx.to_numpy(cl_p2l_cost), python_p2l_cost)
# }}}
# {{{ Test process_refine_locals
l2l_cost = np.zeros(nlevels - 1, dtype=np.float64)
for ilevel in range(nlevels - 1):
l2l_cost[ilevel] = evaluate(
xlat_cost.l2l(ilevel, ilevel + 1),
context=constant_one_params
)
l2l_cost_dev = actx.from_numpy(l2l_cost)
actx.queue.finish()
start_time = time.time()
cl_refine_locals_cost = cl_cost_model.process_refine_locals(
actx.queue, trav_dev, l2l_cost_dev
)
actx.queue.finish()
logger.info("OpenCL time for refine_locals: %gs",
time.time() - start_time)
start_time = time.time()
python_refine_locals_cost = python_cost_model.process_refine_locals(
actx.queue, trav, l2l_cost
)
logger.info("Python time for refine_locals: %gs",
time.time() - start_time)
assert cl_refine_locals_cost == python_refine_locals_cost
# }}}
# {{{ Test process_eval_locals
l2p_cost = np.zeros(nlevels, dtype=np.float64)
for ilevel in range(nlevels):
l2p_cost[ilevel] = evaluate(
xlat_cost.l2p(ilevel),
context=constant_one_params
)
l2p_cost_dev = actx.from_numpy(l2p_cost)
actx.queue.finish()
start_time = time.time()
cl_l2p_cost = cl_cost_model.process_eval_locals(
actx.queue, trav_dev, l2p_cost_dev)
actx.queue.finish()
logger.info("OpenCL time for process_eval_locals: %gs",
time.time() - start_time)
start_time = time.time()
python_l2p_cost = python_cost_model.process_eval_locals(
actx.queue, trav, l2p_cost)
logger.info("Python time for process_eval_locals: %gs",
time.time() - start_time)
assert np.array_equal(actx.to_numpy(cl_l2p_cost), python_l2p_cost)
# }}}
# }}}
# {{{ test_estimate_calibration_params
@pytest.mark.opencl
def test_estimate_calibration_params(actx_factory):
from boxtree.pyfmmlib_integration import (
FMMLibExpansionWrangler,
FMMLibTreeIndependentDataForWrangler,
Kernel,
)
rng = np.random.default_rng(seed=42)
nsources_list = [1000, 2000, 3000, 4000]
ntargets_list = [1000, 2000, 3000, 4000]
dims = 3
dtype = np.float64
actx = actx_factory()
traversals = []
traversals_dev = []
level_to_orders = []
timing_results = []
def fmm_level_to_order(tree, ilevel):
return 10
for nsources, ntargets in zip(nsources_list, ntargets_list, strict=True):
# {{{ Generate sources, targets and target_radii
from boxtree.tools import make_normal_particle_array as p_normal
sources = p_normal(actx.queue, nsources, dims, dtype, seed=15)
targets = p_normal(actx.queue, ntargets, dims, dtype, seed=18)
rng = np.random.default_rng(22)
target_radii = rng.uniform(0.0, 0.05, (ntargets,)).astype(dtype)
# }}}
# {{{ Generate tree and traversal
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
tree, _ = tb(
actx.queue, sources, targets=targets, target_radii=target_radii,
stick_out_factor=0.15, max_particles_in_box=30, debug=True
)
from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(actx.context, well_sep_is_n_away=2)
trav_dev, _ = tg(actx.queue, tree, debug=True)
trav = trav_dev.get(queue=actx.queue)
traversals.append(trav)
traversals_dev.append(trav_dev)
# }}}
tree_indep = FMMLibTreeIndependentDataForWrangler(
trav.tree.dimensions, Kernel.LAPLACE)
wrangler = FMMLibExpansionWrangler(tree_indep, trav,
fmm_level_to_order=fmm_level_to_order)
level_to_orders.append(wrangler.level_orders)
timing_data = {}
from boxtree.fmm import drive_fmm
src_weights = rng.random(size=tree.nsources, dtype=tree.coord_dtype)
drive_fmm(wrangler, (src_weights,), timing_data=timing_data)
timing_results.append(timing_data)
time_field_name = "process_elapsed"
def test_params_sanity(test_params):
param_names = ["c_p2m", "c_m2m", "c_p2p", "c_m2l", "c_m2p", "c_p2l", "c_l2l",
"c_l2p"]
for name in param_names:
assert isinstance(test_params[name], np.float64)
def test_params_equal(test_params1, test_params2):
param_names = ["c_p2m", "c_m2m", "c_p2p", "c_m2l", "c_m2p", "c_p2l", "c_l2l",
"c_l2p"]
for name in param_names:
assert test_params1[name] == test_params2[name]
python_cost_model = _PythonFMMCostModel(make_pde_aware_translation_cost_model)
python_model_results = []
for icase in range(len(traversals)-1):
traversal = traversals[icase]
level_to_order = level_to_orders[icase]
python_model_results.append(python_cost_model.cost_per_stage(
actx.queue, traversal, level_to_order,
_PythonFMMCostModel.get_unit_calibration_params(),
))
python_params = python_cost_model.estimate_calibration_params(
python_model_results, timing_results[:-1], time_field_name=time_field_name
)
test_params_sanity(python_params)
cl_cost_model = FMMCostModel(make_pde_aware_translation_cost_model)
cl_model_results = []
for icase in range(len(traversals_dev)-1):
traversal = traversals_dev[icase]
level_to_order = level_to_orders[icase]
cl_model_results.append(cl_cost_model.cost_per_stage(
actx.queue, traversal, level_to_order,
FMMCostModel.get_unit_calibration_params(),
))
cl_params = cl_cost_model.estimate_calibration_params(
cl_model_results, timing_results[:-1], time_field_name=time_field_name
)
test_params_sanity(cl_params)
test_params_equal(cl_params, python_params)
# }}}
# {{{ test_cost_model_op_counts_agree_with_constantone_wrangler
class OpCountingTranslationCostModel:
"""A translation cost model which assigns at cost of 1 to each operation."""
def __init__(self, dim, nlevels):
pass
@staticmethod
def direct():
return 1
@staticmethod
def p2l(level):
return 1
l2p = p2l
p2m = p2l
m2p = p2l
@staticmethod
def m2m(src_level, tgt_level):
return 1
l2l = m2m
m2l = m2m
@pytest.mark.opencl
@pytest.mark.parametrize(
("nsources", "ntargets", "dims", "dtype"), [
(5000, 5000, 3, np.float64)
]
)
def test_cost_model_op_counts_agree_with_constantone_wrangler(
actx_factory, nsources, ntargets, dims, dtype):
actx = actx_factory()
from boxtree.tools import make_normal_particle_array as p_normal
sources = p_normal(actx.queue, nsources, dims, dtype, seed=16)
targets = p_normal(actx.queue, ntargets, dims, dtype, seed=19)
rng = np.random.default_rng(20)
target_radii = rng.uniform(0, 0.04, (ntargets,)).astype(dtype)
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
tree, _ = tb(
actx.queue, sources, targets=targets, target_radii=target_radii,
stick_out_factor=0.15, max_particles_in_box=30, debug=True
)
from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(actx.context, well_sep_is_n_away=2)
trav_dev, _ = tg(actx.queue, tree, debug=True)
trav = trav_dev.get(queue=actx.queue)
from boxtree.constant_one import (
ConstantOneExpansionWrangler,
ConstantOneTreeIndependentDataForWrangler,
)
tree_indep = ConstantOneTreeIndependentDataForWrangler()
wrangler = ConstantOneExpansionWrangler(tree_indep, trav)
timing_data = {}
from boxtree.fmm import drive_fmm
src_weights = rng.random(size=tree.nsources, dtype=tree.coord_dtype)
drive_fmm(wrangler, (src_weights,), timing_data=timing_data)
cost_model = FMMCostModel(
translation_cost_model_factory=OpCountingTranslationCostModel
)
level_to_order = np.array([1 for _ in range(tree.nlevels)])
modeled_time = cost_model.cost_per_stage(
actx.queue, trav_dev, level_to_order,
FMMCostModel.get_unit_calibration_params(),
)
mismatches = []
for stage in timing_data:
if timing_data[stage]["ops_elapsed"] != modeled_time[stage]:
mismatches.append(
(stage, timing_data[stage]["ops_elapsed"], modeled_time[stage]))
assert not mismatches, "\n".join(str(s) for s in mismatches)
# {{{ Test per-box cost
total_cost = 0.0
for stage in timing_data:
total_cost += timing_data[stage]["ops_elapsed"]
per_box_cost = cost_model.cost_per_box(
actx.queue, trav_dev, level_to_order,
FMMCostModel.get_unit_calibration_params(),
)
total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost)
assert total_cost == (
total_aggregate_cost
+ modeled_time["coarsen_multipoles"]
+ modeled_time["refine_locals"]
)
# }}}
# }}}
# You can test individual routines by typing
# $ python test_cost_model.py 'test_routine(_acf)'
if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from pytest import main
main([__file__])
# vim: foldmethod=marker
__copyright__ = "Copyright (C) 2021 Hao Gao"
__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import logging
import os
import sys
import numpy as np
import numpy.linalg as la
import pytest
from arraycontext import pytest_generate_tests_for_array_contexts
from boxtree.array_context import (
PytestPyOpenCLArrayContextFactory,
_acf,
)
from boxtree.constant_one import (
ConstantOneExpansionWrangler as ConstantOneExpansionWranglerBase,
ConstantOneTreeIndependentDataForWrangler,
)
from boxtree.pyfmmlib_integration import (
FMMLibExpansionWrangler,
FMMLibTreeIndependentDataForWrangler,
Kernel,
)
logger = logging.getLogger(__name__)
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
# NOTE: Do not import mpi4py.MPI object at the module level, because OpenMPI
# does not support recursive invocations.
def _cachedir():
import tempfile
return tempfile.mkdtemp(prefix="boxtree-pytest-")
# {{{ test_against_shared
def _test_against_shared(
tmp_cache_basedir,
dims, nsources, ntargets, dtype, communicate_mpoles_via_allreduce=False):
from mpi4py import MPI
# Get the current rank
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
rank_cache_dir = os.path.join(tmp_cache_basedir, f"rank_{rank:03d}")
# Initialize arguments for worker processes
global_tree_host = None
sources_weights = np.empty(0, dtype=dtype)
helmholtz_k = 0
def fmm_level_to_order(tree, level):
return max(level, 3)
from unittest.mock import patch
with patch.dict(os.environ, {"XDG_CACHE_HOME": rank_cache_dir}):
actx = _acf()
from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(actx.context, well_sep_is_n_away=2)
tree_indep = FMMLibTreeIndependentDataForWrangler(
dims, Kernel.HELMHOLTZ if helmholtz_k else Kernel.LAPLACE)
# Generate particles and run shared-memory parallelism on rank 0
if rank == 0:
# Generate random particles and source weights
from boxtree.tools import make_normal_particle_array as p_normal
sources = p_normal(actx.queue, nsources, dims, dtype, seed=15)
targets = p_normal(actx.queue, ntargets, dims, dtype, seed=18)
rng = np.random.default_rng(20)
sources_weights = rng.uniform(0.0, 1.0, (nsources,))
target_radii = rng.uniform(0.0, 0.05, (ntargets,))
# Build the tree and interaction lists
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
global_tree_dev, _ = tb(
actx.queue, sources, targets=targets, target_radii=target_radii,
stick_out_factor=0.25, max_particles_in_box=30, debug=True)
d_trav, _ = tg(actx.queue, global_tree_dev, debug=True)
global_traversal_host = d_trav.get(queue=actx.queue)
global_tree_host = global_traversal_host.tree
# Get pyfmmlib expansion wrangler
wrangler = FMMLibExpansionWrangler(
tree_indep, global_traversal_host,
fmm_level_to_order=fmm_level_to_order)
# Compute FMM with one MPI rank
from boxtree.fmm import drive_fmm
pot_fmm = drive_fmm(wrangler, [sources_weights]) * 2 * np.pi
# Compute FMM using the distributed implementation
def wrangler_factory(local_traversal, global_traversal):
from boxtree.distributed.calculation import (
DistributedFMMLibExpansionWrangler,
)
return DistributedFMMLibExpansionWrangler(
actx.context, comm, tree_indep, local_traversal, global_traversal,
fmm_level_to_order=fmm_level_to_order,
communicate_mpoles_via_allreduce=communicate_mpoles_via_allreduce)
from boxtree.distributed import DistributedFMMRunner
distributed_fmm_info = DistributedFMMRunner(
actx.queue, global_tree_host, tg, wrangler_factory, comm=comm)
timing_data = {}
pot_dfmm = distributed_fmm_info.drive_dfmm(
[sources_weights], timing_data=timing_data)
assert timing_data
# Uncomment the following section to print the time taken of each stage
"""
if rank == 1:
from pytools import Table
table = Table()
table.add_row(["stage", "time (s)"])
for stage in timing_data:
table.add_row([stage, "%.2f" % timing_data[stage]["wall_elapsed"]])
print(table)
"""
if rank == 0:
error = (la.norm(pot_fmm - pot_dfmm * 2 * np.pi, ord=np.inf)
/ la.norm(pot_fmm, ord=np.inf))
print(error)
assert error < 1e-14
@pytest.mark.mpi
@pytest.mark.parametrize(
"num_processes, dims, nsources, ntargets, communicate_mpoles_via_allreduce", [
(4, 3, 10000, 10000, True),
(4, 3, 10000, 10000, False)
]
)
def test_against_shared(
tmp_path, num_processes, dims, nsources, ntargets,
communicate_mpoles_via_allreduce):
pytest.importorskip("mpi4py")
from boxtree.tools import run_mpi
run_mpi(__file__, num_processes, {
"OMP_NUM_THREADS": 1,
"_BOXTREE_TEST_NAME": "shared",
"_BOXTREE_TEST_DIMS": dims,
"_BOXTREE_TEST_NSOURCES": nsources,
"_BOXTREE_TEST_NTARGETS": ntargets,
"_BOXTREE_TEST_TMP_CACHE_BASEDIR": tmp_path / "boxtree_distributed_test",
"_BOXTREE_TEST_MPOLES_ALLREDUCE": communicate_mpoles_via_allreduce
})
# }}}
# {{{ test_constantone
def _test_constantone(tmp_cache_basedir, dims, nsources, ntargets, dtype):
from boxtree.distributed.calculation import DistributedExpansionWrangler
class ConstantOneExpansionWrangler(
ConstantOneExpansionWranglerBase, DistributedExpansionWrangler):
def __init__(
self, queue, comm, tree_indep, local_traversal, global_traversal):
DistributedExpansionWrangler.__init__(
self, queue, comm, global_traversal, False,
communicate_mpoles_via_allreduce=True)
ConstantOneExpansionWranglerBase.__init__(
self, tree_indep, local_traversal)
self.level_orders = np.ones(local_traversal.tree.nlevels, dtype=np.int32)
def reorder_sources(self, source_array):
if self.comm.Get_rank() == 0:
return source_array[self.global_traversal.tree.user_source_ids]
else:
return None
def reorder_potentials(self, potentials):
if self.comm.Get_rank() == 0:
return potentials[self.global_traversal.tree.sorted_target_ids]
else:
return None
from mpi4py import MPI
# Get the current rank
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
rank_cache_dir = os.path.join(tmp_cache_basedir, f"rank_{rank:03d}")
# Initialization
tree = None
sources_weights = np.empty(0, dtype=dtype)
from unittest.mock import patch
with patch.dict(os.environ, {"XDG_CACHE_HOME": rank_cache_dir}):
actx = _acf()
from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(actx.context)
if rank == 0:
# Generate random particles
from boxtree.tools import make_normal_particle_array as p_normal
sources = p_normal(actx.queue, nsources, dims, dtype, seed=15)
targets = (p_normal(actx.queue, ntargets, dims, dtype, seed=18)
+ np.array([2, 0, 0])[:dims])
# Constant one source weights
sources_weights = np.ones((nsources,), dtype=dtype)
# Build the global tree
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
tree, _ = tb(
actx.queue, sources, targets=targets, max_particles_in_box=30,
debug=True)
tree = tree.get(actx.queue)
tree_indep = ConstantOneTreeIndependentDataForWrangler()
def wrangler_factory(local_traversal, global_traversal):
return ConstantOneExpansionWrangler(
actx.queue, comm, tree_indep, local_traversal, global_traversal)
from boxtree.distributed import DistributedFMMRunner
distributed_fmm_info = DistributedFMMRunner(
actx.queue, tree, tg, wrangler_factory, comm=MPI.COMM_WORLD)
pot_dfmm = distributed_fmm_info.drive_dfmm([sources_weights])
if rank == 0:
assert (np.all(pot_dfmm == nsources))
@pytest.mark.mpi
@pytest.mark.parametrize("num_processes, dims, nsources, ntargets", [
(4, 3, 10000, 10000)
])
def test_constantone(tmp_path, num_processes, dims, nsources, ntargets):
pytest.importorskip("mpi4py")
from boxtree.tools import run_mpi
run_mpi(__file__, num_processes, {
"OMP_NUM_THREADS": 1,
"_BOXTREE_TEST_NAME": "constantone",
"_BOXTREE_TEST_DIMS": dims,
"_BOXTREE_TEST_NSOURCES": nsources,
"_BOXTREE_TEST_NTARGETS": ntargets,
"_BOXTREE_TEST_TMP_CACHE_BASEDIR": tmp_path / "boxtree_distributed_test",
"_BOXTREE_TEST_MPOLES_ALLREDUCE": False
})
# }}}
if __name__ == "__main__":
dtype = np.float64
tmp_cache_basedir = os.environ.get("_BOXTREE_TEST_TMP_CACHE_BASEDIR", _cachedir())
if "_BOXTREE_TEST_DIMS" in os.environ:
dims = int(os.environ["_BOXTREE_TEST_DIMS"])
nsources = int(os.environ["_BOXTREE_TEST_NSOURCES"])
ntargets = int(os.environ["_BOXTREE_TEST_NTARGETS"])
communicate_mpoles_via_allreduce = (
os.environ["_BOXTREE_TEST_MPOLES_ALLREDUCE"] == "True"
)
if os.environ["_BOXTREE_TEST_NAME"] == "shared":
_test_against_shared(
tmp_cache_basedir,
dims, nsources, ntargets, dtype,
communicate_mpoles_via_allreduce=communicate_mpoles_via_allreduce)
elif os.environ["_BOXTREE_TEST_NAME"] == "constantone":
_test_constantone(tmp_cache_basedir, dims, nsources, ntargets, dtype)
else:
if len(sys.argv) > 1:
# You can test individual routines by typing
# $ python test_distributed.py 'test_constantone(
# tmp_cache_basedir, 4, 3, 10000, 10000)'
exec(sys.argv[1])
elif len(sys.argv) == 1:
# Run against_shared test case with default parameter
dims = 3
nsources = 10000
ntargets = 10000
_test_against_shared(tmp_cache_basedir, dims, nsources, ntargets, dtype)
from __future__ import division, absolute_import, print_function
__copyright__ = "Copyright (C) 2013 Andreas Kloeckner"
__license__ = """
......@@ -22,169 +20,87 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
from six.moves import range
import logging
import numpy as np
import numpy.linalg as la
import pyopencl as cl
import pytest
from pyopencl.tools import ( # noqa
pytest_generate_tests_for_pyopencl as pytest_generate_tests)
from arraycontext import pytest_generate_tests_for_array_contexts
from boxtree.array_context import (
PytestPyOpenCLArrayContextFactory,
_acf, # noqa: F401
)
from boxtree.constant_one import (
ConstantOneExpansionWrangler,
ConstantOneTreeIndependentDataForWrangler,
)
from boxtree.tools import ( # noqa: F401
make_normal_particle_array as p_normal,
make_surface_particle_array as p_surface,
make_uniform_particle_array as p_uniform,
particle_array_to_host,
)
from boxtree.tools import (
make_normal_particle_array as p_normal,
make_surface_particle_array as p_surface,
make_uniform_particle_array as p_uniform,
particle_array_to_host)
import logging
logger = logging.getLogger(__name__)
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
# {{{ fmm interaction completeness test
class ConstantOneExpansionWrangler(object):
"""This implements the 'analytical routines' for a Green's function that is
constant 1 everywhere. For 'charges' of 'ones', this should get every particle
a copy of the particle count.
"""
def __init__(self, tree):
self.tree = tree
def multipole_expansion_zeros(self):
return np.zeros(self.tree.nboxes, dtype=np.float64)
local_expansion_zeros = multipole_expansion_zeros
def potential_zeros(self):
return np.zeros(self.tree.ntargets, dtype=np.float64)
def _get_source_slice(self, ibox):
pstart = self.tree.box_source_starts[ibox]
return slice(
pstart, pstart + self.tree.box_source_counts_nonchild[ibox])
def _get_target_slice(self, ibox):
pstart = self.tree.box_target_starts[ibox]
return slice(
pstart, pstart + self.tree.box_target_counts_nonchild[ibox])
def reorder_sources(self, source_array):
return source_array[self.tree.user_source_ids]
def reorder_potentials(self, potentials):
return potentials[self.tree.sorted_target_ids]
def form_multipoles(self, source_boxes, src_weights):
mpoles = self.multipole_expansion_zeros()
for ibox in source_boxes:
pslice = self._get_source_slice(ibox)
mpoles[ibox] += np.sum(src_weights[pslice])
return mpoles
def coarsen_multipoles(self, parent_boxes, mpoles):
tree = self.tree
for ibox in parent_boxes:
for child in tree.box_child_ids[:, ibox]:
if child:
mpoles[ibox] += mpoles[child]
def eval_direct(self, target_boxes, neighbor_sources_starts,
neighbor_sources_lists, src_weights):
pot = self.potential_zeros()
for itgt_box, tgt_ibox in enumerate(target_boxes):
tgt_pslice = self._get_target_slice(tgt_ibox)
src_sum = 0
start, end = neighbor_sources_starts[itgt_box:itgt_box+2]
#print "DIR: %s <- %s" % (tgt_ibox, neighbor_sources_lists[start:end])
for src_ibox in neighbor_sources_lists[start:end]:
src_pslice = self._get_source_slice(src_ibox)
src_sum += np.sum(src_weights[src_pslice])
pot[tgt_pslice] = src_sum
# {{{ ref fmmlib pot computation
return pot
def get_fmmlib_ref_pot(wrangler, weights, sources_host, targets_host,
helmholtz_k, dipole_vec=None):
dims = sources_host.shape[0]
eqn_letter = "h" if helmholtz_k else "l"
use_dipoles = dipole_vec is not None
def multipole_to_local(self, target_or_target_parent_boxes,
starts, lists, mpole_exps):
local_exps = self.local_expansion_zeros()
import pyfmmlib
for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes):
start, end = starts[itgt_box:itgt_box+2]
name = "fld" if dims == 3 else "grad"
dp = "_dp" if use_dipoles else ""
fmmlib_routine = getattr(
pyfmmlib,
f"{eqn_letter}pot{name}{dims}dall{dp}_vec")
contrib = 0
#print tgt_ibox, "<-", lists[start:end]
for src_ibox in lists[start:end]:
contrib += mpole_exps[src_ibox]
local_exps[tgt_ibox] += contrib
return local_exps
def eval_multipoles(self, target_boxes, sep_smaller_nonsiblings_starts,
sep_smaller_nonsiblings_lists, mpole_exps):
pot = self.potential_zeros()
for itgt_box, tgt_ibox in enumerate(target_boxes):
tgt_pslice = self._get_target_slice(tgt_ibox)
contrib = 0
start, end = sep_smaller_nonsiblings_starts[itgt_box:itgt_box+2]
for src_ibox in sep_smaller_nonsiblings_lists[start:end]:
contrib += mpole_exps[src_ibox]
pot[tgt_pslice] += contrib
return pot
def form_locals(self, target_or_target_parent_boxes, starts, lists, src_weights):
local_exps = self.local_expansion_zeros()
for itgt_box, tgt_ibox in enumerate(target_or_target_parent_boxes):
start, end = starts[itgt_box:itgt_box+2]
#print "LIST 4", tgt_ibox, "<-", lists[start:end]
contrib = 0
for src_ibox in lists[start:end]:
src_pslice = self._get_source_slice(src_ibox)
contrib += np.sum(src_weights[src_pslice])
local_exps[tgt_ibox] += contrib
return local_exps
def refine_locals(self, child_boxes, local_exps):
for ibox in child_boxes:
local_exps[ibox] += local_exps[self.tree.box_parent_ids[ibox]]
kwargs = {}
if dims == 3:
kwargs["iffld"] = False
else:
kwargs["ifgrad"] = False
kwargs["ifhess"] = False
return local_exps
if use_dipoles:
if helmholtz_k == 0 and dims == 2:
kwargs["dipstr"] = -weights * (dipole_vec[0] + 1j * dipole_vec[1])
else:
kwargs["dipstr"] = weights
kwargs["dipvec"] = dipole_vec
else:
kwargs["charge"] = weights
if helmholtz_k:
kwargs["zk"] = helmholtz_k
def eval_locals(self, target_boxes, local_exps):
pot = self.potential_zeros()
return wrangler.finalize_potentials(
fmmlib_routine(
sources=sources_host, targets=targets_host,
**kwargs)[0],
template_ary=weights)
for ibox in target_boxes:
tgt_pslice = self._get_target_slice(ibox)
pot[tgt_pslice] += local_exps[ibox]
# }}}
return pot
# {{{ fmm interaction completeness test
class ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
ConstantOneExpansionWrangler):
def __init__(self, tree, filtered_targets):
ConstantOneExpansionWrangler.__init__(self, tree)
self.filtered_targets = filtered_targets
def potential_zeros(self):
return np.zeros(self.filtered_targets.nfiltered_targets, dtype=np.float64)
def __init__(self, tree_indep, traversal, filtered_targets):
super().__init__(tree_indep, traversal)
self.filtered_targets = filtered_targets
def _get_target_slice(self, ibox):
pstart = self.filtered_targets.box_target_starts[ibox]
......@@ -192,6 +108,9 @@ class ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
pstart, pstart
+ self.filtered_targets.box_target_counts_nonchild[ibox])
def output_zeros(self):
return np.zeros(self.filtered_targets.nfiltered_targets, dtype=np.float64)
def reorder_potentials(self, potentials):
tree_order_all_potentials = np.zeros(self.tree.ntargets, potentials.dtype)
tree_order_all_potentials[
......@@ -203,8 +122,8 @@ class ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
class ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
ConstantOneExpansionWrangler):
def __init__(self, tree, filtered_targets):
ConstantOneExpansionWrangler.__init__(self, tree)
def __init__(self, tree_indep, traversal, filtered_targets):
super().__init__(tree_indep, traversal)
self.filtered_targets = filtered_targets
def _get_target_slice(self, ibox):
......@@ -214,55 +133,52 @@ class ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
return self.tree.sorted_target_ids[user_target_ids]
@pytest.mark.parametrize("well_sep_is_n_away", [1, 2])
@pytest.mark.parametrize(("dims", "nsources_req", "ntargets_req",
"who_has_extent", "source_gen", "target_gen", "filter_kind"),
"who_has_extent", "source_gen", "target_gen", "filter_kind",
"extent_norm", "from_sep_smaller_crit"),
[
(2, 10**5, None, "", p_normal, p_normal, None),
(3, 5 * 10**4, 4*10**4, "", p_normal, p_normal, None),
(2, 5 * 10**5, 4*10**4, "s", p_normal, p_normal, None),
(2, 5 * 10**5, 4*10**4, "st", p_normal, p_normal, None),
(2, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None),
(2, 5 * 10**5, 4*10**4, "st", p_surface, p_uniform, None),
(3, 10**5, None, "", p_normal, p_normal, None),
(3, 5 * 10**4, 4*10**4, "", p_normal, p_normal, None),
(3, 5 * 10**5, 4*10**4, "s", p_normal, p_normal, None),
(3, 5 * 10**5, 4*10**4, "st", p_normal, p_normal, None),
(3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None),
(3, 5 * 10**5, 4*10**4, "st", p_surface, p_uniform, None),
(2, 10**5, None, "", p_normal, p_normal, "user"),
(3, 5 * 10**4, 4*10**4, "", p_normal, p_normal, "user"),
(2, 5 * 10**5, 4*10**4, "s", p_normal, p_normal, "user"),
(2, 5 * 10**5, 4*10**4, "st", p_normal, p_normal, "user"),
(2, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, "user"),
(2, 5 * 10**5, 4*10**4, "st", p_surface, p_uniform, "user"),
(2, 10**5, None, "", p_normal, p_normal, "tree"),
(3, 5 * 10**4, 4*10**4, "", p_normal, p_normal, "tree"),
(2, 5 * 10**5, 4*10**4, "s", p_normal, p_normal, "tree"),
(2, 5 * 10**5, 4*10**4, "st", p_normal, p_normal, "tree"),
(2, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, "tree"),
(2, 5 * 10**5, 4*10**4, "st", p_surface, p_uniform, "tree"),
(1, 10**5, None, "", p_normal, p_normal, None, "linf", "static_linf"),
(2, 10**5, None, "", p_normal, p_normal, None, "linf", "static_linf"),
(2, 5 * 10**4, 4*10**4, "", p_normal, p_normal, None, "linf", "static_linf"), # noqa: E501
(2, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "linf", "static_linf"), # noqa: E501
(3, 10**5, None, "", p_normal, p_normal, None, "linf", "static_linf"),
(3, 5 * 10**5, 4*10**4, "", p_normal, p_normal, None, "linf", "static_linf"), # noqa: E501
(3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "linf", "static_linf"), # noqa: E501
(2, 10**5, None, "", p_normal, p_normal, "user", "linf", "static_linf"),
(3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, "user", "linf", "static_linf"), # noqa: E501
(2, 10**5, None, "", p_normal, p_normal, "tree", "linf", "static_linf"),
(3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, "tree", "linf", "static_linf"), # noqa: E501
(3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "linf", "static_linf"), # noqa: E501
(3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "linf", "precise_linf"), # noqa: E501
(3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "l2", "precise_linf"), # noqa: E501
(3, 5 * 10**5, 4*10**4, "t", p_normal, p_normal, None, "l2", "static_l2"),
])
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
who_has_extent, source_gen, target_gen, filter_kind):
def test_fmm_completeness(actx_factory, dims, nsources_req, ntargets_req,
who_has_extent, source_gen, target_gen, filter_kind, well_sep_is_n_away,
extent_norm, from_sep_smaller_crit):
"""Tests whether the built FMM traversal structures and driver completely
capture all interactions.
"""
actx = actx_factory()
devname = actx.queue.device.name.lower()
if (dims == 1
and actx.queue.device.platform.name == "Portable Computing Language"
and ("nvidia" in devname or "tesla" in devname)):
pytest.xfail("1D FMM fails to build on POCL Nvidia")
sources_have_extent = "s" in who_has_extent
targets_have_extent = "t" in who_has_extent
logging.basicConfig(level=logging.INFO)
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
dtype = np.float64
try:
sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
sources = source_gen(actx.queue, nsources_req, dims, dtype, seed=15)
nsources = len(sources[0])
if ntargets_req is None:
......@@ -270,143 +186,182 @@ def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
targets = None
ntargets = ntargets_req
else:
targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
targets = target_gen(actx.queue, ntargets_req, dims, dtype, seed=16)
ntargets = len(targets[0])
except ImportError:
pytest.skip("loo.py not available, but needed for particle array "
pytest.skip("loopy not available, but needed for particle array "
"generation")
from pyopencl.clrandom import RanluxGenerator
rng = RanluxGenerator(queue, seed=13)
rng = np.random.default_rng(12)
if sources_have_extent:
source_radii = 2**rng.uniform(queue, nsources, dtype=dtype,
a=-10, b=0)
source_radii = actx.from_numpy(
2**rng.uniform(-10, 0, (nsources)).astype(dtype)
)
else:
source_radii = None
if targets_have_extent:
target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype,
a=-10, b=0)
target_radii = actx.from_numpy(
2**rng.uniform(-10, 0, (ntargets,)).astype(dtype)
)
else:
target_radii = None
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
tb = TreeBuilder(actx.context)
tree, _ = tb(queue, sources, targets=targets,
tree, _ = tb(actx.queue, sources, targets=targets,
max_particles_in_box=30,
source_radii=source_radii, target_radii=target_radii,
debug=True)
debug=True, stick_out_factor=0.25, extent_norm=extent_norm)
if 0:
tree.get().plot()
tree = tree.get(queue=actx.queue)
tree.plot()
import matplotlib.pyplot as pt
pt.show()
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(ctx)
trav, _ = tbuild(queue, tree, debug=True)
if trav.sep_close_smaller_starts is not None:
trav = trav.merge_close_lists(queue)
tbuild = FMMTraversalBuilder(actx.context,
well_sep_is_n_away=well_sep_is_n_away,
from_sep_smaller_crit=from_sep_smaller_crit)
trav, _ = tbuild(actx.queue, tree, debug=True)
weights = np.random.randn(nsources)
#weights = np.ones(nsources)
if who_has_extent:
pre_merge_trav = trav
trav = trav.merge_close_lists(actx.queue)
# weights = np.random.randn(nsources)
weights = np.ones(nsources)
weights_sum = np.sum(weights)
host_trav = trav.get(queue=queue)
host_trav = trav.get(queue=actx.queue)
host_tree = host_trav.tree
if who_has_extent:
pre_merge_host_trav = pre_merge_trav.get(queue=actx.queue)
from boxtree.tree import ParticleListFilter
plfilt = ParticleListFilter(actx.context)
tree_indep = ConstantOneTreeIndependentDataForWrangler()
if filter_kind:
flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
.astype(np.int8)
flags = actx.from_numpy(
rng.integers(0, 2, ntargets or nsources, dtype=np.int8)
)
if filter_kind == "user":
from boxtree.tree import filter_target_lists_in_user_order
filtered_targets = filter_target_lists_in_user_order(queue, tree, flags)
filtered_targets = plfilt.filter_target_lists_in_user_order(
actx.queue, tree, flags)
wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
host_tree, filtered_targets.get(queue=queue))
tree_indep, host_trav,
filtered_targets.get(queue=actx.queue))
elif filter_kind == "tree":
from boxtree.tree import filter_target_lists_in_tree_order
filtered_targets = filter_target_lists_in_tree_order(queue, tree, flags)
filtered_targets = plfilt.filter_target_lists_in_tree_order(
actx.queue, tree, flags)
wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
host_tree, filtered_targets.get(queue=queue))
tree_indep, host_trav,
filtered_targets.get(queue=actx.queue))
else:
raise ValueError("unsupported value of 'filter_kind'")
else:
wrangler = ConstantOneExpansionWrangler(host_tree)
wrangler = ConstantOneExpansionWrangler(tree_indep, host_trav)
flags = 1 + actx.np.zeros(ntargets or nsources, dtype=np.int8)
if ntargets is None and not filter_kind:
# This check only works for targets == sources.
assert (wrangler.reorder_potentials(
wrangler.reorder_sources(weights)) == weights).all()
assert np.all(
wrangler.reorder_potentials(wrangler.reorder_sources(weights))
== weights)
from boxtree.fmm import drive_fmm
pot = drive_fmm(host_trav, wrangler, weights)
pot = drive_fmm(wrangler, (weights,))
# {{{ build, evaluate matrix (and identify missing interactions)
if filter_kind:
pot = pot[actx.to_numpy(flags) > 0]
if 0:
rel_err = la.norm((pot - weights_sum) / nsources)
good = rel_err < 1e-8
# {{{ build, evaluate matrix (and identify incorrect interactions)
if 0 and not good:
mat = np.zeros((ntargets, nsources), dtype)
from pytools import ProgressBar
logging.getLogger().setLevel(logging.WARNING)
pb = ProgressBar("matrix", nsources)
for i in range(nsources):
unit_vec = np.zeros(nsources, dtype=dtype)
unit_vec[i] = 1
mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
mat[:, i] = drive_fmm(host_trav, wrangler, (unit_vec,))
pb.progress()
pb.finished()
logging.getLogger().setLevel(logging.INFO)
import matplotlib.pyplot as pt
if 1:
pt.spy(mat)
if 0:
pt.imshow(mat)
pt.colorbar()
pt.show()
missing_tgts, missing_srcs = np.where(mat == 0)
incorrect_tgts, incorrect_srcs = np.where(mat != 1)
if 1 and len(missing_tgts):
if 1 and len(incorrect_tgts):
from boxtree.visualization import TreePlotter
plotter = TreePlotter(host_tree)
plotter.draw_tree(fill=False, edgecolor="black")
plotter.draw_box_numbers()
plotter.set_bounding_box()
tree_order_missing_tgts = \
host_tree.indices_to_tree_target_order(missing_tgts)
tree_order_missing_srcs = \
host_tree.indices_to_tree_source_order(missing_srcs)
tree_order_incorrect_tgts = \
host_tree.indices_to_tree_target_order(incorrect_tgts)
tree_order_incorrect_srcs = \
host_tree.indices_to_tree_source_order(incorrect_srcs)
src_boxes = [
host_tree.find_box_nr_for_source(i)
for i in tree_order_missing_srcs]
for i in tree_order_incorrect_srcs]
tgt_boxes = [
host_tree.find_box_nr_for_target(i)
for i in tree_order_missing_tgts]
for i in tree_order_incorrect_tgts]
print(src_boxes)
print(tgt_boxes)
pt.plot(
host_tree.targets[0][tree_order_missing_tgts],
host_tree.targets[1][tree_order_missing_tgts],
"rv")
pt.plot(
host_tree.sources[0][tree_order_missing_srcs],
host_tree.sources[1][tree_order_missing_srcs],
"go")
# plot all sources/targets
if 0:
pt.plot(
host_tree.targets[0],
host_tree.targets[1],
"v", alpha=0.9)
pt.plot(
host_tree.sources[0],
host_tree.sources[1],
"gx", alpha=0.9)
# plot offending sources/targets
if 0:
pt.plot(
host_tree.targets[0][tree_order_incorrect_tgts],
host_tree.targets[1][tree_order_incorrect_tgts],
"rv")
pt.plot(
host_tree.sources[0][tree_order_incorrect_srcs],
host_tree.sources[1][tree_order_incorrect_srcs],
"go")
pt.gca().set_aspect("equal")
from boxtree.visualization import draw_box_lists
draw_box_lists(
plotter,
pre_merge_host_trav if who_has_extent else host_trav, # pylint: disable=possibly-used-before-assignment
22)
# from boxtree.visualization import draw_same_level_non_well_sep_boxes
# draw_same_level_non_well_sep_boxes(plotter, host_trav, 2)
pt.show()
# }}}
if filter_kind:
pot = pot[flags.get() > 0]
rel_err = la.norm((pot - weights_sum) / nsources)
good = rel_err < 1e-8
if 0 and not good:
import matplotlib.pyplot as pt
pt.plot(pot-weights_sum)
......@@ -415,8 +370,8 @@ def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
if 0 and not good:
import matplotlib.pyplot as pt
filt_targets = [
host_tree.targets[0][flags.get() > 0],
host_tree.targets[1][flags.get() > 0],
host_tree.targets[0][actx.to_numpy(flags > 0)],
host_tree.targets[1][actx.to_numpy(flags > 0)],
]
host_tree.plot()
bad = np.abs(pot - weights_sum) >= 1e-3
......@@ -434,79 +389,432 @@ def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
# }}}
# {{{ test Helmholtz fmm with pyfmmlib
def test_pyfmmlib_fmm(ctx_getter):
logging.basicConfig(level=logging.INFO)
# {{{ test fmmlib integration
from pytest import importorskip
importorskip("pyfmmlib")
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
@pytest.mark.parametrize("dims", [2, 3])
@pytest.mark.parametrize("use_dipoles", [True, False])
@pytest.mark.parametrize("helmholtz_k", [0, 2])
def test_pyfmmlib_fmm(actx_factory, dims, use_dipoles, helmholtz_k):
pytest.importorskip("pyfmmlib")
actx = actx_factory()
nsources = 3000
ntargets = 1000
dims = 2
dtype = np.float64
helmholtz_k = 2
sources = p_normal(queue, nsources, dims, dtype, seed=15)
sources = p_normal(actx.queue, nsources, dims, dtype, seed=15)
targets = (
p_normal(queue, ntargets, dims, dtype, seed=18)
+ np.array([2, 0]))
p_normal(actx.queue, ntargets, dims, dtype, seed=18)
+ np.array([2, 0, 0])[:dims])
sources_host = particle_array_to_host(sources)
targets_host = particle_array_to_host(targets)
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
tb = TreeBuilder(actx.context)
tree, _ = tb(queue, sources, targets=targets,
tree, _ = tb(actx.queue, sources, targets=targets,
max_particles_in_box=30, debug=True)
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(ctx)
trav, _ = tbuild(queue, tree, debug=True)
tbuild = FMMTraversalBuilder(actx.context)
trav, _ = tbuild(actx.queue, tree, debug=True)
trav = trav.get(queue=actx.queue)
rng = np.random.default_rng(20)
weights = rng.uniform(0.0, 1.0, (nsources,))
rng = np.random.default_rng(seed=42)
dipole_vec = rng.normal(size=(dims, nsources)) if use_dipoles else None
base_order = 20 if (dims == 2 and helmholtz_k == 0) else 10
trav = trav.get(queue=queue)
def fmm_level_to_order(tree, lev):
result = base_order
if lev < 3 and helmholtz_k:
# exercise order-varies-by-level capability
result += 5
if use_dipoles:
result += 1
return result
from boxtree.pyfmmlib_integration import (
FMMLibExpansionWrangler,
FMMLibTreeIndependentDataForWrangler,
Kernel,
)
tree_indep = FMMLibTreeIndependentDataForWrangler(
trav.tree.dimensions,
Kernel.HELMHOLTZ if helmholtz_k else Kernel.LAPLACE)
wrangler = FMMLibExpansionWrangler(
tree_indep, trav,
helmholtz_k=helmholtz_k,
fmm_level_to_order=fmm_level_to_order,
dipole_vec=dipole_vec)
from boxtree.fmm import drive_fmm
from pyopencl.clrandom import RanluxGenerator
rng = RanluxGenerator(queue, seed=20)
timing_data = {}
pot = drive_fmm(wrangler, (weights,), timing_data=timing_data)
print(timing_data)
assert timing_data
weights = rng.uniform(queue, nsources, dtype=np.float64).get()
#weights = np.ones(nsources)
# {{{ ref fmmlib computation
logger.info("computing direct (reference) result")
from pyfmmlib import hpotgrad2dall_vec
ref_pot, _, _ = hpotgrad2dall_vec(ifgrad=False, ifhess=False,
sources=sources_host.T, charge=weights,
targets=targets_host.T, zk=helmholtz_k)
ref_pot = get_fmmlib_ref_pot(wrangler, weights, sources_host.T,
targets_host.T, helmholtz_k, dipole_vec)
rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
logger.info("relative l2 error vs fmmlib direct: %g", rel_err)
assert rel_err < 1e-5, rel_err
# }}}
# {{{ check against sumpy
try:
import sumpy # noqa
except ImportError:
have_sumpy = False
from warnings import warn
warn("sumpy unavailable: cannot compute independent reference "
"values for pyfmmlib", stacklevel=1)
else:
have_sumpy = True
if have_sumpy:
from sumpy.kernel import ( # pylint:disable=import-error
DirectionalSourceDerivative,
HelmholtzKernel,
LaplaceKernel,
)
from sumpy.p2p import P2P # pylint:disable=import-error
sumpy_extra_kwargs = {}
if helmholtz_k:
knl = HelmholtzKernel(dims)
sumpy_extra_kwargs["k"] = helmholtz_k
else:
knl = LaplaceKernel(dims)
if use_dipoles:
knl = DirectionalSourceDerivative(knl)
sumpy_extra_kwargs["src_derivative_dir"] = dipole_vec
p2p = P2P(actx.context,
[knl],
exclude_self=False)
_evt, (sumpy_ref_pot,) = p2p(
actx.queue, targets, sources, (weights,),
out_host=True, **sumpy_extra_kwargs)
sumpy_rel_err = (
la.norm(pot - sumpy_ref_pot, np.inf)
/ la.norm(sumpy_ref_pot, np.inf))
from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler
wrangler = Helmholtz2DExpansionWrangler(trav.tree, helmholtz_k, nterms=10)
logger.info("relative l2 error vs sumpy direct: %g", sumpy_rel_err)
assert sumpy_rel_err < 1e-5, sumpy_rel_err
# }}}
# }}}
# {{{ test fmmlib numerical stability
@pytest.mark.parametrize("dims", [2, 3])
@pytest.mark.parametrize("helmholtz_k", [0, 2])
@pytest.mark.parametrize("order", [35])
def test_pyfmmlib_numerical_stability(actx_factory, dims, helmholtz_k, order):
pytest.importorskip("pyfmmlib")
actx = actx_factory()
nsources = 30
dtype = np.float64
# The input particles are arranged with geometrically increasing/decreasing
# spacing along a line, to build a deep tree that stress-tests the
# translations.
particle_line = np.array([2**-i for i in range(nsources//2)], dtype=dtype)
particle_line = np.hstack([particle_line, 3 - particle_line])
zero = np.zeros(nsources, dtype=dtype)
sources = np.vstack([
particle_line,
zero,
zero])[:dims]
targets = sources * (1 + 1e-3)
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
tree, _ = tb(actx.queue, sources, targets=targets,
max_particles_in_box=2, debug=True)
assert tree.nlevels >= 15
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(actx.context)
trav, _ = tbuild(actx.queue, tree, debug=True)
trav = trav.get(queue=actx.queue)
weights = np.ones_like(sources[0])
from boxtree.pyfmmlib_integration import (
FMMLibExpansionWrangler,
FMMLibRotationData,
FMMLibTreeIndependentDataForWrangler,
Kernel,
)
def fmm_level_to_order(tree, lev):
return order
tree_indep = FMMLibTreeIndependentDataForWrangler(
trav.tree.dimensions,
Kernel.HELMHOLTZ if helmholtz_k else Kernel.LAPLACE)
wrangler = FMMLibExpansionWrangler(
tree_indep, trav,
helmholtz_k=helmholtz_k,
fmm_level_to_order=fmm_level_to_order,
rotation_data=FMMLibRotationData(actx.queue, trav))
from boxtree.fmm import drive_fmm
pot = drive_fmm(trav, wrangler, weights)
pot = drive_fmm(wrangler, (weights,))
assert not np.isnan(pot).any()
# {{{ ref fmmlib computation
logger.info("computing direct (reference) result")
ref_pot = get_fmmlib_ref_pot(wrangler, weights, sources, targets,
helmholtz_k)
rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
logger.info("relative l2 error vs fmmlib direct: %g", rel_err)
error_bound = (1/2) ** (1 + order) if dims == 2 else (3/4) ** (1 + order)
assert rel_err < error_bound, rel_err
# }}}
# }}}
# {{{ test particle count thresholding in traversal generation
@pytest.mark.parametrize("enable_extents", [True, False])
def test_interaction_list_particle_count_thresholding(actx_factory, enable_extents):
actx = actx_factory()
dims = 2
nsources = 1000
ntargets = 1000
dtype = np.float64
max_particles_in_box = 30
# Ensure that we have underfilled boxes.
from_sep_smaller_min_nsources_cumul = 1 + max_particles_in_box
from boxtree.fmm import drive_fmm
sources = p_normal(actx.queue, nsources, dims, dtype, seed=15)
targets = p_normal(actx.queue, ntargets, dims, dtype, seed=15)
rng = np.random.default_rng(22)
if enable_extents:
target_radii = actx.from_numpy(
2**rng.uniform(-10, 0, (ntargets,)).astype(dtype)
)
else:
target_radii = None
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
tree, _ = tb(actx.queue, sources, targets=targets,
max_particles_in_box=max_particles_in_box,
target_radii=target_radii,
debug=True, stick_out_factor=0.25)
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(actx.context)
trav, _ = tbuild(actx.queue, tree, debug=True,
_from_sep_smaller_min_nsources_cumul=from_sep_smaller_min_nsources_cumul)
weights = np.ones(nsources)
weights_sum = np.sum(weights)
host_trav = trav.get(queue=actx.queue)
tree_indep = ConstantOneTreeIndependentDataForWrangler()
wrangler = ConstantOneExpansionWrangler(tree_indep, host_trav)
pot = drive_fmm(wrangler, (weights,))
assert np.all(pot == weights_sum)
# }}}
# {{{ test fmm with float32 dtype
@pytest.mark.parametrize("enable_extents", [True, False])
def test_fmm_float32(actx_factory, enable_extents):
actx = actx_factory()
from pyopencl.characterize import has_struct_arg_count_bug
if has_struct_arg_count_bug(actx.queue.device, actx.context):
pytest.xfail("won't work on devices with the struct arg count issue")
dims = 2
nsources = 1000
ntargets = 1000
dtype = np.float32
from boxtree.fmm import drive_fmm
sources = p_normal(actx.queue, nsources, dims, dtype, seed=15)
targets = p_normal(actx.queue, ntargets, dims, dtype, seed=15)
rng = np.random.default_rng(12)
if enable_extents:
target_radii = actx.from_numpy(
2**rng.uniform(-10, 0, (ntargets,)).astype(dtype)
)
else:
target_radii = None
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
tree, _ = tb(actx.queue, sources, targets=targets,
max_particles_in_box=30,
target_radii=target_radii,
debug=True, stick_out_factor=0.25)
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(actx.context)
trav, _ = tbuild(actx.queue, tree, debug=True)
weights = np.ones(nsources)
weights_sum = np.sum(weights)
host_trav = trav.get(queue=actx.queue)
tree_indep = ConstantOneTreeIndependentDataForWrangler()
wrangler = ConstantOneExpansionWrangler(tree_indep, host_trav)
pot = drive_fmm(wrangler, (weights,))
assert np.all(pot == weights_sum)
# }}}
# {{{ test with fmm optimized 3d m2l
@pytest.mark.parametrize("well_sep_is_n_away", (1, 2))
@pytest.mark.parametrize("helmholtz_k", (0, 2))
@pytest.mark.parametrize("nsrcntgts", (20, 10000))
def test_fmm_with_optimized_3d_m2l(actx_factory, nsrcntgts, helmholtz_k,
well_sep_is_n_away):
pytest.importorskip("pyfmmlib")
actx = actx_factory()
dims = 3
nsources = ntargets = nsrcntgts // 2
dtype = np.float64
sources = p_normal(actx.queue, nsources, dims, dtype, seed=15)
targets = (
p_normal(actx.queue, ntargets, dims, dtype, seed=18)
+ np.array([2, 0, 0])[:dims])
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
tree, _ = tb(actx.queue, sources, targets=targets,
max_particles_in_box=30, debug=True)
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(actx.context)
trav, _ = tbuild(actx.queue, tree, debug=True)
trav = trav.get(queue=actx.queue)
rng = np.random.default_rng(20)
weights = rng.uniform(0.0, 1.0, (nsources,))
base_order = 10
def fmm_level_to_order(tree, lev):
result = base_order
if lev < 3 and helmholtz_k:
# exercise order-varies-by-level capability
result += 5
return result
from boxtree.pyfmmlib_integration import (
FMMLibExpansionWrangler,
FMMLibRotationData,
FMMLibTreeIndependentDataForWrangler,
Kernel,
)
tree_indep = FMMLibTreeIndependentDataForWrangler(
trav.tree.dimensions,
Kernel.HELMHOLTZ if helmholtz_k else Kernel.LAPLACE)
baseline_wrangler = FMMLibExpansionWrangler(
tree_indep, trav,
helmholtz_k=helmholtz_k,
fmm_level_to_order=fmm_level_to_order)
optimized_wrangler = FMMLibExpansionWrangler(
tree_indep, trav,
helmholtz_k=helmholtz_k,
fmm_level_to_order=fmm_level_to_order,
rotation_data=FMMLibRotationData(actx.queue, trav))
from boxtree.fmm import drive_fmm
baseline_timing_data = {}
baseline_pot = drive_fmm(
baseline_wrangler, (weights,), timing_data=baseline_timing_data)
optimized_timing_data = {}
optimized_pot = drive_fmm(
optimized_wrangler, (weights,), timing_data=optimized_timing_data)
baseline_time = baseline_timing_data["multipole_to_local"]["process_elapsed"]
if baseline_time is not None:
print(f"Baseline M2L time : {baseline_time:#.4g} s")
opt_time = optimized_timing_data["multipole_to_local"]["process_elapsed"]
if opt_time is not None:
print(f"Optimized M2L time: {opt_time:#.4g} s")
rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
logger.info("relative l2 error: %g" % rel_err)
assert rel_err < 1e-5
assert np.allclose(baseline_pot, optimized_pot, atol=1e-13, rtol=1e-13)
# }}}
# You can test individual routines by typing
# $ python test_fmm.py 'test_routine(cl.create_some_context)'
# $ python test_fmm.py 'test_routine(_acf)'
if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from py.test.cmdline import main
from pytest import main
main([__file__])
# vim: fdm=marker
__copyright__ = "Copyright (C) 2012 Andreas Kloeckner \
Copyright (C) 2017 Matt Wala \
Copyright (C) 2018 Hao Gao"
__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import logging
import numpy as np
import pytest
from arraycontext import pytest_generate_tests_for_array_contexts
from boxtree.array_context import (
PytestPyOpenCLArrayContextFactory,
_acf, # noqa: F401
)
from boxtree.tools import (
make_normal_particle_array as p_normal,
make_surface_particle_array as p_surface,
make_uniform_particle_array as p_uniform,
)
logger = logging.getLogger(__name__)
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
# {{{ test_allreduce_comm_pattern
@pytest.mark.parametrize("p", [1, 2, 3, 4, 5, 6, 7, 8, 16, 17])
def test_allreduce_comm_pattern(p):
from boxtree.tools import AllReduceCommPattern
# This models the parallel allreduce communication pattern.
# processor -> communication pattern of the processor
patterns = [AllReduceCommPattern(i, p) for i in range(p)]
# processor -> list of data items on the processor
data = [[i] for i in range(p)]
from copy import deepcopy
while not all(pat.done() for pat in patterns):
new_data = deepcopy(data)
for i in range(p):
if patterns[i].done():
for pat in patterns:
if not pat.done():
assert i not in pat.sources() | pat.sinks()
continue
# Check sources / sinks match up
for s in patterns[i].sinks():
assert i in patterns[s].sources()
for s in patterns[i].sources():
assert i in patterns[s].sinks()
# Send / recv data
for s in patterns[i].sinks():
new_data[s].extend(data[i])
for pat in patterns:
if not pat.done():
pat.advance()
data = new_data
for item in data:
assert len(item) == p
assert set(item) == set(range(p))
# }}}
# {{{ test_masked_matrix_compression
@pytest.mark.parametrize("order", ["C", "F"])
def test_masked_matrix_compression(actx_factory, order):
actx = actx_factory()
from boxtree.tools import MaskCompressorKernel
matcompr = MaskCompressorKernel(actx.context)
n = 40
m = 10
rng = np.random.default_rng(15)
arr = (rng.random((n, m)) > 0.5).astype(np.int8).copy(order=order)
d_arr = actx.from_numpy(arr)
arr_starts, arr_lists, _evt = matcompr(actx.queue, d_arr)
arr_starts = actx.to_numpy(arr_starts)
arr_lists = actx.to_numpy(arr_lists)
for i in range(n):
items = arr_lists[arr_starts[i]:arr_starts[i+1]]
assert set(items) == set(arr[i].nonzero()[0])
# }}}
# {{{ test_masked_list_compression
def test_masked_list_compression(actx_factory):
actx = actx_factory()
rng = np.random.default_rng(seed=42)
from boxtree.tools import MaskCompressorKernel
listcompr = MaskCompressorKernel(actx.context)
n = 20
arr = (rng.random(n) > 0.5).astype(np.int8)
d_arr = actx.from_numpy(arr)
arr_list, _evt = listcompr(actx.queue, d_arr)
arr_list = actx.to_numpy(arr_list)
assert set(arr_list) == set(arr.nonzero()[0])
# }}}
# {{{ test_device_record
def test_device_record(actx_factory):
actx = actx_factory()
from boxtree.tools import DeviceDataRecord
array = np.arange(60).reshape((3, 4, 5))
obj_array = np.empty((3,), dtype=object)
for i in range(3):
obj_array[i] = np.arange((i + 1) * 40).reshape(5, i + 1, 8)
record = DeviceDataRecord(
array=array,
obj_array=obj_array
)
record_dev = record.to_device(actx.queue)
record_host = record_dev.get(actx.queue)
assert np.array_equal(record_host.array, record.array)
for i in range(3):
assert np.array_equal(record_host.obj_array[i], record.obj_array[i])
# }}}
# {{{ test_particle_array
@pytest.mark.parametrize("array_factory", (p_normal, p_surface, p_uniform))
@pytest.mark.parametrize("dim", (2, 3))
@pytest.mark.parametrize("dtype", (np.float32, np.float64))
def test_particle_array(actx_factory, array_factory, dim, dtype):
actx = actx_factory()
particles = array_factory(actx.queue, 1000, dim, dtype)
assert len(particles) == dim
assert all(len(particles[0]) == len(axis) for axis in particles)
# }}}
# You can test individual routines by typing
# $ python test_tools.py 'test_routine'
if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from pytest import main
main([__file__])
# vim: fdm=marker
from __future__ import division
from __future__ import absolute_import
from six.moves import range
__copyright__ = "Copyright (C) 2013 Andreas Kloeckner"
__license__ = """
......@@ -24,20 +20,26 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import logging
import numpy as np
import numpy.linalg as la
import pyopencl as cl
import pytest
from pyopencl.tools import ( # noqa
pytest_generate_tests_for_pyopencl as pytest_generate_tests)
from arraycontext import pytest_generate_tests_for_array_contexts
from boxtree.array_context import (
PytestPyOpenCLArrayContextFactory,
_acf, # noqa: F401
)
from boxtree.tools import make_normal_particle_array
import logging
logger = logging.getLogger(__name__)
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
# {{{ connectivity test
......@@ -48,30 +50,26 @@ logger = logging.getLogger(__name__)
(3, True),
(3, False),
])
def test_tree_connectivity(ctx_getter, dims, sources_are_targets):
logging.basicConfig(level=logging.INFO)
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
def test_tree_connectivity(actx_factory, dims, sources_are_targets):
actx = actx_factory()
dtype = np.float64
sources = make_normal_particle_array(queue, 1 * 10**5, dims, dtype)
sources = make_normal_particle_array(actx.queue, 1 * 10**5, dims, dtype)
if sources_are_targets:
targets = None
else:
targets = make_normal_particle_array(queue, 2 * 10**5, dims, dtype)
targets = make_normal_particle_array(actx.queue, 2 * 10**5, dims, dtype)
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
tree, _ = tb(queue, sources, max_particles_in_box=30,
tb = TreeBuilder(actx.context)
tree, _ = tb(actx.queue, sources, max_particles_in_box=30,
targets=targets, debug=True)
from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(ctx)
trav, _ = tg(queue, tree, debug=True)
tree = tree.get(queue=queue)
trav = trav.get(queue=queue)
tg = FMMTraversalBuilder(actx.context)
trav, _ = tg(actx.queue, tree, debug=True)
tree = tree.get(queue=actx.queue)
trav = trav.get(queue=actx.queue)
levels = tree.box_levels
parents = tree.box_parent_ids.T
......@@ -91,6 +89,7 @@ def test_tree_connectivity(ctx_getter, dims, sources_are_targets):
if 0:
import matplotlib.pyplot as pt
from boxtree.visualization import TreePlotter
plotter = TreePlotter(tree)
plotter.draw_tree(fill=False, edgecolor="black")
......@@ -108,7 +107,7 @@ def test_tree_connectivity(ctx_getter, dims, sources_are_targets):
assert ibox in nbl
for jbox in nbl:
assert (0 == children[jbox]).all(), (ibox, jbox, children[jbox])
assert np.all(children[jbox] == 0), (ibox, jbox, children[jbox])
logger.info("list 1 consists of source boxes")
......@@ -117,10 +116,10 @@ def test_tree_connectivity(ctx_getter, dims, sources_are_targets):
# {{{ separated siblings (list 2) are actually separated
for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes):
start, end = trav.sep_siblings_starts[itgt_box:itgt_box+2]
seps = trav.sep_siblings_lists[start:end]
start, end = trav.from_sep_siblings_starts[itgt_box:itgt_box+2]
seps = trav.from_sep_siblings_lists[start:end]
assert (levels[seps] == levels[tgt_ibox]).all()
assert np.all(levels[seps] == levels[tgt_ibox])
# three-ish box radii (half of size)
mindist = 2.5 * 0.5 * 2**-int(levels[tgt_ibox]) * tree.root_extent
......@@ -135,46 +134,63 @@ def test_tree_connectivity(ctx_getter, dims, sources_are_targets):
# }}}
if sources_are_targets:
# {{{ sep_{smaller,bigger} are duals of each other
# {{{ from_sep_{smaller,bigger} are duals of each other
assert (trav.target_or_target_parent_boxes == np.arange(tree.nboxes)).all()
assert np.all(trav.target_or_target_parent_boxes == np.arange(tree.nboxes))
# {{{ list 4 <= list 3
for itarget_box, ibox in enumerate(trav.target_boxes):
start, end = trav.sep_smaller_starts[itarget_box:itarget_box+2]
for jbox in trav.sep_smaller_lists[start:end]:
rstart, rend = trav.sep_bigger_starts[jbox:jbox+2]
for level, ssn in enumerate(trav.from_sep_smaller_by_level):
for itarget_box, ibox in \
enumerate(trav.target_boxes_sep_smaller_by_source_level[level]):
start, end = ssn.starts[itarget_box:itarget_box+2]
assert ibox in trav.sep_bigger_lists[rstart:rend], (ibox, jbox)
for jbox in ssn.lists[start:end]:
rstart, rend = trav.from_sep_bigger_starts[jbox:jbox+2]
assert ibox in trav.from_sep_bigger_lists[rstart:rend], \
(ibox, jbox)
# }}}
# {{{ list 4 <= list 3
box_to_target_box_index = np.empty(tree.nboxes, tree.box_id_dtype)
box_to_target_box_index.fill(-1)
box_to_target_box_index[trav.target_boxes] = np.arange(
len(trav.target_boxes), dtype=tree.box_id_dtype)
assert (trav.source_boxes == trav.target_boxes).all()
assert (trav.target_or_target_parent_boxes == np.arange(
tree.nboxes, dtype=tree.box_id_dtype)).all()
box_to_target_boxes_sep_smaller_by_source_level = []
for level in range(trav.tree.nlevels):
box_to_target_boxes_sep_smaller = np.empty(
tree.nboxes, tree.box_id_dtype)
box_to_target_boxes_sep_smaller.fill(-1)
box_to_target_boxes_sep_smaller[
trav.target_boxes_sep_smaller_by_source_level[level]
] = np.arange(
len(trav.target_boxes_sep_smaller_by_source_level[level]),
dtype=tree.box_id_dtype
)
box_to_target_boxes_sep_smaller_by_source_level.append(
box_to_target_boxes_sep_smaller)
assert np.all(trav.source_boxes == trav.target_boxes)
assert np.all(trav.target_or_target_parent_boxes == np.arange(
tree.nboxes, dtype=tree.box_id_dtype))
for ibox in range(tree.nboxes):
start, end = trav.sep_bigger_starts[ibox:ibox+2]
start, end = trav.from_sep_bigger_starts[ibox:ibox+2]
for jbox in trav.sep_bigger_lists[start:end]:
# In principle, entries of sep_bigger_lists are
for jbox in trav.from_sep_bigger_lists[start:end]:
# In principle, entries of from_sep_bigger_lists are
# source boxes. In this special case, source and target boxes
# are the same thing (i.e. leaves--see assertion above), so we
# may treat them as targets anyhow.
jtgt_box = box_to_target_box_index[jbox]
assert jtgt_box != -1
good = False
rstart, rend = trav.sep_smaller_starts[jtgt_box:jtgt_box+2]
good = ibox in trav.sep_smaller_lists[rstart:rend]
for level, ssn in enumerate(trav.from_sep_smaller_by_level):
jtgt_box = \
box_to_target_boxes_sep_smaller_by_source_level[level][jbox]
if jtgt_box == -1:
continue
rstart, rend = ssn.starts[jtgt_box:jtgt_box + 2]
good = good or ibox in ssn.lists[rstart:rend]
if not good:
from boxtree.visualization import TreePlotter
......@@ -182,8 +198,8 @@ def test_tree_connectivity(ctx_getter, dims, sources_are_targets):
plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
plotter.set_bounding_box()
plotter.draw_box(ibox, facecolor='green', alpha=0.5)
plotter.draw_box(jbox, facecolor='red', alpha=0.5)
plotter.draw_box(ibox, facecolor="green", alpha=0.5)
plotter.draw_box(jbox, facecolor="red", alpha=0.5)
import matplotlib.pyplot as pt
pt.gca().set_aspect("equal")
......@@ -199,130 +215,194 @@ def test_tree_connectivity(ctx_getter, dims, sources_are_targets):
# }}}
# {{{ sep_smaller satisfies relative level assumption
# {{{ from_sep_smaller satisfies relative level assumption
for itarget_box, ibox in enumerate(trav.target_boxes):
start, end = trav.sep_smaller_starts[itarget_box:itarget_box+2]
# for itarget_box, ibox in enumerate(trav.target_boxes):
# for ssn in trav.from_sep_smaller_by_level:
for level, ssn in enumerate(trav.from_sep_smaller_by_level):
for itarget_box, ibox in enumerate(
trav.target_boxes_sep_smaller_by_source_level[level]):
start, end = ssn.starts[itarget_box:itarget_box+2]
for jbox in trav.sep_smaller_lists[start:end]:
assert levels[ibox] < levels[jbox]
for jbox in ssn.lists[start:end]:
assert levels[ibox] < levels[jbox]
logger.info("list 3 satisfies relative level assumption")
# }}}
# {{{ sep_smaller satisfies relative level assumption
# {{{ from_sep_bigger satisfies relative level assumption
for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes):
start, end = trav.sep_bigger_starts[itgt_box:itgt_box+2]
start, end = trav.from_sep_bigger_starts[itgt_box:itgt_box+2]
for jbox in trav.sep_bigger_lists[start:end]:
for jbox in trav.from_sep_bigger_lists[start:end]:
assert levels[tgt_ibox] > levels[jbox]
logger.info("list 4 satisfies relative level assumption")
# }}}
# {{{ level_start_*_box_nrs lists make sense
for name, ref_array in [
("level_start_source_box_nrs", trav.source_boxes),
("level_start_source_parent_box_nrs", trav.source_parent_boxes),
("level_start_target_box_nrs", trav.target_boxes),
("level_start_target_or_target_parent_box_nrs",
trav.target_or_target_parent_boxes)
]:
level_starts = getattr(trav, name)
for lev in range(tree.nlevels):
start, stop = level_starts[lev:lev+2]
box_nrs = ref_array[start:stop]
assert np.all(tree.box_levels[box_nrs] == lev), name
# }}}
# }}}
# {{{ visualization helper (not a test)
# {{{ visualization helper
# Set 'visualize' kwarg to True to actually plot. Otherwise, this
# test simply ensures that interaction list plotting is still
# working.
def plot_traversal(ctx_getter, do_plot=False):
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
def test_plot_traversal(actx_factory, well_sep_is_n_away=1, visualize=False):
pytest.importorskip("matplotlib")
actx = actx_factory()
#for dims in [2, 3]:
for dims in [2]:
nparticles = 10**4
dtype = np.float64
from pyopencl.clrandom import RanluxGenerator
rng = RanluxGenerator(queue, seed=15)
from pytools.obj_array import make_obj_array
rng = np.random.default_rng(15)
particles = make_obj_array([
rng.normal(queue, nparticles, dtype=dtype)
actx.from_numpy(rng.normal(0.0, 1.0, (nparticles,)).astype(dtype))
for i in range(dims)])
#if do_plot:
#pt.plot(particles[0].get(), particles[1].get(), "x")
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
tb = TreeBuilder(actx.context)
queue.finish()
tree = tb(queue, particles, max_particles_in_box=30, debug=True)
actx.queue.finish()
tree, _ = tb(actx.queue, particles, max_particles_in_box=30, debug=True)
from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(ctx)
trav = tg(queue, tree).get()
tg = FMMTraversalBuilder(actx.context, well_sep_is_n_away=well_sep_is_n_away)
trav, _ = tg(actx.queue, tree)
tree = tree.get(queue=actx.queue)
trav = trav.get(queue=actx.queue)
from boxtree.visualization import TreePlotter
plotter = TreePlotter(tree)
plotter.draw_tree(fill=False, edgecolor="black")
#plotter.draw_box_numbers()
# plotter.draw_box_numbers()
plotter.set_bounding_box()
from random import randrange, seed
seed(7)
from boxtree.visualization import draw_box_lists
if well_sep_is_n_away == 1:
draw_box_lists(plotter, trav, 380)
elif well_sep_is_n_away == 2:
draw_box_lists(plotter, trav, 320)
# {{{ generic box drawing helper
if visualize:
import matplotlib.pyplot as pt
pt.gca().set_xticks([])
pt.gca().set_yticks([])
def draw_some_box_lists(starts, lists, key_to_box=None,
count=5):
actual_count = 0
while actual_count < count:
if key_to_box is not None:
key = randrange(len(key_to_box))
ibox = key_to_box[key]
else:
key = ibox = randrange(tree.nboxes)
pt.show()
start, end = starts[key:key+2]
if start == end:
continue
# }}}
#print ibox, start, end, lists[start:end]
for jbox in lists[start:end]:
plotter.draw_box(jbox, facecolor='yellow')
plotter.draw_box(ibox, facecolor='red')
# {{{ test_from_sep_siblings_translation_and_rotation_classes
actual_count += 1
@pytest.mark.parametrize("well_sep_is_n_away", (1, 2))
def test_from_sep_siblings_translation_and_rotation_classes(
actx_factory, well_sep_is_n_away):
actx = actx_factory()
# }}}
dims = 3
nparticles = 10**4
dtype = np.float64
if 0:
# colleagues
draw_some_box_lists(
trav.colleagues_starts,
trav.colleagues_lists)
elif 0:
# near neighbors ("list 1")
draw_some_box_lists(
trav.neighbor_leaves_starts,
trav.neighbor_leaves_lists,
key_to_box=trav.source_boxes)
elif 0:
# well-separated siblings (list 2)
draw_some_box_lists(
trav.sep_siblings_starts,
trav.sep_siblings_lists)
elif 1:
# separated smaller (list 3)
draw_some_box_lists(
trav.sep_smaller_starts,
trav.sep_smaller_lists,
key_to_box=trav.source_boxes)
elif 1:
# separated bigger (list 4)
draw_some_box_lists(
trav.sep_bigger_starts,
trav.sep_bigger_lists)
# {{{ build tree
import matplotlib.pyplot as pt
pt.show()
from pytools.obj_array import make_obj_array
rng = np.random.default_rng(15)
particles = make_obj_array([
actx.from_numpy(rng.normal(0.0, 1.0, (nparticles,)).astype(dtype))
for i in range(dims)])
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
actx.queue.finish()
tree, _ = tb(actx.queue, particles, max_particles_in_box=30, debug=True)
# }}}
# {{{ build traversal
from boxtree.rotation_classes import RotationClassesBuilder
from boxtree.translation_classes import TranslationClassesBuilder
from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(actx.context, well_sep_is_n_away=well_sep_is_n_away)
trav, _ = tg(actx.queue, tree)
rb = RotationClassesBuilder(actx.context)
result, _ = rb(actx.queue, trav, tree)
tb = TranslationClassesBuilder(actx.context)
result_tb, _ = tb(actx.queue, trav, tree)
rot_classes = actx.to_numpy(
result.from_sep_siblings_rotation_classes)
rot_angles = actx.to_numpy(
result.from_sep_siblings_rotation_class_to_angle)
translation_classes = actx.to_numpy(
result_tb.from_sep_siblings_translation_classes)
distance_vectors = actx.to_numpy(
result_tb.from_sep_siblings_translation_class_to_distance_vector)
tree = tree.get(queue=actx.queue)
trav = trav.get(queue=actx.queue)
centers = tree.box_centers.T
# }}}
# For each entry of from_sep_siblings, compute the source-target translation
# direction as a vector, and check that the from_sep_siblings rotation class
# in the traversal corresponds to the angle with the z-axis of the
# translation direction.
for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes):
start, end = trav.from_sep_siblings_starts[itgt_box:itgt_box+2]
seps = trav.from_sep_siblings_lists[start:end]
level_rot_classes = rot_classes[start:end]
level_translation_classes = translation_classes[start:end]
actual_translation_vecs = distance_vectors[:, level_translation_classes].T
expected_translation_vecs = centers[tgt_ibox] - centers[seps]
if len(expected_translation_vecs) > 0:
assert np.allclose(actual_translation_vecs, expected_translation_vecs,
atol=1e-13, rtol=1e-13)
theta = np.arctan2(
la.norm(expected_translation_vecs[:, :dims - 1], axis=1),
expected_translation_vecs[:, dims - 1])
level_rot_angles = rot_angles[level_rot_classes]
assert np.allclose(theta, level_rot_angles, atol=1e-13, rtol=1e-13)
# }}}
......@@ -335,7 +415,7 @@ if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from py.test.cmdline import main
from pytest import main
main([__file__])
# vim: fdm=marker
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
import six
from six.moves import range
__copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
__license__ = """
......@@ -26,65 +20,68 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import logging
import sys
import numpy as np
import sys
import pytest
import logging
from arraycontext import pytest_generate_tests_for_array_contexts
import pyopencl as cl
from pyopencl.tools import ( # noqa
pytest_generate_tests_for_pyopencl as pytest_generate_tests)
from boxtree.array_context import (
PytestPyOpenCLArrayContextFactory,
_acf, # noqa: F401
)
from boxtree.tools import make_normal_particle_array
logger = logging.getLogger(__name__)
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
# {{{ bounding box test
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("dims", [2, 3])
@pytest.mark.parametrize("nparticles", [9, 4096, 10**5])
def test_bounding_box(ctx_getter, dtype, dims, nparticles):
logging.basicConfig(level=logging.INFO)
def test_bounding_box(actx_factory, dtype, dims, nparticles):
actx = actx_factory()
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
from boxtree.tools import AXIS_NAMES
from boxtree.bounding_box import BoundingBoxFinder
bbf = BoundingBoxFinder(ctx)
from boxtree.tools import AXIS_NAMES
bbf = BoundingBoxFinder(actx.context)
axis_names = AXIS_NAMES[:dims]
logger.info("%s - %s %s", dtype, dims, nparticles)
logger.info("%s - %s %s" % (dtype, dims, nparticles))
particles = make_normal_particle_array(actx.queue, nparticles, dims, dtype)
particles = make_normal_particle_array(queue, nparticles, dims, dtype)
bbox_min = [np.min(actx.to_numpy(x)) for x in particles]
bbox_max = [np.max(actx.to_numpy(x)) for x in particles]
bbox_min = [np.min(x.get()) for x in particles]
bbox_max = [np.max(x.get()) for x in particles]
bbox_cl, evt = bbf(particles, radii=None)
bbox_cl = bbox_cl.get()
bbox_cl, _evt = bbf(particles, radii=None)
bbox_cl = actx.to_numpy(bbox_cl)
bbox_min_cl = np.empty(dims, dtype)
bbox_max_cl = np.empty(dims, dtype)
for i, ax in enumerate(axis_names):
bbox_min_cl[i] = bbox_cl["min_"+ax]
bbox_max_cl[i] = bbox_cl["max_"+ax]
bbox_min_cl[i] = bbox_cl[f"min_{ax}"]
bbox_max_cl[i] = bbox_cl[f"max_{ax}"]
assert (bbox_min == bbox_min_cl).all()
assert (bbox_max == bbox_max_cl).all()
assert np.all(bbox_min == bbox_min_cl)
assert np.all(bbox_max == bbox_max_cl)
# }}}
# {{{ test basic (no source/target distinction) tree build
def run_build_test(builder, queue, dims, dtype, nparticles, do_plot,
max_particles_in_box=30, **kwargs):
def run_build_test(builder, actx, dims, dtype, nparticles, visualize,
max_particles_in_box=None, max_leaf_refine_weight=None,
refine_weights=None, **kwargs):
dtype = np.dtype(dtype)
if dtype == np.float32:
......@@ -92,41 +89,50 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot,
elif dtype == np.float64:
tol = 1e-12
else:
raise RuntimeError("unsupported dtype: %s" % dtype)
raise RuntimeError(f"unsupported dtype: {dtype}")
if (dtype == np.float32
and dims == 2
and queue.device.platform.name == "Portable Computing Language"):
pytest.xfail("2D float doesn't work on POCL")
logger.info(75 * "-")
logger.info(75*"-")
logger.info("%dD %s - %d particles - max %d per box - %s" % (
if max_particles_in_box is not None:
logger.info("%dD %s - %d particles - max %d per box - %s",
dims, dtype.type.__name__, nparticles, max_particles_in_box,
" - ".join("%s: %s" % (k, v) for k, v in six.iteritems(kwargs))))
logger.info(75*"-")
" - ".join(f"{k}: {v}" for k, v in kwargs.items()))
else:
logger.info("%dD %s - %d particles - max leaf weight %d - %s",
dims, dtype.type.__name__, nparticles, max_leaf_refine_weight,
" - ".join(f"{k}: {v}" for k, v in kwargs.items()))
logger.info(75 * "-")
particles = make_normal_particle_array(queue, nparticles, dims, dtype)
particles = make_normal_particle_array(actx.queue, nparticles, dims, dtype)
if do_plot:
if visualize:
import matplotlib.pyplot as pt
pt.plot(particles[0].get(), particles[1].get(), "x")
np_particles = actx.to_numpy(particles)
pt.plot(np_particles[0], np_particles[1], "x")
queue.finish()
actx.queue.finish()
tree, _ = builder(queue, particles,
max_particles_in_box=max_particles_in_box, debug=True,
**kwargs)
tree = tree.get(queue=queue)
tree, _ = builder(actx.queue, particles,
max_particles_in_box=max_particles_in_box,
refine_weights=refine_weights,
max_leaf_refine_weight=max_leaf_refine_weight,
debug=True, **kwargs)
tree = tree.get(queue=actx.queue)
sorted_particles = np.array(list(tree.sources))
unsorted_particles = np.array([pi.get() for pi in particles])
assert (sorted_particles
== unsorted_particles[:, tree.user_source_ids]).all()
unsorted_particles = np.array([actx.to_numpy(pi) for pi in particles])
assert np.all(sorted_particles
== unsorted_particles[:, tree.user_source_ids])
if refine_weights is not None:
refine_weights_reordered = (
actx.to_numpy(refine_weights)[tree.user_source_ids])
all_good_so_far = True
if do_plot:
if visualize:
from boxtree.visualization import TreePlotter
plotter = TreePlotter(tree)
plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
......@@ -136,19 +142,36 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot,
scaled_tol = tol*tree.root_extent
for ibox in range(tree.nboxes):
# Empty boxes exist in non-pruned trees--which themselves are undocumented.
# These boxes will fail these tests.
if not (tree.box_flags[ibox] & bfe.HAS_OWN_SRCNTGTS):
if not (tree.box_flags[ibox] & bfe.IS_SOURCE_OR_TARGET_BOX):
continue
extent_low, extent_high = tree.get_box_extent(ibox)
assert (extent_low >= tree.bounding_box[0] - scaled_tol).all(), (
assert np.all(extent_low >= tree.bounding_box[0] - scaled_tol), (
ibox, extent_low, tree.bounding_box[0])
assert (extent_high <= tree.bounding_box[1] + scaled_tol).all(), (
assert np.all(extent_high <= tree.bounding_box[1] + scaled_tol), (
ibox, extent_high, tree.bounding_box[1])
center = tree.box_centers[:, ibox]
for _, bbox_min, bbox_max in [
(
"source",
tree.box_source_bounding_box_min[:, ibox],
tree.box_source_bounding_box_max[:, ibox]),
(
"target",
tree.box_target_bounding_box_min[:, ibox],
tree.box_target_bounding_box_max[:, ibox]),
]:
assert np.all(extent_low - scaled_tol <= bbox_min)
assert np.all(bbox_min - scaled_tol <= center)
assert np.all(bbox_max - scaled_tol <= extent_high)
assert np.all(center - scaled_tol <= bbox_max)
start = tree.box_source_starts[ibox]
box_children = tree.box_child_ids[:, ibox]
......@@ -162,12 +185,10 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot,
start:start+tree.box_source_counts_cumul[ibox]]
good = (
(box_particles < extent_high[:, np.newaxis] + scaled_tol)
&
(extent_low[:, np.newaxis] - scaled_tol <= box_particles)
)
& (extent_low[:, np.newaxis] - scaled_tol <= box_particles))
all_good_here = good.all()
if do_plot and not all_good_here and all_good_so_far:
all_good_here = np.all(good)
if visualize and not all_good_here and all_good_so_far:
pt.plot(
box_particles[0, np.where(~good)[1]],
box_particles[1, np.where(~good)[1]], "ro")
......@@ -177,9 +198,26 @@ def run_build_test(builder, queue, dims, dtype, nparticles, do_plot,
if not all_good_here:
print("BAD BOX", ibox)
if not (tree.box_flags[ibox] & bfe.HAS_SOURCE_OR_TARGET_CHILD_BOXES):
# Check that leaf particle density is as promised.
nparticles_in_box = tree.box_source_counts_cumul[ibox]
if max_particles_in_box is not None:
if nparticles_in_box > max_particles_in_box:
print("too many particles "
f"({nparticles_in_box} > {max_particles_in_box}); box {ibox}")
all_good_here = False
else:
assert refine_weights is not None
box_weight = np.sum(
refine_weights_reordered[start:start+nparticles_in_box]) # pylint: disable=possibly-used-before-assignment
if box_weight > max_leaf_refine_weight:
print("refine weight exceeded "
f"({box_weight} > {max_leaf_refine_weight}); box {ibox}")
all_good_here = False
all_good_so_far = all_good_so_far and all_good_here
if do_plot:
if visualize:
pt.gca().set_aspect("equal", "datalim")
pt.show()
......@@ -191,97 +229,107 @@ def particle_tree_test_decorator(f):
f = pytest.mark.parametrize("dtype", [np.float64, np.float32])(f)
f = pytest.mark.parametrize("dims", [2, 3])(f)
def wrapper(*args, **kwargs):
logging.basicConfig(level=logging.INFO)
f(*args, **kwargs)
return f
@particle_tree_test_decorator
def test_single_boxparticle_tree(ctx_getter, dtype, dims, do_plot=False):
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
def test_single_box_particle_tree(actx_factory, dtype, dims, visualize=False):
actx = actx_factory()
from boxtree import TreeBuilder
builder = TreeBuilder(ctx)
builder = TreeBuilder(actx.context)
run_build_test(builder, queue, dims,
dtype, 4, do_plot=do_plot)
run_build_test(builder, actx, dims,
dtype, 4, max_particles_in_box=30, visualize=visualize)
@particle_tree_test_decorator
def test_two_level_particle_tree(ctx_getter, dtype, dims, do_plot=False):
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
def test_two_level_particle_tree(actx_factory, dtype, dims, visualize=False):
actx = actx_factory()
from boxtree import TreeBuilder
builder = TreeBuilder(ctx)
builder = TreeBuilder(actx.context)
run_build_test(builder, queue, dims,
dtype, 50, do_plot=do_plot)
run_build_test(builder, actx, dims,
dtype, 50, max_particles_in_box=30, visualize=visualize)
@particle_tree_test_decorator
def test_unpruned_particle_tree(ctx_getter, dtype, dims, do_plot=False):
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
def test_unpruned_particle_tree(actx_factory, dtype, dims, visualize=False):
actx = actx_factory()
from boxtree import TreeBuilder
builder = TreeBuilder(ctx)
builder = TreeBuilder(actx.context)
# test unpruned tree build
run_build_test(builder, queue, dims, dtype, 10**5,
do_plot=do_plot, skip_prune=True)
run_build_test(builder, actx, dims, dtype, 10**5,
visualize=visualize, max_particles_in_box=30, skip_prune=True)
@particle_tree_test_decorator
def test_particle_tree_with_reallocations(ctx_getter, dtype, dims, do_plot=False):
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
def test_particle_tree_with_reallocations(
actx_factory, dtype, dims, visualize=False):
actx = actx_factory()
from boxtree import TreeBuilder
builder = TreeBuilder(ctx)
builder = TreeBuilder(actx.context)
run_build_test(builder, queue, dims, dtype, 10**5,
do_plot=do_plot, nboxes_guess=5)
run_build_test(builder, actx, dims, dtype, 10**5,
max_particles_in_box=30, visualize=visualize, nboxes_guess=5)
@particle_tree_test_decorator
def test_particle_tree_with_many_empty_leaves(
ctx_getter, dtype, dims, do_plot=False):
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
actx_factory, dtype, dims, visualize=False):
actx = actx_factory()
from boxtree import TreeBuilder
builder = TreeBuilder(actx.context)
run_build_test(builder, actx, dims, dtype, 10**5,
max_particles_in_box=5, visualize=visualize)
@particle_tree_test_decorator
def test_vanilla_particle_tree(actx_factory, dtype, dims, visualize=False):
actx = actx_factory()
from boxtree import TreeBuilder
builder = TreeBuilder(ctx)
builder = TreeBuilder(actx.context)
run_build_test(builder, queue, dims, dtype, 10**5,
do_plot=do_plot, max_particles_in_box=5)
run_build_test(builder, actx, dims, dtype, 10**5,
max_particles_in_box=30, visualize=visualize)
@particle_tree_test_decorator
def test_vanilla_particle_tree(ctx_getter, dtype, dims, do_plot=False):
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
def test_explicit_refine_weights_particle_tree(
actx_factory, dtype, dims, visualize=False):
actx = actx_factory()
from boxtree import TreeBuilder
builder = TreeBuilder(ctx)
builder = TreeBuilder(actx.context)
run_build_test(builder, queue, dims, dtype, 10**5,
do_plot=do_plot)
nparticles = 10**5
rng = np.random.default_rng(10)
refine_weights = actx.from_numpy(
rng.integers(1, 10, (nparticles,), dtype=np.int32)
)
run_build_test(builder, actx, dims, dtype, nparticles,
refine_weights=refine_weights, max_leaf_refine_weight=100,
visualize=visualize)
@particle_tree_test_decorator
def test_non_adaptive_particle_tree(ctx_getter, dtype, dims, do_plot=False):
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
def test_non_adaptive_particle_tree(actx_factory, dtype, dims, visualize=False):
actx = actx_factory()
from boxtree import TreeBuilder
builder = TreeBuilder(ctx)
builder = TreeBuilder(actx.context)
run_build_test(builder, queue, dims, dtype, 10**4,
do_plot=do_plot, non_adaptive=True)
run_build_test(builder, actx, dims, dtype, 10**4,
max_particles_in_box=30, visualize=visualize, kind="non-adaptive")
# }}}
......@@ -290,63 +338,63 @@ def test_non_adaptive_particle_tree(ctx_getter, dtype, dims, do_plot=False):
@pytest.mark.opencl
@pytest.mark.parametrize("dims", [2, 3])
def test_source_target_tree(ctx_getter, dims, do_plot=False):
logging.basicConfig(level=logging.INFO)
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
def test_source_target_tree(actx_factory, dims, visualize=False):
actx = actx_factory()
nsources = 2 * 10**5
ntargets = 3 * 10**5
dtype = np.float64
sources = make_normal_particle_array(queue, nsources, dims, dtype,
sources = make_normal_particle_array(actx.queue, nsources, dims, dtype,
seed=12)
targets = make_normal_particle_array(queue, ntargets, dims, dtype,
targets = make_normal_particle_array(actx.queue, ntargets, dims, dtype,
seed=19)
if do_plot:
if visualize:
import matplotlib.pyplot as pt
pt.plot(sources[0].get(), sources[1].get(), "rx")
pt.plot(targets[0].get(), targets[1].get(), "g+")
np_sources, np_targets = actx.to_numpy(sources), actx.to_numpy(targets)
pt.plot(np_sources[0], np_sources[1], "rx")
pt.plot(np_targets[0], np_targets[1], "g+")
pt.show()
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
tb = TreeBuilder(actx.context)
queue.finish()
tree, _ = tb(queue, sources, targets=targets,
actx.queue.finish()
tree, _ = tb(actx.queue, sources, targets=targets,
max_particles_in_box=10, debug=True)
tree = tree.get(queue=queue)
tree = tree.get(queue=actx.queue)
sorted_sources = np.array(list(tree.sources))
sorted_targets = np.array(list(tree.targets))
unsorted_sources = np.array([pi.get() for pi in sources])
unsorted_targets = np.array([pi.get() for pi in targets])
assert (sorted_sources
== unsorted_sources[:, tree.user_source_ids]).all()
unsorted_sources = np.array([actx.to_numpy(pi) for pi in sources])
unsorted_targets = np.array([actx.to_numpy(pi) for pi in targets])
assert np.all(sorted_sources
== unsorted_sources[:, tree.user_source_ids])
user_target_ids = np.empty(tree.ntargets, dtype=np.intp)
user_target_ids[tree.sorted_target_ids] = np.arange(tree.ntargets, dtype=np.intp)
assert (sorted_targets
== unsorted_targets[:, user_target_ids]).all()
assert np.all(sorted_targets
== unsorted_targets[:, user_target_ids])
all_good_so_far = True
if do_plot:
if visualize:
from boxtree.visualization import TreePlotter
plotter = TreePlotter(tree)
plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
plotter.set_bounding_box()
tol = 1e-15
for ibox in range(tree.nboxes):
extent_low, extent_high = tree.get_box_extent(ibox)
assert (extent_low >=
tree.bounding_box[0] - 1e-12*tree.root_extent).all(), ibox
assert (extent_high <=
tree.bounding_box[1] + 1e-12*tree.root_extent).all(), ibox
assert np.all(extent_low
>= tree.bounding_box[0] - 1e-12*tree.root_extent), ibox
assert np.all(extent_high
<= tree.bounding_box[1] + 1e-12*tree.root_extent), ibox
src_start = tree.box_source_starts[ibox]
tgt_start = tree.box_target_starts[ibox]
......@@ -367,14 +415,14 @@ def test_source_target_tree(ctx_getter, dims, do_plot=False):
("targets", sorted_targets[:,
tgt_start:tgt_start+tree.box_target_counts_cumul[ibox]]),
]:
good = (
(particles < extent_high[:, np.newaxis])
&
(extent_low[:, np.newaxis] <= particles)
).all(axis=0)
all_good_here = good.all()
if do_plot and not all_good_here:
good = np.all(
(particles < extent_high[:, np.newaxis] + tol)
& (extent_low[:, np.newaxis] - tol <= particles),
axis=0)
all_good_here = np.all(good)
if visualize and not all_good_here:
pt.plot(
particles[0, np.where(~good)[0]],
particles[1, np.where(~good)[0]], "ro")
......@@ -382,13 +430,14 @@ def test_source_target_tree(ctx_getter, dims, do_plot=False):
plotter.draw_box(ibox, edgecolor="red")
pt.show()
if not all_good_here:
print("BAD BOX %s %d" % (what, ibox))
if not all_good_here:
print(f"BAD BOX {what} {ibox}")
all_good_so_far = all_good_so_far and all_good_here
all_good_so_far = all_good_so_far and all_good_here
assert all_good_so_far
if do_plot:
if visualize:
pt.gca().set_aspect("equal", "datalim")
pt.show()
......@@ -399,54 +448,84 @@ def test_source_target_tree(ctx_getter, dims, do_plot=False):
@pytest.mark.opencl
@pytest.mark.parametrize("dims", [2, 3])
def test_extent_tree(ctx_getter, dims, do_plot=False):
logging.basicConfig(level=logging.INFO)
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
@pytest.mark.parametrize("extent_norm", ["linf", "l2"])
def test_extent_tree(actx_factory, dims, extent_norm, visualize=False):
actx = actx_factory()
nsources = 100000
ntargets = 200000
dtype = np.float64
npoint_sources_per_source = 16
sources = make_normal_particle_array(queue, nsources, dims, dtype,
sources = make_normal_particle_array(actx.queue, nsources, dims, dtype,
seed=12)
targets = make_normal_particle_array(queue, ntargets, dims, dtype,
targets = make_normal_particle_array(actx.queue, ntargets, dims, dtype,
seed=19)
from pyopencl.clrandom import RanluxGenerator
rng = RanluxGenerator(queue, seed=13)
source_radii = 2**rng.uniform(queue, nsources, dtype=dtype,
a=-10, b=0)
target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype,
a=-10, b=0)
refine_weights = actx.np.zeros(nsources + ntargets, np.int32)
refine_weights[:nsources] = 1
rng = np.random.default_rng(13)
source_radii = actx.from_numpy(
2**rng.uniform(-10, 0, (nsources,)).astype(dtype)
)
target_radii = actx.from_numpy(
2**rng.uniform(-10, 0, (ntargets,)).astype(dtype)
)
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
tb = TreeBuilder(actx.context)
queue.finish()
dev_tree, _ = tb(queue, sources, targets=targets,
source_radii=source_radii, target_radii=target_radii,
max_particles_in_box=10, debug=True)
actx.queue.finish()
dev_tree, _ = tb(actx.queue, sources, targets=targets,
source_radii=source_radii,
target_radii=target_radii,
extent_norm=extent_norm,
refine_weights=refine_weights,
max_leaf_refine_weight=20,
# max_particles_in_box=10,
# Set artificially small, to exercise the reallocation code.
nboxes_guess=10,
debug=True,
stick_out_factor=0)
logger.info("transfer tree, check orderings")
tree = dev_tree.get(queue=queue)
tree = dev_tree.get(queue=actx.queue)
if visualize:
import matplotlib.pyplot as pt
np_sources, np_targets = actx.to_numpy(sources), actx.to_numpy(targets)
pt.plot(np_sources[0], np_sources[1], "rx")
pt.plot(np_targets[0], np_targets[1], "g+")
from boxtree.visualization import TreePlotter
plotter = TreePlotter(tree)
plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
plotter.draw_box_numbers()
plotter.set_bounding_box()
pt.gca().set_aspect("equal", "datalim")
pt.show()
sorted_sources = np.array(list(tree.sources))
sorted_targets = np.array(list(tree.targets))
sorted_source_radii = tree.source_radii
sorted_target_radii = tree.target_radii
unsorted_sources = np.array([pi.get() for pi in sources])
unsorted_targets = np.array([pi.get() for pi in targets])
unsorted_source_radii = source_radii.get()
unsorted_target_radii = target_radii.get()
assert (sorted_sources
== unsorted_sources[:, tree.user_source_ids]).all()
assert (sorted_source_radii
== unsorted_source_radii[tree.user_source_ids]).all()
unsorted_sources = np.array([actx.to_numpy(pi) for pi in sources])
unsorted_targets = np.array([actx.to_numpy(pi) for pi in targets])
unsorted_source_radii = actx.to_numpy(source_radii)
unsorted_target_radii = actx.to_numpy(target_radii)
assert np.all(sorted_sources
== unsorted_sources[:, tree.user_source_ids])
assert np.all(sorted_source_radii
== unsorted_source_radii[tree.user_source_ids])
# {{{ test box structure, stick-out criterion
......@@ -455,25 +534,36 @@ def test_extent_tree(ctx_getter, dims, do_plot=False):
user_target_ids = np.empty(tree.ntargets, dtype=np.intp)
user_target_ids[tree.sorted_target_ids] = np.arange(tree.ntargets, dtype=np.intp)
if ntargets:
assert (sorted_targets
== unsorted_targets[:, user_target_ids]).all()
assert (sorted_target_radii
== unsorted_target_radii[user_target_ids]).all()
assert np.all(sorted_targets
== unsorted_targets[:, user_target_ids])
assert np.all(sorted_target_radii
== unsorted_target_radii[user_target_ids])
all_good_so_far = True
# {{{ check sources, targets
assert np.sum(tree.box_source_counts_nonchild) == nsources
assert np.sum(tree.box_target_counts_nonchild) == ntargets
for ibox in range(tree.nboxes):
extent_low, extent_high = tree.get_box_extent(ibox)
kid_sum = sum(
tree.box_target_counts_cumul[ichild_box]
for ichild_box in tree.box_child_ids[:, ibox]
if ichild_box != 0)
assert (
tree.box_target_counts_cumul[ibox]
== (
tree.box_target_counts_nonchild[ibox]
+ kid_sum)), ibox
box_radius = np.max(extent_high-extent_low) * 0.5
stick_out_dist = tree.stick_out_factor * box_radius
for ibox in range(tree.nboxes):
extent_low, extent_high = tree.get_box_extent(ibox)
assert (extent_low >=
tree.bounding_box[0] - 1e-12*tree.root_extent).all(), ibox
assert (extent_high <=
tree.bounding_box[1] + 1e-12*tree.root_extent).all(), ibox
assert np.all(extent_low
>= tree.bounding_box[0] - 1e-12*tree.root_extent), ibox
assert np.all(extent_high
<= tree.bounding_box[1] + 1e-12*tree.root_extent), ibox
box_children = tree.box_child_ids[:, ibox]
existing_children = box_children[box_children != 0]
......@@ -485,6 +575,19 @@ def test_extent_tree(ctx_getter, dims, do_plot=False):
+ np.sum(tree.box_target_counts_cumul[existing_children])
== tree.box_target_counts_cumul[ibox])
del existing_children
del box_children
for ibox in range(tree.nboxes):
lev = int(tree.box_levels[ibox])
box_radius = 0.5 * tree.root_extent / (1 << lev)
box_center = tree.box_centers[:, ibox]
extent_low = box_center - box_radius
extent_high = box_center + box_radius
stick_out_dist = tree.stick_out_factor * box_radius
radius_with_stickout = (1 + tree.stick_out_factor) * box_radius
for what, starts, counts, points, radii in [
("source", tree.box_source_starts, tree.box_source_counts_cumul,
sorted_sources, sorted_source_radii),
......@@ -496,18 +599,32 @@ def test_extent_tree(ctx_getter, dims, do_plot=False):
check_particles = points[:, bslice]
check_radii = radii[bslice]
good = (
(check_particles + check_radii
< extent_high[:, np.newaxis] + stick_out_dist)
&
(extent_low[:, np.newaxis] - stick_out_dist
<= check_particles - check_radii)
).all(axis=0)
if extent_norm == "linf":
good = np.all(
(check_particles + check_radii
< extent_high[:, np.newaxis] + stick_out_dist)
&
(extent_low[:, np.newaxis] - stick_out_dist
<= check_particles - check_radii),
axis=0)
elif extent_norm == "l2":
center_dists = np.sqrt(
np.sum(
(check_particles - box_center.reshape(-1, 1))**2,
axis=0))
all_good_here = good.all()
good = (
(center_dists + check_radii)**2
< dims * radius_with_stickout**2)
else:
raise ValueError(f"unexpected value of extent_norm: '{extent_norm}'")
all_good_here = np.all(good)
if not all_good_here:
print("BAD BOX %s %d level %d" % (what, ibox, tree.box_levels[ibox]))
print(f"BAD BOX {what} {ibox} level {tree.box_levels[ibox]}")
all_good_so_far = all_good_so_far and all_good_here
assert all_good_here
......@@ -522,24 +639,25 @@ def test_extent_tree(ctx_getter, dims, do_plot=False):
logger.info("creating point sources")
np.random.seed(20)
from pytools.obj_array import make_obj_array
point_sources = make_obj_array([
cl.array.to_device(queue,
actx.from_numpy(
unsorted_sources[i][:, np.newaxis]
+ unsorted_source_radii[:, np.newaxis]
* np.random.uniform(
-1, 1, size=(nsources, npoint_sources_per_source))
)
* rng.uniform(-1, 1, size=(nsources, npoint_sources_per_source))
)
for i in range(dims)])
point_source_starts = cl.array.arange(queue,
0, (nsources+1)*npoint_sources_per_source, npoint_sources_per_source,
dtype=tree.particle_id_dtype)
point_source_starts = actx.from_numpy(
np.arange(
0,
(nsources + 1) * npoint_sources_per_source,
npoint_sources_per_source,
dtype=tree.particle_id_dtype)
)
from boxtree.tree import link_point_sources
dev_tree = link_point_sources(queue, dev_tree,
dev_tree = link_point_sources(actx.queue, dev_tree,
point_source_starts, point_sources,
debug=True)
......@@ -548,52 +666,52 @@ def test_extent_tree(ctx_getter, dims, do_plot=False):
# }}}
# {{{ geometry query test
# {{{ leaves to balls query test
@pytest.mark.opencl
@pytest.mark.geo_lookup
@pytest.mark.parametrize("dims", [2, 3])
def test_geometry_query(ctx_getter, dims, do_plot=False):
logging.basicConfig(level=logging.INFO)
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
def test_leaves_to_balls_query(actx_factory, dims, visualize=False):
actx = actx_factory()
nparticles = 10**5
dtype = np.float64
particles = make_normal_particle_array(queue, nparticles, dims, dtype)
particles = make_normal_particle_array(actx.queue, nparticles, dims, dtype)
if do_plot:
if visualize:
import matplotlib.pyplot as pt
pt.plot(particles[0].get(), particles[1].get(), "x")
np_particles = actx.to_numpy(particles)
pt.plot(np_particles[0], np_particles[1], "x")
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
tb = TreeBuilder(actx.context)
queue.finish()
tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True)
actx.queue.finish()
tree, _ = tb(actx.queue, particles, max_particles_in_box=30, debug=True)
nballs = 10**4
ball_centers = make_normal_particle_array(queue, nballs, dims, dtype)
ball_radii = cl.array.empty(queue, nballs, dtype).fill(0.1)
ball_centers = make_normal_particle_array(actx.queue, nballs, dims, dtype)
ball_radii = 0.1 + actx.np.zeros(nballs, dtype)
from boxtree.geo_lookup import LeavesToBallsLookupBuilder
lblb = LeavesToBallsLookupBuilder(ctx)
from boxtree.area_query import LeavesToBallsLookupBuilder
lblb = LeavesToBallsLookupBuilder(actx.context)
lbl, _ = lblb(queue, tree, ball_centers, ball_radii)
lbl, _ = lblb(actx.queue, tree, ball_centers, ball_radii)
# get data to host for test
tree = tree.get(queue=queue)
lbl = lbl.get(queue=queue)
ball_centers = np.array([x.get() for x in ball_centers]).T
ball_radii = ball_radii.get()
tree = tree.get(queue=actx.queue)
lbl = lbl.get(queue=actx.queue)
ball_centers = np.array([actx.to_numpy(x) for x in ball_centers]).T
ball_radii = actx.to_numpy(ball_radii)
assert len(lbl.balls_near_box_starts) == tree.nboxes + 1
from boxtree import box_flags_enum
for ibox in range(tree.nboxes):
# We only want leaves here.
if tree.box_flags[ibox] & box_flags_enum.HAS_CHILDREN:
if tree.box_flags[ibox] & box_flags_enum.HAS_SOURCE_OR_TARGET_CHILD_BOXES:
continue
box_center = tree.box_centers[:, ibox]
......@@ -604,8 +722,6 @@ def test_geometry_query(ctx_getter, dims, do_plot=False):
near_circles, = np.where(linf_circle_dists - ball_radii < box_rad)
start, end = lbl.balls_near_box_starts[ibox:ibox+2]
#print sorted(lbl.balls_near_box_lists[start:end])
#print sorted(near_circles)
assert sorted(lbl.balls_near_box_lists[start:end]) == sorted(near_circles)
# }}}
......@@ -613,70 +729,402 @@ def test_geometry_query(ctx_getter, dims, do_plot=False):
# {{{ area query test
def run_area_query_test(actx, tree, ball_centers, ball_radii):
"""
Performs an area query and checks that the result is as expected.
"""
from boxtree.area_query import AreaQueryBuilder
aqb = AreaQueryBuilder(actx.context)
area_query, _ = aqb(actx.queue, tree, ball_centers, ball_radii)
# Get data to host for test.
tree = tree.get(queue=actx.queue)
area_query = area_query.get(queue=actx.queue)
ball_centers = np.array([actx.to_numpy(x) for x in ball_centers]).T
ball_radii = actx.to_numpy(ball_radii)
from boxtree import box_flags_enum
leaf_boxes, = (
tree.box_flags & box_flags_enum.HAS_SOURCE_OR_TARGET_CHILD_BOXES == 0
).nonzero()
leaf_box_radii = np.empty(len(leaf_boxes))
dims = len(tree.sources)
leaf_box_centers = np.empty((len(leaf_boxes), dims))
for idx, leaf_box in enumerate(leaf_boxes):
box_center = tree.box_centers[:, leaf_box]
ext_l, ext_h = tree.get_box_extent(leaf_box)
leaf_box_radii[idx] = np.max(ext_h - ext_l) * 0.5
leaf_box_centers[idx] = box_center
for ball_nr, (ball_center, ball_radius) \
in enumerate(zip(ball_centers, ball_radii, strict=True)):
linf_box_dists = np.max(np.abs(ball_center - leaf_box_centers), axis=-1)
near_leaves_indices, \
= np.where(linf_box_dists < ball_radius + leaf_box_radii)
near_leaves = leaf_boxes[near_leaves_indices]
start, end = area_query.leaves_near_ball_starts[ball_nr:ball_nr+2]
found = area_query.leaves_near_ball_lists[start:end]
actual = near_leaves
assert set(found) == set(actual), (found, actual)
@pytest.mark.opencl
@pytest.mark.area_query
@pytest.mark.parametrize("dims", [2, 3])
def test_area_query(ctx_getter, dims, do_plot=False):
logging.basicConfig(level=logging.INFO)
def test_area_query(actx_factory, dims, visualize=False):
actx = actx_factory()
nparticles = 10**5
dtype = np.float64
particles = make_normal_particle_array(actx.queue, nparticles, dims, dtype)
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
if visualize:
import matplotlib.pyplot as pt
np_particles = actx.to_numpy(particles)
pt.plot(np_particles[0], np_particles[1], "x")
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
actx.queue.finish()
tree, _ = tb(actx.queue, particles, max_particles_in_box=30, debug=True)
nballs = 10**4
ball_centers = make_normal_particle_array(actx.queue, nballs, dims, dtype)
ball_radii = 0.1 + actx.np.zeros(nballs, dtype)
run_area_query_test(actx, tree, ball_centers, ball_radii)
@pytest.mark.opencl
@pytest.mark.area_query
@pytest.mark.parametrize("dims", [2, 3])
def test_area_query_balls_outside_bbox(actx_factory, dims, visualize=False):
"""
The input to the area query includes balls whose centers are not within
the tree bounding box.
"""
actx = actx_factory()
nparticles = 10**4
dtype = np.float64
particles = make_normal_particle_array(actx.queue, nparticles, dims, dtype)
if visualize:
import matplotlib.pyplot as pt
np_particles = actx.to_numpy(particles)
pt.plot(np_particles[0], np_particles[1], "x")
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
actx.queue.finish()
tree, _ = tb(actx.queue, particles, max_particles_in_box=30, debug=True)
nballs = 10**4
bbox_min = tree.bounding_box[0].min()
bbox_max = tree.bounding_box[1].max()
from pytools.obj_array import make_obj_array
rng = np.random.default_rng(13)
ball_centers = make_obj_array([
actx.from_numpy(
rng.uniform(bbox_min - 1, bbox_max + 1, nballs).astype(dtype))
for i in range(dims)])
ball_radii = 0.1 + actx.np.zeros(nballs, dtype)
run_area_query_test(actx, tree, ball_centers, ball_radii)
@pytest.mark.opencl
@pytest.mark.area_query
@pytest.mark.parametrize("dims", [2, 3])
def test_area_query_elwise(actx_factory, dims, visualize=False):
actx = actx_factory()
nparticles = 10**5
dtype = np.float64
particles = make_normal_particle_array(queue, nparticles, dims, dtype)
particles = make_normal_particle_array(actx.queue, nparticles, dims, dtype)
if do_plot:
if visualize:
import matplotlib.pyplot as pt
pt.plot(particles[0].get(), particles[1].get(), "x")
np_particles = actx.to_numpy(particles)
pt.plot(np_particles[0], np_particles[1], "x")
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
tb = TreeBuilder(actx.context)
queue.finish()
tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True)
actx.queue.finish()
tree, _ = tb(actx.queue, particles, max_particles_in_box=30, debug=True)
nballs = 10**4
ball_centers = make_normal_particle_array(queue, nballs, dims, dtype)
ball_radii = cl.array.empty(queue, nballs, dtype).fill(0.1)
ball_centers = make_normal_particle_array(actx.queue, nballs, dims, dtype)
ball_radii = 0.1 + actx.np.zeros(nballs, dtype)
from boxtree.area_query import AreaQueryElementwiseTemplate, PeerListFinder
template = AreaQueryElementwiseTemplate(
extra_args="""
coord_t *ball_radii,
%for ax in AXIS_NAMES[:dimensions]:
coord_t *ball_${ax},
%endfor
""",
ball_center_and_radius_expr="""
%for ax in AXIS_NAMES[:dimensions]:
${ball_center}.${ax} = ball_${ax}[${i}];
%endfor
${ball_radius} = ball_radii[${i}];
""",
leaf_found_op="")
peer_lists, evt = PeerListFinder(actx.context)(actx.queue, tree)
kernel = template.generate(
actx.context,
dims,
tree.coord_dtype,
tree.box_id_dtype,
peer_lists.peer_list_starts.dtype,
tree.nlevels)
evt = kernel(
*template.unwrap_args(
tree, peer_lists, ball_radii, *ball_centers),
queue=actx.queue,
wait_for=[evt],
range=slice(len(ball_radii)))
from boxtree.area_query import AreaQueryBuilder
aqb = AreaQueryBuilder(ctx)
# }}}
# {{{ level restriction test
area_query, _ = aqb(queue, tree, ball_centers, ball_radii)
@pytest.mark.opencl
@pytest.mark.parametrize("lookbehind", [0, 1])
@pytest.mark.parametrize("skip_prune", [True, False])
@pytest.mark.parametrize("dims", [2, 3])
def test_level_restriction(
actx_factory, dims, skip_prune, lookbehind, visualize=False):
actx = actx_factory()
nparticles = 10**5
dtype = np.float64
from boxtree.tools import make_surface_particle_array
particles = make_surface_particle_array(
actx.queue, nparticles, dims, dtype, seed=15)
if visualize:
import matplotlib.pyplot as pt
np_particles = actx.to_numpy(particles)
pt.plot(np_particles[0], np_particles[1], "x")
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
actx.queue.finish()
tree_dev, _ = tb(actx.queue, particles,
kind="adaptive-level-restricted",
max_particles_in_box=30, debug=True,
skip_prune=skip_prune, lr_lookbehind=lookbehind,
# Artificially low to exercise reallocation code
nboxes_guess=10)
def find_neighbors(leaf_box_centers, leaf_box_radii):
# We use an area query with a ball that is slightly larger than
# the size of a leaf box to find the neighboring leaves.
#
# Note that since this comes from an area query, the self box will be
# included in the neighbor list.
from boxtree.area_query import AreaQueryBuilder
aqb = AreaQueryBuilder(actx.context)
ball_radii = actx.from_numpy(np.min(leaf_box_radii) / 2 + leaf_box_radii)
leaf_box_centers = [actx.from_numpy(axis) for axis in leaf_box_centers]
area_query, _ = aqb(actx.queue, tree_dev, leaf_box_centers, ball_radii)
area_query = area_query.get(queue=actx.queue)
return (area_query.leaves_near_ball_starts,
area_query.leaves_near_ball_lists)
# Get data to host for test.
tree = tree.get(queue=queue)
area_query = area_query.get(queue=queue)
ball_centers = np.array([x.get() for x in ball_centers]).T
ball_radii = ball_radii.get()
tree = tree_dev.get(queue=actx.queue)
# Find leaf boxes.
from boxtree import box_flags_enum
leaf_boxes = np.array([ibox for ibox in range(tree.nboxes)
if tree.box_flags[ibox] & ~box_flags_enum.HAS_CHILDREN])
leaf_boxes, = (
tree.box_flags & box_flags_enum.HAS_SOURCE_OR_TARGET_CHILD_BOXES == 0
).nonzero()
leaf_box_radii = np.empty(len(leaf_boxes))
leaf_box_centers = np.empty((len(leaf_boxes), dims))
leaf_box_centers = np.empty((dims, len(leaf_boxes)))
for idx, leaf_box in enumerate(leaf_boxes):
box_center = tree.box_centers[:, leaf_box]
ext_l, ext_h = tree.get_box_extent(leaf_box)
leaf_box_radii[idx] = np.max(ext_h - ext_l) * 0.5
leaf_box_centers[idx] = box_center
leaf_box_centers[:, idx] = box_center
for ball_nr, (ball_center, ball_radius) \
in enumerate(zip(ball_centers, ball_radii)):
linf_box_dists = np.max(np.abs(ball_center - leaf_box_centers), axis=-1)
near_leaves_indices, \
= np.where(linf_box_dists < ball_radius + leaf_box_radii)
near_leaves = leaf_boxes[near_leaves_indices]
neighbor_starts, neighbor_and_self_lists = find_neighbors(
leaf_box_centers, leaf_box_radii)
start, end = area_query.leaves_near_ball_starts[ball_nr:ball_nr+2]
found = area_query.leaves_near_ball_lists[start:end]
actual = near_leaves
assert sorted(found) == sorted(actual)
# Check level restriction.
for leaf_idx, leaf in enumerate(leaf_boxes):
neighbors = neighbor_and_self_lists[
neighbor_starts[leaf_idx]:neighbor_starts[leaf_idx+1]]
neighbor_levels = np.array(tree.box_levels[neighbors], dtype=int)
leaf_level = int(tree.box_levels[leaf])
assert np.all(np.abs(neighbor_levels - leaf_level) <= 1), \
(neighbor_levels, leaf_level)
# }}}
# {{{ space invader query test
@pytest.mark.opencl
@pytest.mark.geo_lookup
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("dims", [2, 3])
def test_space_invader_query(actx_factory, dims, dtype, visualize=False):
actx = actx_factory()
dtype = np.dtype(dtype)
nparticles = 10**5
particles = make_normal_particle_array(actx.queue, nparticles, dims, dtype)
if visualize:
import matplotlib.pyplot as pt
np_particles = actx.to_numpy(particles)
pt.plot(np_particles[0], np_particles[1], "x")
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
actx.queue.finish()
tree, _ = tb(actx.queue, particles, max_particles_in_box=30, debug=True)
nballs = 10**4
ball_centers = make_normal_particle_array(actx.queue, nballs, dims, dtype)
ball_radii = 0.1 + actx.np.zeros(nballs, dtype)
from boxtree.area_query import LeavesToBallsLookupBuilder, SpaceInvaderQueryBuilder
siqb = SpaceInvaderQueryBuilder(actx.context)
# We can use leaves-to-balls lookup to get the set of overlapping balls for
# each box, and from there to compute the outer space invader distance.
lblb = LeavesToBallsLookupBuilder(actx.context)
siq, _ = siqb(actx.queue, tree, ball_centers, ball_radii)
lbl, _ = lblb(actx.queue, tree, ball_centers, ball_radii)
# get data to host for test
tree = tree.get(queue=actx.queue)
siq = siq.get(queue=actx.queue)
lbl = lbl.get(queue=actx.queue)
ball_centers = np.array([actx.to_numpy(x) for x in ball_centers])
ball_radii = actx.to_numpy(ball_radii)
# Find leaf boxes.
from boxtree import box_flags_enum
outer_space_invader_dist = np.zeros(tree.nboxes)
for ibox in range(tree.nboxes):
# We only want leaves here.
if tree.box_flags[ibox] & box_flags_enum.HAS_SOURCE_OR_TARGET_CHILD_BOXES:
continue
start, end = lbl.balls_near_box_starts[ibox:ibox + 2]
space_invaders = lbl.balls_near_box_lists[start:end]
if len(space_invaders) > 0:
outer_space_invader_dist[ibox] = np.max(np.abs(
tree.box_centers[:, ibox].reshape((-1, 1))
- ball_centers[:, space_invaders]))
assert np.allclose(siq, outer_space_invader_dist)
# }}}
# {{{ test_same_tree_with_zero_weight_particles
@pytest.mark.opencl
@pytest.mark.parametrize("dims", [2, 3])
def test_same_tree_with_zero_weight_particles(actx_factory, dims):
actx = actx_factory()
ntargets_values = [300, 400, 500]
stick_out_factors = [0, 0.1, 0.3, 1]
nsources = 20
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
trees = []
rng = np.random.default_rng(10)
for stick_out_factor in stick_out_factors:
for ntargets in [40]:
sources = rng.random((dims, nsources))**2
sources[:, 0] = -0.1
sources[:, 1] = 1.1
targets = rng.random((dims, max(ntargets_values)))[:, :ntargets].copy()
target_radii = rng.random(max(ntargets_values))[:ntargets]
sources = actx.from_numpy(sources)
targets = actx.from_numpy(targets)
refine_weights = actx.np.zeros(nsources + ntargets, np.int32)
refine_weights[:nsources] = 1
refine_weights[nsources:] = 0
tree, _ = tb(actx.queue, sources, targets=targets,
target_radii=target_radii,
stick_out_factor=stick_out_factor,
max_leaf_refine_weight=10,
refine_weights=refine_weights,
debug=True)
tree = tree.get(queue=actx.queue)
trees.append(tree)
print("TREE:", tree.nboxes)
if 0:
import matplotlib.pyplot as plt
for tree in trees:
plt.figure()
tree.plot()
plt.show()
# }}}
# {{{ test_max_levels_error
def test_max_levels_error(actx_factory):
actx = actx_factory()
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
sources = [actx.np.zeros(11, np.float64) for i in range(2)]
from boxtree.tree_build import MaxLevelsExceeded
with pytest.raises(MaxLevelsExceeded):
_tree, _ = tb(actx.queue, sources, max_particles_in_box=10, debug=True)
# }}}
......@@ -688,7 +1136,7 @@ if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from py.test.cmdline import main
from pytest import main
main([__file__])
# vim: fdm=marker
__copyright__ = "Copyright (C) 2012 Andreas Kloeckner, Xiaoyu Wei"
__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import logging
import sys
import numpy as np
import pytest
from arraycontext import pytest_generate_tests_for_array_contexts
# This means boxtree's tests have a hard dependency on meshmode. That's OK.
from meshmode import _acf # noqa: F401
from meshmode.array_context import PytestPyOpenCLArrayContextFactory
from boxtree import (
make_meshmode_mesh_from_leaves,
make_tree_of_boxes_root,
uniformly_refine_tree_of_boxes,
)
logger = logging.getLogger(__name__)
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
# {{{ make_global_leaf_quadrature
def make_global_leaf_quadrature(actx, tob, order):
from meshmode.discretization.poly_element import (
GaussLegendreTensorProductGroupFactory,
)
group_factory = GaussLegendreTensorProductGroupFactory(order=order)
mesh, _ = make_meshmode_mesh_from_leaves(tob)
if 0:
import matplotlib.pyplot as plt
from meshmode.mesh import visualization as mvis
mvis.draw_2d_mesh(mesh,
set_bounding_box=True,
draw_vertex_numbers=False,
draw_element_numbers=False)
plt.plot(tob.box_centers[0][tob.leaf_boxes],
tob.box_centers[1][tob.leaf_boxes], "rx")
plt.plot(mesh.vertices[0], mesh.vertices[1], "ro")
plt.show()
from meshmode.discretization import Discretization
discr = Discretization(actx, mesh, group_factory)
lflevels = tob.box_levels[tob.leaf_boxes]
lfmeasures = (tob.root_extent / (2**lflevels))**tob.dimensions
from arraycontext import flatten
weights = flatten(actx.thaw(discr.quad_weights()), actx)
jacobians = actx.from_numpy(
np.repeat(lfmeasures/(2**tob.dimensions), discr.groups[0].nunit_dofs)
)
q = weights * jacobians
from pytools.obj_array import make_obj_array
nodes = discr.nodes()
x = make_obj_array([flatten(actx.thaw(axis), actx) for axis in nodes])
return x, q
# }}}
# {{{ test_uniform_tree_of_boxes
@pytest.mark.parametrize("dim", [1, 2, 3])
@pytest.mark.parametrize("order", [1, 2, 3])
@pytest.mark.parametrize("nlevels", [1, 4])
def test_uniform_tree_of_boxes(actx_factory, dim, order, nlevels):
actx = actx_factory()
rng = np.random.default_rng(seed=42)
lower_bounds = rng.random(dim)
radius = rng.random() + 0.1
upper_bounds = lower_bounds + radius
tob = make_tree_of_boxes_root((lower_bounds, upper_bounds))
for _ in range(nlevels - 1):
tob = uniformly_refine_tree_of_boxes(tob)
_, q = make_global_leaf_quadrature(actx, tob, order)
# integrates 1 exactly
box_area = actx.np.sum(q)
assert np.isclose(actx.to_numpy(box_area), radius**dim)
# }}}
# {{{ test_uniform_tree_of_boxes_convergence
@pytest.mark.parametrize("dim", [1, 2, 3])
@pytest.mark.parametrize("order", [1, 2, 3])
def test_uniform_tree_of_boxes_convergence(actx_factory, dim, order):
actx = actx_factory()
radius = np.pi
lower_bounds = np.zeros(dim) - radius / 2
upper_bounds = lower_bounds + radius
tob = make_tree_of_boxes_root((lower_bounds, upper_bounds))
min_level = 0
max_level = 1
for _ in range(min_level):
tob = uniformly_refine_tree_of_boxes(tob)
# integrate cos(0.1*x + 0.2*y + 0.3*z + e) over [-pi/2, pi/2]**dim
qexact_table = {
1: 20 * np.sin(np.pi/20) * np.cos(np.e),
2: 50 * (np.sqrt(5) - 1) * np.sin(np.pi/20) * np.cos(np.e),
3: 250/3 * (np.sqrt(10 - 2*np.sqrt(5)) - 2) * np.cos(np.e)
}
qexact = qexact_table[dim]
from pytools.convergence import EOCRecorder
eoc_rec = EOCRecorder()
for _ in range(min_level, max_level + 1):
x, q = make_global_leaf_quadrature(actx, tob, order)
x = np.array([actx.to_numpy(xx) for xx in x])
q = actx.to_numpy(q)
inner = np.ones_like(q) * np.e
for iaxis in range(dim):
inner += (iaxis + 1) * 0.1 * x[iaxis]
f = np.cos(inner)
qh = np.sum(f * q)
err = abs(qexact - qh)
if err < 1e-14:
break # eoc will be off after hitting machine epsilon
# under uniform refinement, last box is always leaf
eoc_rec.add_data_point(tob.get_box_size(-1), err)
tob = uniformly_refine_tree_of_boxes(tob)
if len(eoc_rec.history) > 1:
# Gauss quadrature is exact up to degree 2q+1
eps = 0.05
assert eoc_rec.order_estimate() >= 2*order + 2 - eps
else:
print(err)
assert err < 1e-14
# }}}
# {{{ test_tree_plot
def test_tree_plot():
radius = np.pi
dim = 2
nlevels = 3
lower_bounds = np.zeros(dim) - radius / 2
upper_bounds = lower_bounds + radius
tob = make_tree_of_boxes_root((lower_bounds, upper_bounds))
for _ in range(nlevels - 1):
tob = uniformly_refine_tree_of_boxes(tob)
# test TreePlotter compatibility
from boxtree.visualization import TreePlotter
tp = TreePlotter(tob)
tp.draw_tree()
tp.set_bounding_box()
# import matplotlib.pyplot as plt
# plt.show()
# }}}
# {{{ test_traversal_from_tob
def test_traversal_from_tob(actx_factory):
actx = actx_factory()
radius = np.pi
dim = 2
nlevels = 3
lower_bounds = np.zeros(dim) - radius/2
upper_bounds = lower_bounds + radius
tob = make_tree_of_boxes_root((lower_bounds, upper_bounds))
for _ in range(nlevels):
tob = uniformly_refine_tree_of_boxes(tob)
from boxtree.tree_of_boxes import _sort_boxes_by_level
tob = _sort_boxes_by_level(tob)
from dataclasses import replace
tob = replace(
tob,
box_centers=actx.from_numpy(tob.box_centers),
root_extent=tob.root_extent,
box_parent_ids=actx.from_numpy(tob.box_parent_ids),
box_child_ids=actx.from_numpy(tob.box_child_ids),
box_levels=actx.from_numpy(tob.box_levels),
box_flags=actx.from_numpy(tob.box_flags),
)
from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(actx.context)
_trav, _ = tg(actx.queue, tob)
# }}}
# You can test individual routines by typing
# $ python test_tree.py 'test_routine(cl.create_some_context)'
if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from pytest import main
main([__file__])
# vim: fdm=marker