From eda8122c5d7bca68d43c9ad3fd60aee147eced88 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sat, 10 Nov 2018 22:14:58 -0600 Subject: [PATCH 01/50] Add list 1 cost counter --- boxtree/cost.py | 128 +++++++++++++++++++++++++++++++++ test/test_performance_model.py | 87 ++++++++++++++++++++++ 2 files changed, 215 insertions(+) create mode 100644 boxtree/cost.py create mode 100644 test/test_performance_model.py diff --git a/boxtree/cost.py b/boxtree/cost.py new file mode 100644 index 0000000..fd96f39 --- /dev/null +++ b/boxtree/cost.py @@ -0,0 +1,128 @@ +import numpy as np +import pyopencl as cl +import pyopencl.array # noqa: F401 +from pyopencl.elementwise import ElementwiseKernel +from pyopencl.tools import dtype_to_ctype +from abc import ABC, abstractmethod +from mako.template import Template + + +class CostCounter(ABC): + @abstractmethod + def collect_direct_interaction_data(self, traversal, tree): + pass + + +class CLCostCounter(CostCounter): + def __init__(self, queue): + self.queue = queue + + def collect_direct_interaction_data(self, traversal, tree): + ntarget_boxes = len(traversal.target_boxes) + particle_id_dtype = tree.particle_id_dtype + box_id_dtype = tree.box_id_dtype + + count_direct_interaction_knl = ElementwiseKernel( + self.queue.context, + Template(""" + ${particle_id_t} *srcs_by_itgt_box, + ${box_id_t} *source_boxes_starts, + ${box_id_t} *source_boxes_lists, + ${particle_id_t} *box_source_counts_nonchild + """).render( + particle_id_t=dtype_to_ctype(particle_id_dtype), + box_id_t=dtype_to_ctype(box_id_dtype) + ), + Template(""" + ${particle_id_t} nsources = 0; + ${box_id_t} source_boxes_start_idx = source_boxes_starts[i]; + ${box_id_t} source_boxes_end_idx = source_boxes_starts[i + 1]; + + for(${box_id_t} cur_source_boxes_idx = source_boxes_start_idx; + cur_source_boxes_idx < source_boxes_end_idx; + cur_source_boxes_idx++) + { + ${box_id_t} cur_source_box = source_boxes_lists[ + cur_source_boxes_idx + ]; + nsources += 
box_source_counts_nonchild[cur_source_box]; + } + + srcs_by_itgt_box[i] = nsources + """).render( + particle_id_t=dtype_to_ctype(particle_id_dtype), + box_id_t=dtype_to_ctype(box_id_dtype) + ), + name="count_direct_interaction" + ) + + box_source_counts_nonchild_dev = cl.array.to_device( + self.queue, tree.box_source_counts_nonchild + ) + + # List 1 + nlist1_srcs_by_itgt_box_dev = cl.array.zeros( + self.queue, (ntarget_boxes,), dtype=particle_id_dtype + ) + neighbor_source_boxes_starts_dev = cl.array.to_device( + self.queue, traversal.neighbor_source_boxes_starts + ) + neighbor_source_boxes_lists_dev = cl.array.to_device( + self.queue, traversal.neighbor_source_boxes_lists + ) + count_direct_interaction_knl( + nlist1_srcs_by_itgt_box_dev, + neighbor_source_boxes_starts_dev, + neighbor_source_boxes_lists_dev, + box_source_counts_nonchild_dev + ) + + result = dict() + result["nlist1_srcs_by_itgt_box"] = nlist1_srcs_by_itgt_box_dev.get() + + return result + + +class PythonCostCounter(CostCounter): + def collect_direct_interaction_data(self, traversal, tree): + ntarget_boxes = len(traversal.target_boxes) + + # target box index -> nsources + nlist1_srcs_by_itgt_box = np.zeros(ntarget_boxes, dtype=np.intp) + nlist3close_srcs_by_itgt_box = np.zeros(ntarget_boxes, dtype=np.intp) + nlist4close_srcs_by_itgt_box = np.zeros(ntarget_boxes, dtype=np.intp) + + for itgt_box in range(ntarget_boxes): + nlist1_srcs = 0 + start, end = traversal.neighbor_source_boxes_starts[itgt_box:itgt_box+2] + for src_ibox in traversal.neighbor_source_boxes_lists[start:end]: + nlist1_srcs += tree.box_source_counts_nonchild[src_ibox] + + nlist1_srcs_by_itgt_box[itgt_box] = nlist1_srcs + + nlist3close_srcs = 0 + # Could be None, if not using targets with extent. 
+ if traversal.from_sep_close_smaller_starts is not None: + start, end = ( + traversal.from_sep_close_smaller_starts[itgt_box:itgt_box+2]) + for src_ibox in traversal.from_sep_close_smaller_lists[start:end]: + nlist3close_srcs += tree.box_source_counts_nonchild[src_ibox] + + nlist3close_srcs_by_itgt_box[itgt_box] = nlist3close_srcs + + nlist4close_srcs = 0 + # Could be None, if not using targets with extent. + if traversal.from_sep_close_bigger_starts is not None: + start, end = ( + traversal.from_sep_close_bigger_starts[itgt_box:itgt_box+2]) + for src_ibox in traversal.from_sep_close_bigger_lists[start:end]: + nlist4close_srcs += tree.box_source_counts_nonchild[src_ibox] + + nlist4close_srcs_by_itgt_box[itgt_box] = nlist4close_srcs + + result = dict() + result["nlist1_srcs_by_itgt_box"] = nlist1_srcs_by_itgt_box + result["nlist3close_srcs_by_itgt_box"] = nlist3close_srcs_by_itgt_box + result["nlist4close_srcs_by_itgt_box"] = nlist4close_srcs_by_itgt_box + + return result diff --git a/test/test_performance_model.py b/test/test_performance_model.py new file mode 100644 index 0000000..28d6f6b --- /dev/null +++ b/test/test_performance_model.py @@ -0,0 +1,87 @@ +import numpy as np +import pyopencl as cl +import time + +import pytest +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + + +@pytest.mark.opencl +@pytest.mark.parametrize( + ("nsources", "ntargets", "dims", "dtype"), [ + (5000, 5000, 3, np.float64) + ] +) +def test_performance_counter(ctx_factory, nsources, ntargets, dims, dtype): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + # {{{ Generate sources, targets and target_radii + + from boxtree.tools import make_normal_particle_array as p_normal + sources = p_normal(queue, nsources, dims, dtype, seed=15) + targets = p_normal(queue, ntargets, dims, dtype, seed=18) + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(queue.context, seed=22) + target_radii = rng.uniform( + queue, 
ntargets, a=0, b=0.05, dtype=dtype + ).get() + + # }}} + + # {{{ Generate tree and traversal + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + tree, _ = tb( + queue, sources, targets=targets, target_radii=target_radii, + stick_out_factor=0.15, max_particles_in_box=60, debug=True + ) + + from boxtree.traversal import FMMTraversalBuilder + tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2) + d_trav, _ = tg(queue, tree, debug=True) + trav = d_trav.get(queue=queue) + + # }}} + + from boxtree.cost import CLCostCounter, PythonCostCounter + cl_cost_counter = CLCostCounter(queue) + python_cost_counter = PythonCostCounter() + + start_time = time.time() + cl_direct_interaction = cl_cost_counter.collect_direct_interaction_data( + trav, trav.tree + ) + print("OpenCL time {0}".format(str(time.time() - start_time))) + start_time = time.time() + python_direct_interaction = python_cost_counter.collect_direct_interaction_data( + trav, trav.tree + ) + print("Python time {0}".format(str(time.time() - start_time))) + + for field in ["nlist1_srcs_by_itgt_box"]: + assert np.equal( + cl_direct_interaction[field], + python_direct_interaction[field] + ).all() + + +def main(): + nsouces = 5000 + ntargets = 5000 + ndims = 3 + dtype = np.float64 + ctx_factory = cl.create_some_context + + test_performance_counter(ctx_factory, nsouces, ntargets, ndims, dtype) + + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + main() -- GitLab From 683387c353d6bec6f9f32ae68b9a6c4f92e72ade Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 11 Nov 2018 10:14:20 -0600 Subject: [PATCH 02/50] Fix Python2 compatibility --- boxtree/cost.py | 14 +++++++++++++- test/test_performance_model.py | 11 +++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index fd96f39..35c35e8 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -3,8 +3,20 @@ import pyopencl as cl import pyopencl.array # noqa: F401 from 
pyopencl.elementwise import ElementwiseKernel from pyopencl.tools import dtype_to_ctype -from abc import ABC, abstractmethod from mako.template import Template +from functools import partial +import sys + +if sys.version_info >= (3, 0): + Template = partial(Template, strict_undefined=True) +else: + Template = partial(Template, strict_undefined=True, disable_unicode=True) + +if sys.version_info >= (3, 4): + from abc import ABC, abstractmethod +else: + from abc import ABCMeta, abstractmethod + ABC = ABCMeta('ABC', (), {}) class CostCounter(ABC): diff --git a/test/test_performance_model.py b/test/test_performance_model.py index 28d6f6b..c53b649 100644 --- a/test/test_performance_model.py +++ b/test/test_performance_model.py @@ -6,6 +6,12 @@ import pytest from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) +import logging +import os +logging.basicConfig(level=os.environ.get("LOGLEVEL", "WARNING")) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + @pytest.mark.opencl @pytest.mark.parametrize( @@ -55,12 +61,13 @@ def test_performance_counter(ctx_factory, nsources, ntargets, dims, dtype): cl_direct_interaction = cl_cost_counter.collect_direct_interaction_data( trav, trav.tree ) - print("OpenCL time {0}".format(str(time.time() - start_time))) + logger.info("OpenCL time {0}".format(str(time.time() - start_time))) + start_time = time.time() python_direct_interaction = python_cost_counter.collect_direct_interaction_data( trav, trav.tree ) - print("Python time {0}".format(str(time.time() - start_time))) + logger.info("Python time {0}".format(str(time.time() - start_time))) for field in ["nlist1_srcs_by_itgt_box"]: assert np.equal( -- GitLab From 0e6bf0d0bcb0a64a10b1fa5d4970596fb1847861 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 12 Nov 2018 00:02:09 -0600 Subject: [PATCH 03/50] Add counter for list 3 close and list 4 close --- boxtree/cost.py | 49 ++++++++++++++++++++++++++++++++-- 
test/test_performance_model.py | 11 +++++--- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 35c35e8..a841c26 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -60,7 +60,7 @@ class CLCostCounter(CostCounter): nsources += box_source_counts_nonchild[cur_source_box]; } - srcs_by_itgt_box[i] = nsources + srcs_by_itgt_box[i] = nsources; """).render( particle_id_t=dtype_to_ctype(particle_id_dtype), box_id_t=dtype_to_ctype(box_id_dtype) @@ -71,6 +71,7 @@ class CLCostCounter(CostCounter): box_source_counts_nonchild_dev = cl.array.to_device( self.queue, tree.box_source_counts_nonchild ) + result = dict() # List 1 nlist1_srcs_by_itgt_box_dev = cl.array.zeros( @@ -82,6 +83,7 @@ class CLCostCounter(CostCounter): neighbor_source_boxes_lists_dev = cl.array.to_device( self.queue, traversal.neighbor_source_boxes_lists ) + count_direct_interaction_knl( nlist1_srcs_by_itgt_box_dev, neighbor_source_boxes_starts_dev, @@ -89,9 +91,52 @@ class CLCostCounter(CostCounter): box_source_counts_nonchild_dev ) - result = dict() result["nlist1_srcs_by_itgt_box"] = nlist1_srcs_by_itgt_box_dev.get() + # List 3 close + if traversal.from_sep_close_smaller_starts is not None: + nlist3close_srcs_by_itgt_box_dev = cl.array.zeros( + self.queue, (ntarget_boxes,), dtype=particle_id_dtype + ) + from_sep_close_smaller_starts_dev = cl.array.to_device( + self.queue, traversal.from_sep_close_smaller_starts + ) + from_sep_close_smaller_lists_dev = cl.array.to_device( + self.queue, traversal.from_sep_close_smaller_lists + ) + + count_direct_interaction_knl( + nlist3close_srcs_by_itgt_box_dev, + from_sep_close_smaller_starts_dev, + from_sep_close_smaller_lists_dev, + box_source_counts_nonchild_dev + ) + + result["nlist3close_srcs_by_itgt_box"] = \ + nlist3close_srcs_by_itgt_box_dev.get() + + # List 4 close + if traversal.from_sep_close_bigger_starts is not None: + nlist4close_srcs_by_itgt_box_dev = cl.array.zeros( + self.queue, (ntarget_boxes,), 
dtype=particle_id_dtype + ) + from_sep_close_bigger_starts_dev = cl.array.to_device( + self.queue, traversal.from_sep_close_bigger_starts + ) + from_sep_close_bigger_lists_dev = cl.array.to_device( + self.queue, traversal.from_sep_close_bigger_lists + ) + + count_direct_interaction_knl( + nlist4close_srcs_by_itgt_box_dev, + from_sep_close_bigger_starts_dev, + from_sep_close_bigger_lists_dev, + box_source_counts_nonchild_dev + ) + + result["nlist4close_srcs_by_itgt_box"] = \ + nlist4close_srcs_by_itgt_box_dev.get() + return result diff --git a/test/test_performance_model.py b/test/test_performance_model.py index c53b649..d6a246d 100644 --- a/test/test_performance_model.py +++ b/test/test_performance_model.py @@ -61,15 +61,20 @@ def test_performance_counter(ctx_factory, nsources, ntargets, dims, dtype): cl_direct_interaction = cl_cost_counter.collect_direct_interaction_data( trav, trav.tree ) - logger.info("OpenCL time {0}".format(str(time.time() - start_time))) + logger.info("OpenCL time for collect_direct_interaction_data: {0}".format( + str(time.time() - start_time)) + ) start_time = time.time() python_direct_interaction = python_cost_counter.collect_direct_interaction_data( trav, trav.tree ) - logger.info("Python time {0}".format(str(time.time() - start_time))) + logger.info("Python time for collect_direct_interaction_data: {0}".format( + str(time.time() - start_time)) + ) - for field in ["nlist1_srcs_by_itgt_box"]: + for field in ["nlist1_srcs_by_itgt_box", "nlist3close_srcs_by_itgt_box", + "nlist4close_srcs_by_itgt_box"]: assert np.equal( cl_direct_interaction[field], python_direct_interaction[field] -- GitLab From 64f9e0b6debc7a1f2c59af32591c4b4001a11d73 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 12 Nov 2018 07:48:13 -0600 Subject: [PATCH 04/50] Performance model -> cost model --- test/{test_performance_model.py => test_cost_model.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename test/{test_performance_model.py => test_cost_model.py} 
(94%) diff --git a/test/test_performance_model.py b/test/test_cost_model.py similarity index 94% rename from test/test_performance_model.py rename to test/test_cost_model.py index d6a246d..3a44fe5 100644 --- a/test/test_performance_model.py +++ b/test/test_cost_model.py @@ -19,7 +19,7 @@ logger.setLevel(logging.INFO) (5000, 5000, 3, np.float64) ] ) -def test_performance_counter(ctx_factory, nsources, ntargets, dims, dtype): +def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -88,7 +88,7 @@ def main(): dtype = np.float64 ctx_factory = cl.create_some_context - test_performance_counter(ctx_factory, nsouces, ntargets, ndims, dtype) + test_cost_counter(ctx_factory, nsouces, ntargets, ndims, dtype) if __name__ == "__main__": -- GitLab From da012faa72be5cd3eb380b9adc3031ce93ad8f88 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sat, 24 Nov 2018 21:51:02 -0600 Subject: [PATCH 05/50] Add count_direct --- boxtree/cost.py | 41 ++++++++++++++++++++++++++++++++++++++--- test/test_cost_model.py | 40 ++++++++++++++++++++++++++++++---------- 2 files changed, 68 insertions(+), 13 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index a841c26..824c7c4 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -21,15 +21,49 @@ else: class CostCounter(ABC): @abstractmethod - def collect_direct_interaction_data(self, traversal, tree): + def collect_direct_interaction_data(self, traversal): + """Count the number of sources in direct interaction boxes. + + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :return: a :class:`dict` contains fields "nlist1_srcs_by_itgt_box", + "nlist3close_srcs_by_itgt_box", and "nlist4close_srcs_by_itgt_box". Each + of these fields is a :class:`numpy.ndarray` of shape + (traversal.ntarget_boxes,), documenting the number of sources in list 1, + list 3 close and list 4 close, respectively. 
+ """ pass + def count_direct(self, traversal): + """Count direct evaluations of each target box of *traversal*. + + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :return: a :class:`numpy.ndarray` of shape (traversal.ntarget_boxes,). + """ + tree = traversal.tree + + direct_interaction_data = self.collect_direct_interaction_data(traversal) + nlist1_srcs_by_itgt_box = ( + direct_interaction_data["nlist1_srcs_by_itgt_box"]) + nlist3close_srcs_by_itgt_box = ( + direct_interaction_data["nlist3close_srcs_by_itgt_box"]) + nlist4close_srcs_by_itgt_box = ( + direct_interaction_data["nlist4close_srcs_by_itgt_box"]) + + ntargets = tree.box_target_counts_nonchild[ + traversal.target_boxes + ] + + return ntargets * (nlist1_srcs_by_itgt_box + + nlist3close_srcs_by_itgt_box + + nlist4close_srcs_by_itgt_box) + class CLCostCounter(CostCounter): def __init__(self, queue): self.queue = queue - def collect_direct_interaction_data(self, traversal, tree): + def collect_direct_interaction_data(self, traversal): + tree = traversal.tree ntarget_boxes = len(traversal.target_boxes) particle_id_dtype = tree.particle_id_dtype box_id_dtype = tree.box_id_dtype @@ -141,7 +175,8 @@ class CLCostCounter(CostCounter): class PythonCostCounter(CostCounter): - def collect_direct_interaction_data(self, traversal, tree): + def collect_direct_interaction_data(self, traversal): + tree = traversal.tree ntarget_boxes = len(traversal.target_boxes) # target box index -> nsources diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 3a44fe5..5e8b20f 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -57,21 +57,21 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): cl_cost_counter = CLCostCounter(queue) python_cost_counter = PythonCostCounter() + # {{{ Test collect_direct_interaction_data + start_time = time.time() - cl_direct_interaction = cl_cost_counter.collect_direct_interaction_data( - trav, trav.tree - ) + cl_direct_interaction 
= \ + cl_cost_counter.collect_direct_interaction_data(trav) logger.info("OpenCL time for collect_direct_interaction_data: {0}".format( - str(time.time() - start_time)) - ) + str(time.time() - start_time) + )) start_time = time.time() - python_direct_interaction = python_cost_counter.collect_direct_interaction_data( - trav, trav.tree - ) + python_direct_interaction = \ + python_cost_counter.collect_direct_interaction_data(trav) logger.info("Python time for collect_direct_interaction_data: {0}".format( - str(time.time() - start_time)) - ) + str(time.time() - start_time) + )) for field in ["nlist1_srcs_by_itgt_box", "nlist3close_srcs_by_itgt_box", "nlist4close_srcs_by_itgt_box"]: @@ -80,6 +80,26 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): python_direct_interaction[field] ).all() + # }}} + + # {{{ Test count_direct + + start_time = time.time() + cl_count_direct = cl_cost_counter.count_direct(trav) + logger.info("OpenCL time for count_direct: {0}".format( + str(time.time() - start_time) + )) + + start_time = time.time() + python_count_direct = python_cost_counter.count_direct(trav) + logger.info("Python time for count_direct: {0}".format( + str(time.time() - start_time) + )) + + assert np.equal(cl_count_direct, python_count_direct).all() + + # }}} + def main(): nsouces = 5000 -- GitLab From e3960e354495e55f3e5a1d8c174eec0b27f9a97f Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 9 Dec 2018 11:54:38 -0600 Subject: [PATCH 06/50] Integrate cost model with pymbolic --- boxtree/cost.py | 155 +++++++++++++++++++++++++++++++++++++--- test/test_cost_model.py | 49 ++++++++++--- 2 files changed, 186 insertions(+), 18 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 824c7c4..390353c 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -1,3 +1,31 @@ +from __future__ import division, absolute_import + +__copyright__ = """ +Copyright (C) 2013 Andreas Kloeckner +Copyright (C) 2018 Matt Wala +Copyright (C) 2018 Hao Gao +""" + +__license__ 
= """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + import numpy as np import pyopencl as cl import pyopencl.array # noqa: F401 @@ -5,6 +33,7 @@ from pyopencl.elementwise import ElementwiseKernel from pyopencl.tools import dtype_to_ctype from mako.template import Template from functools import partial +from pymbolic import var import sys if sys.version_info >= (3, 0): @@ -19,7 +48,111 @@ else: ABC = ABCMeta('ABC', (), {}) -class CostCounter(ABC): +class TranslationCostModel: + """Provides modeled costs for individual translations or evaluations.""" + + def __init__(self, ncoeffs_fmm_by_level, uses_point_and_shoot): + self.ncoeffs_fmm_by_level = ncoeffs_fmm_by_level + self.uses_point_and_shoot = uses_point_and_shoot + + @staticmethod + def direct(): + return var("c_p2p") + + def p2l(self, level): + return var("c_p2l") * self.ncoeffs_fmm_by_level[level] + + def l2p(self, level): + return var("c_l2p") * self.ncoeffs_fmm_by_level[level] + + def p2m(self, level): + return var("c_p2m") * self.ncoeffs_fmm_by_level[level] + + def m2p(self, level): + return var("c_m2p") * self.ncoeffs_fmm_by_level[level] + + def m2m(self, src_level, tgt_level): + return var("c_m2m") * self.e2e_cost( + self.ncoeffs_fmm_by_level[src_level], + self.ncoeffs_fmm_by_level[tgt_level]) + + def l2l(self, src_level, tgt_level): + return var("c_l2l") * self.e2e_cost( + self.ncoeffs_fmm_by_level[src_level], + self.ncoeffs_fmm_by_level[tgt_level]) + + def m2l(self, src_level, tgt_level): + return var("c_m2l") * self.e2e_cost( + self.ncoeffs_fmm_by_level[src_level], + self.ncoeffs_fmm_by_level[tgt_level]) + + def e2e_cost(self, nsource_coeffs, ntarget_coeffs): + if self.uses_point_and_shoot: + return ( + # Rotate the coordinate system to be z axis aligned. + nsource_coeffs ** (3 / 2) + # Translate the expansion along the z axis. + + nsource_coeffs ** (1 / 2) * ntarget_coeffs + # Rotate the coordinate system back. 
+ + ntarget_coeffs ** (3 / 2)) + + return nsource_coeffs * ntarget_coeffs + + +# {{{ translation cost model factories + +def pde_aware_translation_cost_model(dim, nlevels): + """Create a cost model for FMM translation operators that make use of the + knowledge that the potential satisfies a PDE. + """ + p_fmm = np.array([var("p_fmm_lev%d" % i) for i in range(nlevels)]) + ncoeffs_fmm = (p_fmm + 1) ** (dim - 1) + + if dim == 3: + uses_point_and_shoot = True + else: + uses_point_and_shoot = False + + return TranslationCostModel( + ncoeffs_fmm_by_level=ncoeffs_fmm, + uses_point_and_shoot=uses_point_and_shoot + ) + + +def taylor_translation_cost_model(dim, nlevels): + """Create a cost model for FMM translation based on Taylor expansions + in Cartesian coordinates. + """ + p_fmm = np.array([var("p_fmm_lev%d" % i) for i in range(nlevels)]) + ncoeffs_fmm = (p_fmm + 1) ** dim + + return TranslationCostModel( + ncoeffs_fmm_by_level=ncoeffs_fmm, + uses_point_and_shoot=False + ) + +# }}} + + +class CostModel(ABC): + def __init__(self, translation_cost_model_factory, calibration_params=None): + """ + :arg translation_cost_model_factory: a function, which takes tree dimension + and the number of tree levels as arguments, returns an object of + :class:`TranslationCostModel`. + :arg calibration_params: TODO + """ + self.translation_cost_model_factory = translation_cost_model_factory + if calibration_params is None: + calibration_params = dict() + self.calibration_params = calibration_params + + def with_calibration_params(self, calibration_params): + """Return a copy of *self* with a new set of calibration parameters.""" + return type(self)( + translation_cost_model_factory=self.translation_cost_model_factory, + calibration_params=calibration_params) + @abstractmethod def collect_direct_interaction_data(self, traversal): """Count the number of sources in direct interaction boxes. 
@@ -33,9 +166,11 @@ class CostCounter(ABC): """ pass - def count_direct(self, traversal): + def count_direct(self, xlat_cost, traversal): """Count direct evaluations of each target box of *traversal*. + :arg xlat_cost: a :class:`TranslationCostModel` object which specifies the + translation cost. :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. :return: a :class:`numpy.ndarray` of shape (traversal.ntarget_boxes,). """ @@ -53,14 +188,18 @@ class CostCounter(ABC): traversal.target_boxes ] - return ntargets * (nlist1_srcs_by_itgt_box - + nlist3close_srcs_by_itgt_box - + nlist4close_srcs_by_itgt_box) + return ntargets * ( + nlist1_srcs_by_itgt_box + + nlist3close_srcs_by_itgt_box + + nlist4close_srcs_by_itgt_box + ) * xlat_cost.direct() -class CLCostCounter(CostCounter): - def __init__(self, queue): +class CLCostModel(CostModel): + def __init__(self, queue, translation_cost_model_factory, + calibration_params=None): self.queue = queue + super().__init__(translation_cost_model_factory, calibration_params) def collect_direct_interaction_data(self, traversal): tree = traversal.tree @@ -174,7 +313,7 @@ class CLCostCounter(CostCounter): return result -class PythonCostCounter(CostCounter): +class PythonCostModel(CostModel): def collect_direct_interaction_data(self, traversal): tree = traversal.tree ntarget_boxes = len(traversal.target_boxes) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 5e8b20f..7978659 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -5,6 +5,7 @@ import time import pytest from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) +from pymbolic import evaluate import logging import os @@ -43,7 +44,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): tb = TreeBuilder(ctx) tree, _ = tb( queue, sources, targets=targets, target_radii=target_radii, - stick_out_factor=0.15, max_particles_in_box=60, debug=True + stick_out_factor=0.15, 
max_particles_in_box=30, debug=True ) from boxtree.traversal import FMMTraversalBuilder @@ -53,22 +54,40 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # }}} - from boxtree.cost import CLCostCounter, PythonCostCounter - cl_cost_counter = CLCostCounter(queue) - python_cost_counter = PythonCostCounter() + # {{{ Construct cost models + + from boxtree.cost import CLCostModel, PythonCostModel + cl_cost_model = CLCostModel(queue, None) + python_cost_model = PythonCostModel(None) + + CONSTANT_ONE_PARAMS = dict( + c_l2l=1, + c_l2p=1, + c_m2l=1, + c_m2m=1, + c_m2p=1, + c_p2l=1, + c_p2m=1, + c_p2p=1 + ) + + from boxtree.cost import pde_aware_translation_cost_model + xlat_cost = pde_aware_translation_cost_model(dims, trav.tree.nlevels) + + # }}} # {{{ Test collect_direct_interaction_data start_time = time.time() cl_direct_interaction = \ - cl_cost_counter.collect_direct_interaction_data(trav) + cl_cost_model.collect_direct_interaction_data(trav) logger.info("OpenCL time for collect_direct_interaction_data: {0}".format( str(time.time() - start_time) )) start_time = time.time() python_direct_interaction = \ - python_cost_counter.collect_direct_interaction_data(trav) + python_cost_model.collect_direct_interaction_data(trav) logger.info("Python time for collect_direct_interaction_data: {0}".format( str(time.time() - start_time) )) @@ -85,13 +104,23 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # {{{ Test count_direct start_time = time.time() - cl_count_direct = cl_cost_counter.count_direct(trav) + + cl_count_direct = evaluate( + cl_cost_model.count_direct(xlat_cost, trav), + context=CONSTANT_ONE_PARAMS + ) + logger.info("OpenCL time for count_direct: {0}".format( str(time.time() - start_time) )) start_time = time.time() - python_count_direct = python_cost_counter.count_direct(trav) + + python_count_direct = evaluate( + python_cost_model.count_direct(xlat_cost, trav), + context=CONSTANT_ONE_PARAMS + ) + logger.info("Python time 
for count_direct: {0}".format( str(time.time() - start_time) )) @@ -102,8 +131,8 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): def main(): - nsouces = 5000 - ntargets = 5000 + nsouces = 100000 + ntargets = 100000 ndims = 3 dtype = np.float64 ctx_factory = cl.create_some_context -- GitLab From e42ce7b7d023081e8363f2a17d2f170161945af1 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 10 Dec 2018 09:47:48 -0600 Subject: [PATCH 07/50] Fix tese case and flake8 --- boxtree/cost.py | 4 +++- test/test_cost_model.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 390353c..902e999 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -199,7 +199,9 @@ class CLCostModel(CostModel): def __init__(self, queue, translation_cost_model_factory, calibration_params=None): self.queue = queue - super().__init__(translation_cost_model_factory, calibration_params) + super(CLCostModel, self).__init__( + translation_cost_model_factory, calibration_params + ) def collect_direct_interaction_data(self, traversal): tree = traversal.tree diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 7978659..86e966a 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -60,7 +60,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): cl_cost_model = CLCostModel(queue, None) python_cost_model = PythonCostModel(None) - CONSTANT_ONE_PARAMS = dict( + constant_one_params = dict( c_l2l=1, c_l2p=1, c_m2l=1, @@ -107,7 +107,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): cl_count_direct = evaluate( cl_cost_model.count_direct(xlat_cost, trav), - context=CONSTANT_ONE_PARAMS + context=constant_one_params ) logger.info("OpenCL time for count_direct: {0}".format( @@ -118,7 +118,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): python_count_direct = evaluate( python_cost_model.count_direct(xlat_cost, trav), - 
context=CONSTANT_ONE_PARAMS + context=constant_one_params ) logger.info("Python time for count_direct: {0}".format( -- GitLab From 30b5bfea2374bd68f3dac5908dc393d4f382ac60 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 16 Dec 2018 17:04:22 -0600 Subject: [PATCH 08/50] Separate aggregate and per box cost --- boxtree/cost.py | 214 ++++++++++++++++------------------------ test/test_cost_model.py | 48 ++++----- 2 files changed, 113 insertions(+), 149 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 902e999..b1a15ff 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -34,6 +34,7 @@ from pyopencl.tools import dtype_to_ctype from mako.template import Template from functools import partial from pymbolic import var +from pytools import memoize_method import sys if sys.version_info >= (3, 0): @@ -135,92 +136,67 @@ def taylor_translation_cost_model(dim, nlevels): class CostModel(ABC): - def __init__(self, translation_cost_model_factory, calibration_params=None): + def __init__(self, translation_cost_model_factory): """ :arg translation_cost_model_factory: a function, which takes tree dimension and the number of tree levels as arguments, returns an object of :class:`TranslationCostModel`. - :arg calibration_params: TODO """ self.translation_cost_model_factory = translation_cost_model_factory - if calibration_params is None: - calibration_params = dict() - self.calibration_params = calibration_params - - def with_calibration_params(self, calibration_params): - """Return a copy of *self* with a new set of calibration parameters.""" - return type(self)( - translation_cost_model_factory=self.translation_cost_model_factory, - calibration_params=calibration_params) @abstractmethod - def collect_direct_interaction_data(self, traversal): + def process_direct(self, traversal, c_p2p): """Count the number of sources in direct interaction boxes. :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. 
- :return: a :class:`dict` contains fields "nlist1_srcs_by_itgt_box", - "nlist3close_srcs_by_itgt_box", and "nlist4close_srcs_by_itgt_box". Each - of these fields is a :class:`numpy.ndarray` of shape - (traversal.ntarget_boxes,), documenting the number of sources in list 1, - list 3 close and list 4 close, respectively. + :arg c_p2p: calibration constant. + :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (traversal.ntarget_boxes,), with each entry represents the cost of the + box. """ pass - def count_direct(self, xlat_cost, traversal): + @abstractmethod + def process_direct_aggregate(self, traversal, xlat_cost): """Count direct evaluations of each target box of *traversal*. :arg xlat_cost: a :class:`TranslationCostModel` object which specifies the translation cost. :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. - :return: a :class:`numpy.ndarray` of shape (traversal.ntarget_boxes,). + :return: a :class:`pymbolic.primitives.Product` object representing the + aggregate cost of direct evaluations in all boxes. """ - tree = traversal.tree - - direct_interaction_data = self.collect_direct_interaction_data(traversal) - nlist1_srcs_by_itgt_box = ( - direct_interaction_data["nlist1_srcs_by_itgt_box"]) - nlist3close_srcs_by_itgt_box = ( - direct_interaction_data["nlist3close_srcs_by_itgt_box"]) - nlist4close_srcs_by_itgt_box = ( - direct_interaction_data["nlist4close_srcs_by_itgt_box"]) - - ntargets = tree.box_target_counts_nonchild[ - traversal.target_boxes - ] - - return ntargets * ( - nlist1_srcs_by_itgt_box - + nlist3close_srcs_by_itgt_box - + nlist4close_srcs_by_itgt_box - ) * xlat_cost.direct() + pass class CLCostModel(CostModel): - def __init__(self, queue, translation_cost_model_factory, - calibration_params=None): + """ + Note: For methods in this class, arguments like *traversal* should live on device + memory. 
+ """ + def __init__(self, queue, translation_cost_model_factory): self.queue = queue super(CLCostModel, self).__init__( - translation_cost_model_factory, calibration_params + translation_cost_model_factory ) - def collect_direct_interaction_data(self, traversal): - tree = traversal.tree - ntarget_boxes = len(traversal.target_boxes) - particle_id_dtype = tree.particle_id_dtype - box_id_dtype = tree.box_id_dtype - - count_direct_interaction_knl = ElementwiseKernel( + @memoize_method + def process_direct_knl(self, particle_id_dtype, box_id_dtype): + return ElementwiseKernel( self.queue.context, Template(""" - ${particle_id_t} *srcs_by_itgt_box, + double *direct_by_itgt_box, ${box_id_t} *source_boxes_starts, ${box_id_t} *source_boxes_lists, - ${particle_id_t} *box_source_counts_nonchild + ${particle_id_t} *box_source_counts_nonchild, + ${particle_id_t} *box_target_counts_nonchild, + ${box_id_t} *target_boxes, + double c_p2p """).render( particle_id_t=dtype_to_ctype(particle_id_dtype), box_id_t=dtype_to_ctype(box_id_dtype) ), - Template(""" + Template(r""" ${particle_id_t} nsources = 0; ${box_id_t} source_boxes_start_idx = source_boxes_starts[i]; ${box_id_t} source_boxes_end_idx = source_boxes_starts[i + 1]; @@ -235,127 +211,111 @@ class CLCostModel(CostModel): nsources += box_source_counts_nonchild[cur_source_box]; } - srcs_by_itgt_box[i] = nsources; + ${particle_id_t} ntargets = box_target_counts_nonchild[ + target_boxes[i] + ]; + + direct_by_itgt_box[i] += (nsources * ntargets * c_p2p); """).render( particle_id_t=dtype_to_ctype(particle_id_dtype), box_id_t=dtype_to_ctype(box_id_dtype) ), - name="count_direct_interaction" + name="process_direct" ) - box_source_counts_nonchild_dev = cl.array.to_device( - self.queue, tree.box_source_counts_nonchild - ) - result = dict() + def process_direct(self, traversal, c_p2p): + tree = traversal.tree + ntarget_boxes = len(traversal.target_boxes) + particle_id_dtype = tree.particle_id_dtype + box_id_dtype = tree.box_id_dtype - # 
List 1 - nlist1_srcs_by_itgt_box_dev = cl.array.zeros( - self.queue, (ntarget_boxes,), dtype=particle_id_dtype + count_direct_interaction_knl = self.process_direct_knl( + particle_id_dtype, box_id_dtype ) - neighbor_source_boxes_starts_dev = cl.array.to_device( - self.queue, traversal.neighbor_source_boxes_starts - ) - neighbor_source_boxes_lists_dev = cl.array.to_device( - self.queue, traversal.neighbor_source_boxes_lists + + direct_by_itgt_box_dev = cl.array.zeros( + self.queue, (ntarget_boxes,), dtype=np.float64 ) + # List 1 count_direct_interaction_knl( - nlist1_srcs_by_itgt_box_dev, - neighbor_source_boxes_starts_dev, - neighbor_source_boxes_lists_dev, - box_source_counts_nonchild_dev + direct_by_itgt_box_dev, + traversal.neighbor_source_boxes_starts, + traversal.neighbor_source_boxes_lists, + traversal.tree.box_source_counts_nonchild, + traversal.tree.box_target_counts_nonchild, + traversal.target_boxes, + c_p2p ) - result["nlist1_srcs_by_itgt_box"] = nlist1_srcs_by_itgt_box_dev.get() - # List 3 close if traversal.from_sep_close_smaller_starts is not None: - nlist3close_srcs_by_itgt_box_dev = cl.array.zeros( - self.queue, (ntarget_boxes,), dtype=particle_id_dtype - ) - from_sep_close_smaller_starts_dev = cl.array.to_device( - self.queue, traversal.from_sep_close_smaller_starts - ) - from_sep_close_smaller_lists_dev = cl.array.to_device( - self.queue, traversal.from_sep_close_smaller_lists - ) - count_direct_interaction_knl( - nlist3close_srcs_by_itgt_box_dev, - from_sep_close_smaller_starts_dev, - from_sep_close_smaller_lists_dev, - box_source_counts_nonchild_dev + direct_by_itgt_box_dev, + traversal.from_sep_close_smaller_starts, + traversal.from_sep_close_smaller_lists, + traversal.tree.box_source_counts_nonchild, + traversal.tree.box_target_counts_nonchild, + traversal.target_boxes, + c_p2p ) - result["nlist3close_srcs_by_itgt_box"] = \ - nlist3close_srcs_by_itgt_box_dev.get() - # List 4 close if traversal.from_sep_close_bigger_starts is not None: - 
nlist4close_srcs_by_itgt_box_dev = cl.array.zeros( - self.queue, (ntarget_boxes,), dtype=particle_id_dtype - ) - from_sep_close_bigger_starts_dev = cl.array.to_device( - self.queue, traversal.from_sep_close_bigger_starts - ) - from_sep_close_bigger_lists_dev = cl.array.to_device( - self.queue, traversal.from_sep_close_bigger_lists - ) - count_direct_interaction_knl( - nlist4close_srcs_by_itgt_box_dev, - from_sep_close_bigger_starts_dev, - from_sep_close_bigger_lists_dev, - box_source_counts_nonchild_dev + direct_by_itgt_box_dev, + traversal.from_sep_close_bigger_starts, + traversal.from_sep_close_bigger_lists, + traversal.tree.box_source_counts_nonchild, + traversal.tree.box_target_counts_nonchild, + traversal.target_boxes, + c_p2p ) - result["nlist4close_srcs_by_itgt_box"] = \ - nlist4close_srcs_by_itgt_box_dev.get() + return direct_by_itgt_box_dev - return result + def process_direct_aggregate(self, traversal, xlat_cost): + result_dev = cl.array.sum(self.process_direct(traversal, 1.0)) + return result_dev.get().reshape(-1)[0] * xlat_cost.direct() class PythonCostModel(CostModel): - def collect_direct_interaction_data(self, traversal): + def process_direct(self, traversal, c_p2p): tree = traversal.tree ntarget_boxes = len(traversal.target_boxes) # target box index -> nsources - nlist1_srcs_by_itgt_box = np.zeros(ntarget_boxes, dtype=np.intp) - nlist3close_srcs_by_itgt_box = np.zeros(ntarget_boxes, dtype=np.intp) - nlist4close_srcs_by_itgt_box = np.zeros(ntarget_boxes, dtype=np.intp) + direct_by_itgt_box = np.zeros(ntarget_boxes, dtype=np.float64) for itgt_box in range(ntarget_boxes): - nlist1_srcs = 0 + nsources = 0 + start, end = traversal.neighbor_source_boxes_starts[itgt_box:itgt_box+2] for src_ibox in traversal.neighbor_source_boxes_lists[start:end]: - nlist1_srcs += tree.box_source_counts_nonchild[src_ibox] + nsources += tree.box_source_counts_nonchild[src_ibox] - nlist1_srcs_by_itgt_box[itgt_box] = nlist1_srcs - - nlist3close_srcs = 0 # Could be None, if not 
using targets with extent. if traversal.from_sep_close_smaller_starts is not None: start, end = ( - traversal.from_sep_close_smaller_starts[itgt_box:itgt_box+2]) + traversal.from_sep_close_smaller_starts[itgt_box:itgt_box+2] + ) for src_ibox in traversal.from_sep_close_smaller_lists[start:end]: - nlist3close_srcs += tree.box_source_counts_nonchild[src_ibox] - - nlist3close_srcs_by_itgt_box[itgt_box] = nlist3close_srcs + nsources += tree.box_source_counts_nonchild[src_ibox] - nlist4close_srcs = 0 # Could be None, if not using targets with extent. if traversal.from_sep_close_bigger_starts is not None: start, end = ( - traversal.from_sep_close_bigger_starts[itgt_box:itgt_box+2]) + traversal.from_sep_close_bigger_starts[itgt_box:itgt_box+2] + ) for src_ibox in traversal.from_sep_close_bigger_lists[start:end]: - nlist4close_srcs += tree.box_source_counts_nonchild[src_ibox] + nsources += tree.box_source_counts_nonchild[src_ibox] - nlist4close_srcs_by_itgt_box[itgt_box] = nlist4close_srcs + ntargets = tree.box_target_counts_nonchild[ + traversal.target_boxes[itgt_box] + ] + direct_by_itgt_box[itgt_box] += (nsources * ntargets * c_p2p) - result = dict() - result["nlist1_srcs_by_itgt_box"] = nlist1_srcs_by_itgt_box - result["nlist3close_srcs_by_itgt_box"] = nlist3close_srcs_by_itgt_box - result["nlist4close_srcs_by_itgt_box"] = nlist4close_srcs_by_itgt_box + return direct_by_itgt_box - return result + def process_direct_aggregate(self, traversal, xlat_cost): + return np.sum(self.process_direct(traversal, 1.0)) * xlat_cost.direct() diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 86e966a..00ebc9b 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -49,8 +49,8 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): from boxtree.traversal import FMMTraversalBuilder tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2) - d_trav, _ = tg(queue, tree, debug=True) - trav = d_trav.get(queue=queue) + trav_dev, _ = tg(queue, tree, 
debug=True) + trav = trav_dev.get(queue=queue) # }}} @@ -76,56 +76,60 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # }}} - # {{{ Test collect_direct_interaction_data + # {{{ Test process_direct + queue.finish() start_time = time.time() - cl_direct_interaction = \ - cl_cost_model.collect_direct_interaction_data(trav) + + cl_direct = cl_cost_model.process_direct(trav_dev, 5.0) + + queue.finish() logger.info("OpenCL time for collect_direct_interaction_data: {0}".format( str(time.time() - start_time) )) start_time = time.time() - python_direct_interaction = \ - python_cost_model.collect_direct_interaction_data(trav) + + python_direct = python_cost_model.process_direct(trav, 5.0) + logger.info("Python time for collect_direct_interaction_data: {0}".format( str(time.time() - start_time) )) - for field in ["nlist1_srcs_by_itgt_box", "nlist3close_srcs_by_itgt_box", - "nlist4close_srcs_by_itgt_box"]: - assert np.equal( - cl_direct_interaction[field], - python_direct_interaction[field] - ).all() + assert np.equal(cl_direct.get(), python_direct).all() # }}} - # {{{ Test count_direct + # {{{ Test process_direct_aggregate start_time = time.time() - cl_count_direct = evaluate( - cl_cost_model.count_direct(xlat_cost, trav), - context=constant_one_params - ) + cl_direct_aggregate = cl_cost_model.process_direct_aggregate(trav_dev, xlat_cost) + queue.finish() logger.info("OpenCL time for count_direct: {0}".format( str(time.time() - start_time) )) + cl_direct_aggregate_num = evaluate( + cl_direct_aggregate, context=constant_one_params + ) + start_time = time.time() - python_count_direct = evaluate( - python_cost_model.count_direct(xlat_cost, trav), - context=constant_one_params + python_direct_aggregate = python_cost_model.process_direct_aggregate( + trav, xlat_cost ) logger.info("Python time for count_direct: {0}".format( str(time.time() - start_time) )) - assert np.equal(cl_count_direct, python_count_direct).all() + python_direct_aggregate_num = evaluate( 
+ python_direct_aggregate, context=constant_one_params + ) + + assert cl_direct_aggregate_num == python_direct_aggregate_num # }}} -- GitLab From de5118be1e0d041d1d2f1f8869ada1642754df3c Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 16 Dec 2018 23:32:01 -0600 Subject: [PATCH 09/50] Add process_list2 --- boxtree/cost.py | 88 +++++++++++++++++++++++++++++++++++++++-- test/test_cost_model.py | 41 +++++++++++++++++-- 2 files changed, 121 insertions(+), 8 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index b1a15ff..6c823fa 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -146,7 +146,7 @@ class CostModel(ABC): @abstractmethod def process_direct(self, traversal, c_p2p): - """Count the number of sources in direct interaction boxes. + """Direct evaluation cost of each target box of *traversal*. :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. :arg c_p2p: calibration constant. @@ -158,20 +158,32 @@ class CostModel(ABC): @abstractmethod def process_direct_aggregate(self, traversal, xlat_cost): - """Count direct evaluations of each target box of *traversal*. + """Direct evaluation cost of all boxes. + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. :arg xlat_cost: a :class:`TranslationCostModel` object which specifies the translation cost. - :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. :return: a :class:`pymbolic.primitives.Product` object representing the aggregate cost of direct evaluations in all boxes. """ pass + @abstractmethod + def process_list2(self, traversal, m2l_cost): + """ + :param traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :param m2l_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + of shape (nlevels,) representing the translation cost of each level. 
+ :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (ntarget_or_target_parent_boxes,), with each entry represents the cost + of multipole-to-local translations to this box. + """ + pass + class CLCostModel(CostModel): """ - Note: For methods in this class, arguments like *traversal* should live on device + Note: For methods in this class, argument *traversal* should live on device memory. """ def __init__(self, queue, translation_cost_model_factory): @@ -180,6 +192,8 @@ class CLCostModel(CostModel): translation_cost_model_factory ) + # {{{ direct evaluation to point targets (lists 1, 3 close, 4 close) + @memoize_method def process_direct_knl(self, particle_id_dtype, box_id_dtype): return ElementwiseKernel( @@ -278,6 +292,59 @@ class CLCostModel(CostModel): result_dev = cl.array.sum(self.process_direct(traversal, 1.0)) return result_dev.get().reshape(-1)[0] * xlat_cost.direct() + # }}} + + # {{{ translate separated siblings' ("list 2") mpoles to local + + @memoize_method + def process_list2_knl(self, box_id_dtype, box_level_dtype): + return ElementwiseKernel( + self.queue.context, + Template(r""" + double *nm2l, + ${box_id_t} *target_or_target_parent_boxes, + ${box_id_t} *from_sep_siblings_starts, + ${box_level_t} *box_levels, + double *m2l_cost + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + box_level_t=dtype_to_ctype(box_level_dtype) + ), + Template(r""" + ${box_id_t} start = from_sep_siblings_starts[i]; + ${box_id_t} end = from_sep_siblings_starts[i+1]; + ${box_level_t} ilevel = box_levels[target_or_target_parent_boxes[i]]; + + nm2l[i] += (end - start) * m2l_cost[ilevel]; + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + box_level_t=dtype_to_ctype(box_level_dtype) + ) + ) + + def process_list2(self, traversal, m2l_cost): + tree = traversal.tree + box_id_dtype = tree.box_id_dtype + box_level_dtype = tree.box_level_dtype + + ntarget_or_target_parent_boxes = len(traversal.target_or_target_parent_boxes) + nm2l = 
cl.array.zeros( + self.queue, (ntarget_or_target_parent_boxes,), dtype=np.float64 + ) + + process_list2_knl = self.process_list2_knl(box_id_dtype, box_level_dtype) + process_list2_knl( + nm2l, + traversal.target_or_target_parent_boxes, + traversal.from_sep_siblings_starts, + tree.box_levels, + m2l_cost + ) + + return nm2l + + # }}} + class PythonCostModel(CostModel): def process_direct(self, traversal, c_p2p): @@ -319,3 +386,16 @@ class PythonCostModel(CostModel): def process_direct_aggregate(self, traversal, xlat_cost): return np.sum(self.process_direct(traversal, 1.0)) * xlat_cost.direct() + + def process_list2(self, traversal, m2l_cost): + tree = traversal.tree + ntarget_or_target_parent_boxes = len(traversal.target_or_target_parent_boxes) + nm2l = np.zeros(ntarget_or_target_parent_boxes, dtype=np.float64) + + for itgt_box, tgt_ibox in enumerate(traversal.target_or_target_parent_boxes): + start, end = traversal.from_sep_siblings_starts[itgt_box:itgt_box+2] + + ilevel = tree.box_levels[tgt_ibox] + nm2l[itgt_box] += m2l_cost[ilevel] * (end - start) + + return nm2l diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 00ebc9b..0d9dea3 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -70,6 +70,8 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): c_p2m=1, c_p2p=1 ) + for ilevel in range(trav.tree.nlevels): + constant_one_params["p_fmm_lev%d" % ilevel] = 1 from boxtree.cost import pde_aware_translation_cost_model xlat_cost = pde_aware_translation_cost_model(dims, trav.tree.nlevels) @@ -84,7 +86,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): cl_direct = cl_cost_model.process_direct(trav_dev, 5.0) queue.finish() - logger.info("OpenCL time for collect_direct_interaction_data: {0}".format( + logger.info("OpenCL time for process_direct: {0}".format( str(time.time() - start_time) )) @@ -92,7 +94,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): python_direct = 
python_cost_model.process_direct(trav, 5.0) - logger.info("Python time for collect_direct_interaction_data: {0}".format( + logger.info("Python time for process_direct: {0}".format( str(time.time() - start_time) )) @@ -107,7 +109,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): cl_direct_aggregate = cl_cost_model.process_direct_aggregate(trav_dev, xlat_cost) queue.finish() - logger.info("OpenCL time for count_direct: {0}".format( + logger.info("OpenCL time for process_direct_aggregate: {0}".format( str(time.time() - start_time) )) @@ -121,7 +123,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): trav, xlat_cost ) - logger.info("Python time for count_direct: {0}".format( + logger.info("Python time for process_direct_aggregate: {0}".format( str(time.time() - start_time) )) @@ -133,6 +135,37 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # }}} + # {{{ Test process_list2 + + nlevels = trav.tree.nlevels + m2l_cost = np.zeros(nlevels, dtype=np.float64) + for ilevel in range(nlevels): + m2l_cost[ilevel] = evaluate( + xlat_cost.m2l(ilevel, ilevel), + context=constant_one_params + ) + m2l_cost_dev = cl.array.to_device(queue, m2l_cost) + + queue.finish() + start_time = time.time() + + cl_m2l_cost = cl_cost_model.process_list2(trav_dev, m2l_cost_dev) + + queue.finish() + logger.info("OpenCL time for process_list2: {0}".format( + str(time.time() - start_time) + )) + + start_time = time.time() + python_m2l_cost = python_cost_model.process_list2(trav, m2l_cost) + logger.info("Python time for process_list2: {0}".format( + str(time.time() - start_time) + )) + + assert np.equal(cl_m2l_cost.get(), python_m2l_cost).all() + + # }}} + def main(): nsouces = 100000 -- GitLab From 355d06f0791d9357074f1ed5d4d2da08b5b7db38 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 20 Dec 2018 23:16:58 -0600 Subject: [PATCH 10/50] Use array instead of symbolic for process methods --- boxtree/cost.py | 41 
+++++++++++++++++++++-------------------- test/test_cost_model.py | 22 ++++++---------------- 2 files changed, 27 insertions(+), 36 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 6c823fa..cfffe87 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -156,18 +156,6 @@ class CostModel(ABC): """ pass - @abstractmethod - def process_direct_aggregate(self, traversal, xlat_cost): - """Direct evaluation cost of all boxes. - - :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. - :arg xlat_cost: a :class:`TranslationCostModel` object which specifies the - translation cost. - :return: a :class:`pymbolic.primitives.Product` object representing the - aggregate cost of direct evaluations in all boxes. - """ - pass - @abstractmethod def process_list2(self, traversal, m2l_cost): """ @@ -180,6 +168,17 @@ class CostModel(ABC): """ pass + @staticmethod + @abstractmethod + def aggregate(per_box_result): + """ Sum all entries of *per_box_result* into a number. + + :param per_box_result: an object of :class:`numpy.ndarray` or + :class:`pyopencl.array.Array`, the result to be sumed. + :return: a :class:`float`, the result of the sum. 
+ """ + pass + class CLCostModel(CostModel): """ @@ -288,10 +287,6 @@ class CLCostModel(CostModel): return direct_by_itgt_box_dev - def process_direct_aggregate(self, traversal, xlat_cost): - result_dev = cl.array.sum(self.process_direct(traversal, 1.0)) - return result_dev.get().reshape(-1)[0] * xlat_cost.direct() - # }}} # {{{ translate separated siblings' ("list 2") mpoles to local @@ -319,7 +314,8 @@ class CLCostModel(CostModel): """).render( box_id_t=dtype_to_ctype(box_id_dtype), box_level_t=dtype_to_ctype(box_level_dtype) - ) + ), + name="process_list2" ) def process_list2(self, traversal, m2l_cost): @@ -345,6 +341,10 @@ class CLCostModel(CostModel): # }}} + @staticmethod + def aggregate(per_box_result): + return cl.array.sum(per_box_result).get().reshape(-1)[0] + class PythonCostModel(CostModel): def process_direct(self, traversal, c_p2p): @@ -384,9 +384,6 @@ class PythonCostModel(CostModel): return direct_by_itgt_box - def process_direct_aggregate(self, traversal, xlat_cost): - return np.sum(self.process_direct(traversal, 1.0)) * xlat_cost.direct() - def process_list2(self, traversal, m2l_cost): tree = traversal.tree ntarget_or_target_parent_boxes = len(traversal.target_or_target_parent_boxes) @@ -399,3 +396,7 @@ class PythonCostModel(CostModel): nm2l[itgt_box] += m2l_cost[ilevel] * (end - start) return nm2l + + @staticmethod + def aggregate(per_box_result): + return np.sum(per_box_result) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 0d9dea3..e2bbe3b 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -102,36 +102,26 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # }}} - # {{{ Test process_direct_aggregate + # {{{ Test aggregate start_time = time.time() - cl_direct_aggregate = cl_cost_model.process_direct_aggregate(trav_dev, xlat_cost) + cl_direct_aggregate = cl_cost_model.aggregate(cl_direct) queue.finish() - logger.info("OpenCL time for process_direct_aggregate: {0}".format( + 
logger.info("OpenCL time for aggregate: {0}".format( str(time.time() - start_time) )) - cl_direct_aggregate_num = evaluate( - cl_direct_aggregate, context=constant_one_params - ) - start_time = time.time() - python_direct_aggregate = python_cost_model.process_direct_aggregate( - trav, xlat_cost - ) + python_direct_aggregate = python_cost_model.aggregate(python_direct) - logger.info("Python time for process_direct_aggregate: {0}".format( + logger.info("Python time for aggregate: {0}".format( str(time.time() - start_time) )) - python_direct_aggregate_num = evaluate( - python_direct_aggregate, context=constant_one_params - ) - - assert cl_direct_aggregate_num == python_direct_aggregate_num + assert cl_direct_aggregate == python_direct_aggregate # }}} -- GitLab From 69c0e2ce4ec2cac9843453b925c29138ecc9f7a9 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sat, 22 Dec 2018 00:40:38 -0600 Subject: [PATCH 11/50] Add process_list3 --- boxtree/cost.py | 84 ++++++++++++++++++++++++++++++++++++++++- test/test_cost_model.py | 30 +++++++++++++++ 2 files changed, 113 insertions(+), 1 deletion(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index cfffe87..bd8236b 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -163,11 +163,24 @@ class CostModel(ABC): :param m2l_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (nlevels,) representing the translation cost of each level. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape - (ntarget_or_target_parent_boxes,), with each entry represents the cost + (ntarget_or_target_parent_boxes,), with each entry representing the cost of multipole-to-local translations to this box. """ pass + @abstractmethod + def process_list3(self, traversal, m2p_cost): + """ + :param traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. 
+ :param m2p_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + of shape (nlevels,) where the ith entry represents the evaluation cost + from multipole expansion at level i to a point. + :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (nboxes,), with each entry representing the cost of evaluating all + targets inside this box from multipole expansions of list-3 boxes. + """ + pass + @staticmethod @abstractmethod def aggregate(per_box_result): @@ -341,6 +354,61 @@ class CLCostModel(CostModel): # }}} + # {{{ evaluate sep. smaller mpoles ("list 3") at particles + + @memoize_method + def process_list3_knl(self, box_id_dtype, particle_id_dtype): + return ElementwiseKernel( + self.queue.context, + Template(r""" + ${box_id_t} *target_boxes_sep_smaller, + ${box_id_t} *sep_smaller_start, + ${particle_id_t} *box_target_counts_nonchild, + double m2p_cost_current_level, + double *nm2p + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype) + ), + Template(r""" + ${box_id_t} target_box = target_boxes_sep_smaller[i]; + ${box_id_t} start = sep_smaller_start[i]; + ${box_id_t} end = sep_smaller_start[i+1]; + ${particle_id_t} ntargets = box_target_counts_nonchild[target_box]; + nm2p[target_box] += ( + ntargets * (end - start) * m2p_cost_current_level + ); + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype) + ), + name="process_list3" + ) + + def process_list3(self, traversal, m2p_cost): + tree = traversal.tree + nm2p = cl.array.zeros(self.queue, tree.nboxes, dtype=np.float64) + + process_list3_knl = self.process_list3_knl( + tree.box_id_dtype, tree.particle_id_dtype + ) + + for ilevel, sep_smaller_list in enumerate( + traversal.from_sep_smaller_by_level): + process_list3_knl( + traversal.target_boxes_sep_smaller_by_source_level[ilevel], + sep_smaller_list.starts, + tree.box_target_counts_nonchild, + 
m2p_cost[ilevel].get().reshape(-1)[0], + nm2p, + queue=self.queue + ) + self.queue.finish() + + return nm2p + + # }}} + @staticmethod def aggregate(per_box_result): return cl.array.sum(per_box_result).get().reshape(-1)[0] @@ -397,6 +465,20 @@ class PythonCostModel(CostModel): return nm2l + def process_list3(self, traversal, m2p_cost): + tree = traversal.tree + nm2p = np.zeros(tree.nboxes, dtype=np.float64) + + for ilevel, sep_smaller_list in enumerate( + traversal.from_sep_smaller_by_level): + for itgt_box, tgt_ibox in enumerate( + traversal.target_boxes_sep_smaller_by_source_level[ilevel]): + ntargets = tree.box_target_counts_nonchild[tgt_ibox] + start, end = sep_smaller_list.starts[itgt_box:itgt_box + 2] + nm2p[tgt_ibox] += ntargets * (end - start) * m2p_cost[ilevel] + + return nm2p + @staticmethod def aggregate(per_box_result): return np.sum(per_box_result) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index e2bbe3b..201d1aa 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -156,6 +156,36 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # }}} + # {{{ Test process_list 3 + + m2p_cost = np.zeros(nlevels, dtype=np.float64) + for ilevel in range(nlevels): + m2p_cost[ilevel] = evaluate( + xlat_cost.m2p(ilevel), + context=constant_one_params + ) + m2p_cost_dev = cl.array.to_device(queue, m2p_cost) + + queue.finish() + start_time = time.time() + + cl_m2p_cost = cl_cost_model.process_list3(trav_dev, m2p_cost_dev) + + queue.finish() + logger.info("OpenCL time for process_list3: {0}".format( + str(time.time() - start_time) + )) + + start_time = time.time() + python_m2p_cost = python_cost_model.process_list3(trav, m2p_cost) + logger.info("Python time for process_list3: {0}".format( + str(time.time() - start_time) + )) + + assert np.equal(cl_m2p_cost.get(), python_m2p_cost).all() + + # }}} + def main(): nsouces = 100000 -- GitLab From a47e9ffd06eab9ae195b7805040b4f182adbfb7d Mon Sep 17 00:00:00 2001 From: Hao Gao 
Date: Sun, 23 Dec 2018 00:52:48 -0600 Subject: [PATCH 12/50] Add process_list4 --- boxtree/cost.py | 86 +++++++++++++++++++++++++++++++++++++++++ test/test_cost_model.py | 30 ++++++++++++++ 2 files changed, 116 insertions(+) diff --git a/boxtree/cost.py b/boxtree/cost.py index bd8236b..a750064 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -181,6 +181,19 @@ class CostModel(ABC): """ pass + @abstractmethod + def process_list4(self, traversal, p2l_cost): + """ + :param traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :param p2l_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + of shape (nlevels,) where the ith entry represents the translation cost + from a point to the local expansion at level i. + :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (ntarget_or_target_parent_boxes,), with each entry representing the cost + of point-to-local translations to this box. + """ + pass + @staticmethod @abstractmethod def aggregate(per_box_result): @@ -409,6 +422,65 @@ class CLCostModel(CostModel): # }}} + # {{{ form locals for separated bigger source boxes ("list 4") + + @memoize_method + def process_list4_knl(self, box_id_dtype, particle_id_dtype, box_level_dtype): + return ElementwiseKernel( + self.queue.context, + Template(r""" + double *nm2p, + ${box_id_t} *from_sep_bigger_starts, + ${box_id_t} *from_sep_bigger_lists, + ${particle_id_t} *box_source_counts_nonchild, + ${box_level_t} *box_levels, + double *p2l_cost + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype), + box_level_t=dtype_to_ctype(box_level_dtype) + ), + Template(r""" + ${box_id_t} start = from_sep_bigger_starts[i]; + ${box_id_t} end = from_sep_bigger_starts[i+1]; + for(${box_id_t} idx=start; idx < end; idx++) { + ${box_id_t} src_ibox = from_sep_bigger_lists[idx]; + ${particle_id_t} nsources = box_source_counts_nonchild[src_ibox]; + ${box_level_t} ilevel = box_levels[src_ibox]; + nm2p[i] 
+= nsources * p2l_cost[ilevel]; + } + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype), + box_level_t=dtype_to_ctype(box_level_dtype) + ), + name="process_list4" + ) + + def process_list4(self, traversal, p2l_cost): + tree = traversal.tree + target_or_target_parent_boxes = traversal.target_or_target_parent_boxes + nm2p = cl.array.zeros( + self.queue, len(target_or_target_parent_boxes), dtype=np.float64 + ) + + process_list4_knl = self.process_list4_knl( + tree.box_id_dtype, tree.particle_id_dtype, tree.box_level_dtype + ) + + process_list4_knl( + nm2p, + traversal.from_sep_bigger_starts, + traversal.from_sep_bigger_lists, + tree.box_source_counts_nonchild, + tree.box_levels, + p2l_cost + ) + + return nm2p + + # }}} + @staticmethod def aggregate(per_box_result): return cl.array.sum(per_box_result).get().reshape(-1)[0] @@ -479,6 +551,20 @@ class PythonCostModel(CostModel): return nm2p + def process_list4(self, traversal, p2l_cost): + tree = traversal.tree + target_or_target_parent_boxes = traversal.target_or_target_parent_boxes + nm2p = np.zeros(len(target_or_target_parent_boxes), dtype=np.float64) + + for itgt_box in range(len(target_or_target_parent_boxes)): + start, end = traversal.from_sep_bigger_starts[itgt_box:itgt_box+2] + for src_ibox in traversal.from_sep_bigger_lists[start:end]: + nsources = tree.box_source_counts_nonchild[src_ibox] + ilevel = tree.box_levels[src_ibox] + nm2p[itgt_box] += nsources * p2l_cost[ilevel] + + return nm2p + @staticmethod def aggregate(per_box_result): return np.sum(per_box_result) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 201d1aa..2ef1af3 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -186,6 +186,36 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # }}} + # {{{ Test process list 4 + + p2l_cost = np.zeros(nlevels, dtype=np.float64) + for ilevel in range(nlevels): + p2l_cost[ilevel] = evaluate( + 
xlat_cost.p2l(ilevel), + context=constant_one_params + ) + p2l_cost_dev = cl.array.to_device(queue, p2l_cost) + + queue.finish() + start_time = time.time() + + cl_p2l_cost = cl_cost_model.process_list4(trav_dev, p2l_cost_dev) + + queue.finish() + logger.info("OpenCL time for process_list4: {0}".format( + str(time.time() - start_time) + )) + + start_time = time.time() + python_p2l_cost = python_cost_model.process_list4(trav, p2l_cost) + logger.info("Python time for process_list4: {0}".format( + str(time.time() - start_time) + )) + + assert np.equal(cl_p2l_cost.get(), python_p2l_cost).all() + + # }}} + def main(): nsouces = 100000 -- GitLab From 44fa08eaf1c3e8c12474bc5aab50bb771e512694 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 28 Dec 2018 06:42:53 +0800 Subject: [PATCH 13/50] Add process_eval_locals --- boxtree/cost.py | 93 ++++++++++++++++++++++++++++++++++++++++- test/test_cost_model.py | 32 +++++++++++++- 2 files changed, 122 insertions(+), 3 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index a750064..3f10cbe 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -151,8 +151,7 @@ class CostModel(ABC): :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. :arg c_p2p: calibration constant. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape - (traversal.ntarget_boxes,), with each entry represents the cost of the - box. + (ntarget_boxes,), with each entry represents the cost of the box. """ pass @@ -194,6 +193,23 @@ class CostModel(ABC): """ pass + @abstractmethod + def process_eval_locals(self, traversal, l2p_cost, + box_target_counts_nonchild=None): + """ + :param traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :param l2p_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + of shape (nlevels,) where the ith entry represents the cost of evaluating + the potential of a target in a box of level i using the box's local + expansion. 
+ :param box_target_counts_nonchild: if None, use + traversal.tree.box_target_counts_nonchild. + :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (ntarget_boxes,), the cost of evaluating the potentials of all targets + inside this box from its local expansion. + """ + pass + @staticmethod @abstractmethod def aggregate(per_box_result): @@ -481,6 +497,61 @@ class CLCostModel(CostModel): # }}} + # {{{ evaluate local expansions at targets + + @memoize_method + def process_eval_locals_knl(self, box_id_dtype, particle_id_dtype, + box_level_dtype): + return ElementwiseKernel( + self.queue.context, + Template(r""" + double *neval_locals, + ${box_id_t} *target_boxes, + ${particle_id_t} *box_target_counts_nonchild, + ${box_level_t} *box_levels, + double *l2p_cost + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype), + box_level_t=dtype_to_ctype(box_level_dtype) + ), + Template(r""" + ${box_id_t} box_idx = target_boxes[i]; + ${particle_id_t} ntargets = box_target_counts_nonchild[box_idx]; + ${box_level_t} ilevel = box_levels[box_idx]; + neval_locals[i] += ntargets * l2p_cost[ilevel]; + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype), + box_level_t=dtype_to_ctype(box_level_dtype) + ), + name="process_eval_locals" + ) + + def process_eval_locals(self, traversal, l2p_cost, + box_target_counts_nonchild=None): + tree = traversal.tree + ntarget_boxes = len(traversal.target_boxes) + neval_locals = cl.array.zeros(self.queue, ntarget_boxes, dtype=np.float64) + if box_target_counts_nonchild is None: + box_target_counts_nonchild = traversal.tree.box_target_counts_nonchild + + process_eval_locals_knl = self.process_eval_locals_knl( + tree.box_id_dtype, tree.particle_id_dtype, tree.box_level_dtype + ) + + process_eval_locals_knl( + neval_locals, + traversal.target_boxes, + box_target_counts_nonchild, + tree.box_levels, + l2p_cost + ) + + return 
neval_locals + + # }}} + @staticmethod def aggregate(per_box_result): return cl.array.sum(per_box_result).get().reshape(-1)[0] @@ -565,6 +636,24 @@ class PythonCostModel(CostModel): return nm2p + def process_eval_locals(self, traversal, l2p_cost, + box_target_counts_nonchild=None): + tree = traversal.tree + ntarget_boxes = len(traversal.target_boxes) + neval_locals = np.zeros(ntarget_boxes, dtype=np.float64) + if box_target_counts_nonchild is None: + box_target_counts_nonchild = tree.box_target_counts_nonchild + + for target_lev in range(tree.nlevels): + start, stop = traversal.level_start_target_box_nrs[ + target_lev:target_lev+2] + for itgt_box, tgt_ibox in enumerate( + traversal.target_boxes[start:stop], start): + neval_locals[itgt_box] += (box_target_counts_nonchild[tgt_ibox] + * l2p_cost[target_lev]) + + return neval_locals + @staticmethod def aggregate(per_box_result): return np.sum(per_box_result) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 2ef1af3..d56e81a 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -186,7 +186,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # }}} - # {{{ Test process list 4 + # {{{ Test process_list4 p2l_cost = np.zeros(nlevels, dtype=np.float64) for ilevel in range(nlevels): @@ -216,6 +216,36 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # }}} + # {{{ Test process_eval_locals + + l2p_cost = np.zeros(nlevels, dtype=np.float64) + for ilevel in range(nlevels): + l2p_cost[ilevel] = evaluate( + xlat_cost.l2p(ilevel), + context=constant_one_params + ) + l2p_cost_dev = cl.array.to_device(queue, l2p_cost) + + queue.finish() + start_time = time.time() + + cl_l2p_const = cl_cost_model.process_eval_locals(trav_dev, l2p_cost_dev) + + queue.finish() + logger.info("OpenCL time for process_eval_locals: {0}".format( + str(time.time() - start_time) + )) + + start_time = time.time() + python_l2p_cost = python_cost_model.process_eval_locals(trav, l2p_cost) 
+ logger.info("Python time for process_eval_locals: {0}".format( + str(time.time() - start_time) + )) + + assert np.equal(cl_l2p_const.get(), python_l2p_cost).all() + + # }}} + def main(): nsouces = 100000 -- GitLab From 14e0880f8c524ae178b085da5843c4e6cfcbe6fd Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 31 Dec 2018 12:11:05 +0800 Subject: [PATCH 14/50] Add estimate_calibration_params (training) --- boxtree/cost.py | 172 +++++++++++++++++++++++++++++++++++++--- test/test_cost_model.py | 100 +++++++++++++++++++++-- 2 files changed, 255 insertions(+), 17 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 3f10cbe..ce79fcf 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -33,7 +33,7 @@ from pyopencl.elementwise import ElementwiseKernel from pyopencl.tools import dtype_to_ctype from mako.template import Template from functools import partial -from pymbolic import var +from pymbolic import var, evaluate from pytools import memoize_method import sys @@ -158,8 +158,8 @@ class CostModel(ABC): @abstractmethod def process_list2(self, traversal, m2l_cost): """ - :param traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. - :param m2l_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :arg m2l_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (nlevels,) representing the translation cost of each level. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (ntarget_or_target_parent_boxes,), with each entry representing the cost @@ -170,8 +170,8 @@ class CostModel(ABC): @abstractmethod def process_list3(self, traversal, m2p_cost): """ - :param traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. - :param m2p_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. 
+ :arg m2p_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (nlevels,) where the ith entry represents the evaluation cost from multipole expansion at level i to a point. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape @@ -183,8 +183,8 @@ class CostModel(ABC): @abstractmethod def process_list4(self, traversal, p2l_cost): """ - :param traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. - :param p2l_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :arg p2l_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (nlevels,) where the ith entry represents the translation cost from a point to the local expansion at level i. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape @@ -197,12 +197,12 @@ class CostModel(ABC): def process_eval_locals(self, traversal, l2p_cost, box_target_counts_nonchild=None): """ - :param traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. - :param l2p_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :arg l2p_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (nlevels,) where the ith entry represents the cost of evaluating the potential of a target in a box of level i using the box's local expansion. - :param box_target_counts_nonchild: if None, use + :arg box_target_counts_nonchild: if None, use traversal.tree.box_target_counts_nonchild. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (ntarget_boxes,), the cost of evaluating the potentials of all targets @@ -213,14 +213,148 @@ class CostModel(ABC): @staticmethod @abstractmethod def aggregate(per_box_result): - """ Sum all entries of *per_box_result* into a number. + """Sum all entries of *per_box_result* into a number. 
- :param per_box_result: an object of :class:`numpy.ndarray` or + :arg per_box_result: an object of :class:`numpy.ndarray` or :class:`pyopencl.array.Array`, the result to be sumed. :return: a :class:`float`, the result of the sum. """ pass + def translation_cost_from_model(self, nlevels, xlat_cost, context): + """Evaluate translation cost from model. The result of this function can be + used for process_* methods in this class. + + :arg nlevels: the number of tree levels. + :arg xlat_cost: a :class:`TranslationCostModel`. + :arg context: a :class:`dict` of parameters passed as context when + evaluating symbolic expressions in *xlat_cost*. + :return: a :class:`dict`, the translation cost of each step in FMM. + """ + return { + "c_p2p": evaluate(xlat_cost.direct(), context=context), + "m2l_cost": np.array([ + evaluate(xlat_cost.m2l(ilevel, ilevel), context=context) + for ilevel in range(nlevels) + ], dtype=np.float64), + "m2p_cost": np.array([ + evaluate(xlat_cost.m2p(ilevel), context=context) + for ilevel in range(nlevels) + ], dtype=np.float64), + "p2l_cost": np.array([ + evaluate(xlat_cost.p2l(ilevel), context=context) + for ilevel in range(nlevels) + ], dtype=np.float64), + "l2p_cost": np.array([ + evaluate(xlat_cost.l2p(ilevel), context=context) + for ilevel in range(nlevels) + ], dtype=np.float64) + } + + def estimate_calibration_params(self, traversals, level_to_orders, + timing_results): + """ + :arg traversals: a :class:`list` of + :class:`boxtree.traversal.FMMTraversalInfo` objects. Note each traversal + object can reside on host or device depending on cost counting + implemenation instead of the expansion wrangler used for executing. + :arg level_to_orders: a :class:`list` of the same length as *traversals*. + Each entry is a :class:`numpy.ndarray` representing the expansion order + of different levels. + :arg timing_results: a :class:`list` of the same length as *traversals*. 
+ Each entry is a :class:`dict` filled with timing data returned by + *boxtree.fmm.drive_fmm* + :return: a :class:`dict` of calibration parameters. + """ + nresults = len(traversals) + assert len(level_to_orders) == nresults + assert len(timing_results) == nresults + + _FMM_STAGE_TO_CALIBRATION_PARAMETER = { + "form_multipoles": "c_p2m", + "coarsen_multipoles": "c_m2m", + "eval_direct": "c_p2p", + "multipole_to_local": "c_m2l", + "eval_multipoles": "c_m2p", + "form_locals": "c_p2l", + "refine_locals": "c_l2l", + "eval_locals": "c_l2p" + } + + params = set(_FMM_STAGE_TO_CALIBRATION_PARAMETER.values()) + + uncalibrated_times = {} + actual_times = {} + + for param in params: + uncalibrated_times[param] = np.zeros(nresults) + actual_times[param] = np.zeros(nresults) + + for icase, traversal in enumerate(traversals): + tree = traversal.tree + + xlat_cost = self.translation_cost_model_factory( + tree.dimensions, tree.nlevels + ) + + training_ctx = dict( + c_l2l=1, + c_l2p=1, + c_m2l=1, + c_m2m=1, + c_m2p=1, + c_p2l=1, + c_p2m=1, + c_p2p=1 + ) + for ilevel in range(tree.nlevels): + training_ctx["p_fmm_lev%d" % ilevel] = level_to_orders[icase][ilevel] + + translation_cost = self.translation_cost_from_model( + tree.nlevels, xlat_cost, training_ctx + ) + + uncalibrated_times["c_p2p"][icase] = self.aggregate( + self.process_direct(traversal, translation_cost["c_p2p"]) + ) + + uncalibrated_times["c_m2l"][icase] = self.aggregate( + self.process_list2(traversal, translation_cost["m2l_cost"]) + ) + + uncalibrated_times["c_m2p"][icase] = self.aggregate( + self.process_list3(traversal, translation_cost["m2p_cost"]) + ) + + uncalibrated_times["c_p2l"][icase] = self.aggregate( + self.process_list4(traversal, translation_cost["p2l_cost"]) + ) + + uncalibrated_times["c_l2p"][icase] = self.aggregate( + self.process_eval_locals(traversal, translation_cost["l2p_cost"]) + ) + + for icase, timing_result in enumerate(timing_results): + for param, time in timing_result.items(): + 
calibration_param = ( + _FMM_STAGE_TO_CALIBRATION_PARAMETER[param]) + actual_times[calibration_param][icase] = time["process_elapsed"] + + result = {} + + for param in params: + uncalibrated = uncalibrated_times[param] + actual = actual_times[param] + + if np.allclose(uncalibrated, 0): + result[param] = float("NaN") + continue + + result[param] = ( + actual.dot(uncalibrated) / uncalibrated.dot(uncalibrated)) + + return result + class CLCostModel(CostModel): """ @@ -556,6 +690,20 @@ class CLCostModel(CostModel): def aggregate(per_box_result): return cl.array.sum(per_box_result).get().reshape(-1)[0] + def translation_cost_from_model(self, nlevels, xlat_cost, context): + translation_costs = super(CLCostModel, self).translation_cost_from_model( + nlevels, xlat_cost, context + ) + + for name in translation_costs: + if not isinstance(translation_costs[name], np.ndarray): + continue + translation_costs[name] = cl.array.to_device( + self.queue, translation_costs[name] + ) + + return translation_costs + class PythonCostModel(CostModel): def process_direct(self, traversal, c_p2p): diff --git a/test/test_cost_model.py b/test/test_cost_model.py index d56e81a..f18e678 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -6,6 +6,8 @@ import pytest from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) from pymbolic import evaluate +from boxtree.cost import CLCostModel, PythonCostModel +from boxtree.cost import pde_aware_translation_cost_model import logging import os @@ -56,7 +58,6 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # {{{ Construct cost models - from boxtree.cost import CLCostModel, PythonCostModel cl_cost_model = CLCostModel(queue, None) python_cost_model = PythonCostModel(None) @@ -71,9 +72,8 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): c_p2p=1 ) for ilevel in range(trav.tree.nlevels): - constant_one_params["p_fmm_lev%d" % ilevel] = 1 + 
constant_one_params["p_fmm_lev%d" % ilevel] = 10 - from boxtree.cost import pde_aware_translation_cost_model xlat_cost = pde_aware_translation_cost_model(dims, trav.tree.nlevels) # }}} @@ -229,7 +229,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): queue.finish() start_time = time.time() - cl_l2p_const = cl_cost_model.process_eval_locals(trav_dev, l2p_cost_dev) + cl_l2p_cost = cl_cost_model.process_eval_locals(trav_dev, l2p_cost_dev) queue.finish() logger.info("OpenCL time for process_eval_locals: {0}".format( @@ -242,11 +242,100 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): str(time.time() - start_time) )) - assert np.equal(cl_l2p_const.get(), python_l2p_cost).all() + assert np.equal(cl_l2p_cost.get(), python_l2p_cost).all() # }}} +@pytest.mark.opencl +def test_estimate_calibration_params(ctx_factory): + from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler + + nsources_list = [1000, 2000, 3000, 4000] + ntargets_list = [1000, 2000, 3000, 4000] + dims = 3 + dtype = np.float64 + + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + traversals = [] + traversals_dev = [] + level_to_orders = [] + timing_results = [] + + def fmm_level_to_nterms(tree, ilevel): + return 10 + + for nsources, ntargets in zip(nsources_list, ntargets_list): + # {{{ Generate sources, targets and target_radii + + from boxtree.tools import make_normal_particle_array as p_normal + sources = p_normal(queue, nsources, dims, dtype, seed=15) + targets = p_normal(queue, ntargets, dims, dtype, seed=18) + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(queue.context, seed=22) + target_radii = rng.uniform( + queue, ntargets, a=0, b=0.05, dtype=dtype + ).get() + + # }}} + + # {{{ Generate tree and traversal + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + tree, _ = tb( + queue, sources, targets=targets, target_radii=target_radii, + stick_out_factor=0.15, max_particles_in_box=30, debug=True + ) + + from 
boxtree.traversal import FMMTraversalBuilder + tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2) + trav_dev, _ = tg(queue, tree, debug=True) + trav = trav_dev.get(queue=queue) + + traversals.append(trav) + traversals_dev.append(trav_dev) + + # }}} + + wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms) + level_to_orders.append(wrangler.level_nterms) + + timing_data = {} + from boxtree.fmm import drive_fmm + src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype) + drive_fmm(trav, wrangler, src_weights, timing_data=timing_data) + + timing_results.append(timing_data) + + def test_params_sanity(test_params): + param_names = ["c_p2p", "c_m2l", "c_m2p", "c_p2l", "c_l2p"] + for name in param_names: + assert isinstance(test_params[name], np.float64) + + def test_params_equal(test_params1, test_params2): + param_names = ["c_p2p", "c_m2l", "c_m2p", "c_p2l", "c_l2p"] + for name in param_names: + assert test_params1[name] == test_params2[name] + + python_cost_model = PythonCostModel(pde_aware_translation_cost_model) + python_params = python_cost_model.estimate_calibration_params( + traversals, level_to_orders, timing_results + ) + test_params_sanity(python_params) + + cl_cost_model = CLCostModel(queue, pde_aware_translation_cost_model) + cl_params = cl_cost_model.estimate_calibration_params( + traversals_dev, level_to_orders, timing_results + ) + test_params_sanity(cl_params) + + test_params_equal(cl_params, python_params) + + def main(): nsouces = 100000 ntargets = 100000 @@ -255,6 +344,7 @@ def main(): ctx_factory = cl.create_some_context test_cost_counter(ctx_factory, nsouces, ntargets, ndims, dtype) + test_estimate_calibration_params(ctx_factory) if __name__ == "__main__": -- GitLab From a554726b8eecae119e2ad12fa1b1608411a615b1 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 31 Dec 2018 21:58:04 +0800 Subject: [PATCH 15/50] Use wall time instead of processor time for Python 2 --- boxtree/cost.py | 10 ++++++++-- test/test_cost_model.py | 
14 ++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index ce79fcf..ed13e89 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -252,7 +252,7 @@ class CostModel(ABC): } def estimate_calibration_params(self, traversals, level_to_orders, - timing_results): + timing_results, wall_time=False): """ :arg traversals: a :class:`list` of :class:`boxtree.traversal.FMMTraversalInfo` objects. Note each traversal @@ -264,6 +264,7 @@ class CostModel(ABC): :arg timing_results: a :class:`list` of the same length as *traversals*. Each entry is a :class:`dict` filled with timing data returned by *boxtree.fmm.drive_fmm* + :arg wall_time: a :class:`bool`, whether to use wall time or processor time. :return: a :class:`dict` of calibration parameters. """ nresults = len(traversals) @@ -334,11 +335,16 @@ class CostModel(ABC): self.process_eval_locals(traversal, translation_cost["l2p_cost"]) ) + if wall_time: + field = "wall_elapsed" + else: + field = "process_elapsed" + for icase, timing_result in enumerate(timing_results): for param, time in timing_result.items(): calibration_param = ( _FMM_STAGE_TO_CALIBRATION_PARAMETER[param]) - actual_times[calibration_param][icase] = time["process_elapsed"] + actual_times[calibration_param][icase] = time[field] result = {} diff --git a/test/test_cost_model.py b/test/test_cost_model.py index f18e678..d30ad83 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -8,6 +8,7 @@ from pyopencl.tools import ( # noqa from pymbolic import evaluate from boxtree.cost import CLCostModel, PythonCostModel from boxtree.cost import pde_aware_translation_cost_model +import sys import logging import os @@ -311,6 +312,11 @@ def test_estimate_calibration_params(ctx_factory): timing_results.append(timing_data) + if sys.version_info >= (3, 0): + wall_time = False + else: + wall_time = True + def test_params_sanity(test_params): param_names = ["c_p2p", "c_m2l", "c_m2p", "c_p2l", "c_l2p"] for 
name in param_names: @@ -323,17 +329,18 @@ def test_estimate_calibration_params(ctx_factory): python_cost_model = PythonCostModel(pde_aware_translation_cost_model) python_params = python_cost_model.estimate_calibration_params( - traversals, level_to_orders, timing_results + traversals, level_to_orders, timing_results, wall_time=wall_time ) test_params_sanity(python_params) cl_cost_model = CLCostModel(queue, pde_aware_translation_cost_model) cl_params = cl_cost_model.estimate_calibration_params( - traversals_dev, level_to_orders, timing_results + traversals_dev, level_to_orders, timing_results, wall_time=wall_time ) test_params_sanity(cl_params) - test_params_equal(cl_params, python_params) + if sys.version_info >= (3, 0): + test_params_equal(cl_params, python_params) def main(): @@ -348,7 +355,6 @@ def main(): if __name__ == "__main__": - import sys if len(sys.argv) > 1: exec(sys.argv[1]) else: -- GitLab From 112282f7fa0c21fb05cb486b58bdd349ef40d628 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 1 Jan 2019 22:39:00 +0800 Subject: [PATCH 16/50] Add process_form_multipoles --- boxtree/cost.py | 79 +++++++++++++++++++++++++++++++++++++++++ test/test_cost_model.py | 37 +++++++++++++++++++ 2 files changed, 116 insertions(+) diff --git a/boxtree/cost.py b/boxtree/cost.py index ed13e89..ea3da7e 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -144,6 +144,19 @@ class CostModel(ABC): """ self.translation_cost_model_factory = translation_cost_model_factory + @abstractmethod + def process_form_multipoles(self, traversal, p2m_cost): + """Cost for forming multipole expansions of each box. + + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :arg p2m_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + of shape (nlevels,) representing the cost of forming the multipole + expansion of one source at each level. 
+ :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (nsource_boxes,), with each entry represents the cost of the box. + """ + pass + @abstractmethod def process_direct(self, traversal, c_p2p): """Direct evaluation cost of each target box of *traversal*. @@ -373,6 +386,59 @@ class CLCostModel(CostModel): translation_cost_model_factory ) + # {{{ form multipoles + + @memoize_method + def process_form_multipoles_knl(self, box_id_dtype, particle_id_dtype, + box_level_dtype): + return ElementwiseKernel( + self.queue.context, + Template(r""" + double *np2m, + ${box_id_t} *source_boxes, + ${particle_id_t} *box_source_counts_nonchild, + ${box_level_t} *box_levels, + double *p2m_cost + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype), + box_level_t=dtype_to_ctype(box_level_dtype) + ), + Template(r""" + ${box_id_t} box_idx = source_boxes[i]; + ${particle_id_t} nsources = box_source_counts_nonchild[box_idx]; + ${box_level_t} ilevel = box_levels[box_idx]; + np2m[i] = nsources * p2m_cost[ilevel]; + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype), + box_level_t=dtype_to_ctype(box_level_dtype) + ), + name="process_form_multipoles" + ) + + def process_form_multipoles(self, traversal, p2m_cost): + tree = traversal.tree + np2m = cl.array.zeros( + self.queue, len(traversal.source_boxes), dtype=np.float64 + ) + + process_form_multipoles_knl = self.process_form_multipoles_knl( + tree.box_id_dtype, tree.particle_id_dtype, tree.box_level_dtype + ) + + process_form_multipoles_knl( + np2m, + traversal.source_boxes, + tree.box_source_counts_nonchild, + tree.box_levels, + p2m_cost + ) + + return np2m + + # }}} + # {{{ direct evaluation to point targets (lists 1, 3 close, 4 close) @memoize_method @@ -712,6 +778,19 @@ class CLCostModel(CostModel): class PythonCostModel(CostModel): + def process_form_multipoles(self, traversal, p2m_cost): + tree = 
traversal.tree + np2m = np.zeros(len(traversal.source_boxes), dtype=np.float64) + + for ilevel in range(tree.nlevels): + start, stop = traversal.level_start_source_box_nrs[ilevel:ilevel + 2] + for isrc_box, src_ibox in enumerate( + traversal.source_boxes[start:stop], start): + nsources = tree.box_source_counts_nonchild[src_ibox] + np2m[isrc_box] = nsources * p2m_cost[ilevel] + + return np2m + def process_direct(self, traversal, c_p2p): tree = traversal.tree ntarget_boxes = len(traversal.target_boxes) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index d30ad83..7509f24 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -79,6 +79,43 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # }}} + # {{{ Test process_form_multipoles + + nlevels = trav.tree.nlevels + p2m_cost = np.zeros(nlevels, dtype=np.float64) + for ilevel in range(nlevels): + p2m_cost[ilevel] = evaluate( + xlat_cost.p2m(ilevel), + context=constant_one_params + ) + p2m_cost_dev = cl.array.to_device(queue, p2m_cost) + + queue.finish() + start_time = time.time() + + cl_form_multipoles = cl_cost_model.process_form_multipoles( + trav_dev, p2m_cost_dev + ) + + queue.finish() + logger.info("OpenCL time for process_form_multipoles: {0}".format( + str(time.time() - start_time) + )) + + start_time = time.time() + + python_form_multipoles = python_cost_model.process_form_multipoles( + trav, p2m_cost + ) + + logger.info("Python time for process_form_multipoles: {0}".format( + str(time.time() - start_time) + )) + + assert np.equal(cl_form_multipoles.get(), python_form_multipoles).all() + + # }}} + # {{{ Test process_direct queue.finish() -- GitLab From 287e7ffecc1eb6e8f06438ff07feca763357b674 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 2 Jan 2019 20:49:05 +0800 Subject: [PATCH 17/50] Estimate the cost of new traversal (evaluating) --- boxtree/cost.py | 69 +++++++++++++++++++++++++++++++++++++++-- test/test_cost_model.py | 13 ++++++++ 2 files changed, 79 
insertions(+), 3 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index ea3da7e..52f27c3 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -35,6 +35,7 @@ from mako.template import Template from functools import partial from pymbolic import var, evaluate from pytools import memoize_method +from collections import OrderedDict import sys if sys.version_info >= (3, 0): @@ -136,13 +137,18 @@ def taylor_translation_cost_model(dim, nlevels): class CostModel(ABC): - def __init__(self, translation_cost_model_factory): + def __init__(self, + translation_cost_model_factory=pde_aware_translation_cost_model, + calibration_params=None): """ :arg translation_cost_model_factory: a function, which takes tree dimension and the number of tree levels as arguments, returns an object of :class:`TranslationCostModel`. """ self.translation_cost_model_factory = translation_cost_model_factory + if calibration_params is None: + calibration_params = dict() + self.calibration_params = calibration_params @abstractmethod def process_form_multipoles(self, traversal, p2m_cost): @@ -245,6 +251,10 @@ class CostModel(ABC): :return: a :class:`dict`, the translation cost of each step in FMM. """ return { + "p2m_cost": np.array([ + evaluate(xlat_cost.p2m(ilevel), context=context) + for ilevel in range(nlevels) + ], dtype=np.float64), "c_p2p": evaluate(xlat_cost.direct(), context=context), "m2l_cost": np.array([ evaluate(xlat_cost.m2l(ilevel, ilevel), context=context) @@ -374,16 +384,69 @@ class CostModel(ABC): return result + def __call__(self, traversal, level_to_order, params): + """Predict cost of a new traversal object. + + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :arg level_to_order: a :class:`numpy.ndarray` of shape + (traversal.tree.nlevels,) representing the expansion orders + of different levels. + :arg params: the calibration parameters returned by + *estimate_calibration_params*. + :return: a :class:`dict`, the cost of fmm stages. 
+ """ + tree = traversal.tree + result = OrderedDict() + + for ilevel in range(tree.nlevels): + params["p_fmm_lev%d" % ilevel] = level_to_order[ilevel] + + xlat_cost = self.translation_cost_model_factory( + tree.dimensions, tree.nlevels + ) + + translation_cost = self.translation_cost_from_model( + tree.nlevels, xlat_cost, params + ) + + result["form_multipoles"] = self.process_form_multipoles( + traversal, translation_cost["p2m_cost"] + ) + + result["eval_direct"] = self.process_direct( + traversal, translation_cost["c_p2p"] + ) + + result["multipole_to_local"] = self.process_list2( + traversal, translation_cost["m2l_cost"] + ) + + result["eval_multipoles"] = self.process_list3( + traversal, translation_cost["m2p_cost"] + ) + + result["form_locals"] = self.process_list4( + traversal, translation_cost["p2l_cost"] + ) + + result["eval_locals"] = self.process_eval_locals( + traversal, translation_cost["l2p_cost"] + ) + + return result + class CLCostModel(CostModel): """ Note: For methods in this class, argument *traversal* should live on device memory. 
""" - def __init__(self, queue, translation_cost_model_factory): + def __init__(self, queue, + translation_cost_model_factory=pde_aware_translation_cost_model, + calibration_params=None): self.queue = queue super(CLCostModel, self).__init__( - translation_cost_model_factory + translation_cost_model_factory, calibration_params ) # {{{ form multipoles diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 7509f24..1748f40 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -379,6 +379,19 @@ def test_estimate_calibration_params(ctx_factory): if sys.version_info >= (3, 0): test_params_equal(cl_params, python_params) + cl_predicted_time = cl_cost_model( + traversals_dev[2], level_to_orders[2], cl_params + ) + + for field in ["form_multipoles", "eval_direct", "multipole_to_local", + "eval_multipoles", "form_locals", "eval_locals"]: + logger.info("predicted time for {0}: {1}".format( + field, str(cl_cost_model.aggregate(cl_predicted_time[field])) + )) + logger.info("actual time for {0}: {1}".format( + field, str(timing_results[2][field]["process_elapsed"]) + )) + def main(): nsouces = 100000 -- GitLab From d03bf034e38b3f274987008eec1a60b38ceaed16 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 4 Jan 2019 07:38:12 +0800 Subject: [PATCH 18/50] Add process_coarsen_multipoles and fix test cases --- boxtree/cost.py | 114 +++++++++++++++++++++++++++++++++++++++- test/test_cost_model.py | 53 ++++++++++++++++--- 2 files changed, 157 insertions(+), 10 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 52f27c3..5ea83ef 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -35,7 +35,6 @@ from mako.template import Template from functools import partial from pymbolic import var, evaluate from pytools import memoize_method -from collections import OrderedDict import sys if sys.version_info >= (3, 0): @@ -163,6 +162,20 @@ class CostModel(ABC): """ pass + @abstractmethod + def process_coarsen_multipoles(self, traversal, m2m_cost): + 
"""Cost for upward propagation. + + :param traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :param m2m_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + of shape (nlevels, nlevels), where the (i,j) entry represents the + multipole-to-multipole cost from source level i to target level j. + :return: a :class:`float`, the overall cost of upward propagation. + + Note: This method returns a number instead of an array. + """ + pass + @abstractmethod def process_direct(self, traversal, c_p2p): """Direct evaluation cost of each target box of *traversal*. @@ -396,7 +409,7 @@ class CostModel(ABC): :return: a :class:`dict`, the cost of fmm stages. """ tree = traversal.tree - result = OrderedDict() + result = {} for ilevel in range(tree.nlevels): params["p_fmm_lev%d" % ilevel] = level_to_order[ilevel] @@ -502,6 +515,77 @@ class CLCostModel(CostModel): # }}} + # {{{ propagate multipoles upward + + def process_coarsen_multipoles_knl(self, ndimensions, box_id_dtype, + box_level_dtype, nlevels): + return ElementwiseKernel( + self.queue.context, + Template(r""" + ${box_id_t} *source_parent_boxes, + ${box_level_t} *box_levels, + double *m2m_cost, + double *nm2m, + % for i in range(2**ndimensions): + % if i == 2**ndimensions - 1: + ${box_id_t} *box_child_ids_${i} + % else: + ${box_id_t} *box_child_ids_${i}, + % endif + % endfor + """).render( + ndimensions=ndimensions, + box_id_t=dtype_to_ctype(box_id_dtype), + box_level_t=dtype_to_ctype(box_level_dtype) + ), + Template(r""" + ${box_id_t} box_idx = source_parent_boxes[i]; + ${box_level_t} target_level = box_levels[box_idx]; + if(target_level <= 1) { + nm2m[i] = 0.0; + } else { + ${box_level_t} source_level = target_level + 1; + int nchild = 0; + % for i in range(2**ndimensions): + if(box_child_ids_${i}[box_idx]) + nchild += 1; + % endfor + nm2m[i] = nchild * m2m_cost[ + source_level * ${nlevels} + target_level + ]; + } + """).render( + ndimensions=ndimensions, + box_id_t=dtype_to_ctype(box_id_dtype), 
+ box_level_t=dtype_to_ctype(box_level_dtype), + nlevels=nlevels + ), + name="process_coarsen_multipoles" + ) + + def process_coarsen_multipoles(self, traversal, m2m_cost): + tree = traversal.tree + nm2m = cl.array.zeros( + self.queue, len(traversal.source_parent_boxes), dtype=np.float64 + ) + + process_coarsen_multipoles_knl = self.process_coarsen_multipoles_knl( + tree.dimensions, tree.box_id_dtype, tree.box_level_dtype, tree.nlevels + ) + + process_coarsen_multipoles_knl( + traversal.source_parent_boxes, + tree.box_levels, + m2m_cost, + nm2m, + *tree.box_child_ids, + queue=self.queue + ) + + return self.aggregate(nm2m) + + # }}} + # {{{ direct evaluation to point targets (lists 1, 3 close, 4 close) @memoize_method @@ -950,6 +1034,32 @@ class PythonCostModel(CostModel): return neval_locals + def process_coarsen_multipoles(self, traversal, m2m_cost): + tree = traversal.tree + result = 0.0 + + # nlevels-1 is the last valid level index + # nlevels-2 is the last valid level that could have children + # + # 3 is the last relevant source_level. + # 2 is the last relevant target_level. 
+ # (because no level 1 box will be well-separated from another) + for source_level in range(tree.nlevels-1, 2, -1): + target_level = source_level - 1 + cost = m2m_cost[source_level, target_level] + + nmultipoles = 0 + start, stop = traversal.level_start_source_parent_box_nrs[ + target_level:target_level+2] + for ibox in traversal.source_parent_boxes[start:stop]: + for child in tree.box_child_ids[:, ibox]: + if child: + nmultipoles += 1 + + result += cost * nmultipoles + + return result + @staticmethod def aggregate(per_box_result): return np.sum(per_box_result) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 1748f40..b3ec6cf 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -116,6 +116,42 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # }}} + # {{{ Test process_coarsen_multipoles + + m2m_cost = np.zeros((nlevels, nlevels), dtype=np.float64) + for source_level in range(nlevels): + for target_level in range(nlevels): + m2m_cost[source_level, target_level] = evaluate( + xlat_cost.m2m(source_level, target_level), + context=constant_one_params + ) + m2m_cost_dev = cl.array.to_device(queue, m2m_cost) + + queue.finish() + start_time = time.time() + cl_coarsen_multipoles = cl_cost_model.process_coarsen_multipoles( + trav_dev, m2m_cost_dev + ) + + queue.finish() + logger.info("OpenCL time for coarsen_multipoles: {0}".format( + str(time.time() - start_time) + )) + + start_time = time.time() + + python_coarsen_multipoles = python_cost_model.process_coarsen_multipoles( + trav, m2m_cost + ) + + logger.info("Python time for coarsen_multipoles: {0}".format( + str(time.time() - start_time) + )) + + assert cl_coarsen_multipoles == python_coarsen_multipoles + + # }}} + # {{{ Test process_direct queue.finish() @@ -383,14 +419,15 @@ def test_estimate_calibration_params(ctx_factory): traversals_dev[2], level_to_orders[2], cl_params ) - for field in ["form_multipoles", "eval_direct", "multipole_to_local", - 
"eval_multipoles", "form_locals", "eval_locals"]: - logger.info("predicted time for {0}: {1}".format( - field, str(cl_cost_model.aggregate(cl_predicted_time[field])) - )) - logger.info("actual time for {0}: {1}".format( - field, str(timing_results[2][field]["process_elapsed"]) - )) + if sys.version_info >= (3, 0): + for field in ["form_multipoles", "eval_direct", "multipole_to_local", + "eval_multipoles", "form_locals", "eval_locals"]: + logger.info("predicted time for {0}: {1}".format( + field, str(cl_cost_model.aggregate(cl_predicted_time[field])) + )) + logger.info("actual time for {0}: {1}".format( + field, str(timing_results[2][field]["process_elapsed"]) + )) def main(): -- GitLab From 983456ef11c0eb70c25b8c921622c9f04a03fb2e Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sat, 5 Jan 2019 17:23:29 +0800 Subject: [PATCH 19/50] Add process_refine_locals --- boxtree/cost.py | 72 +++++++++++++++++++++++++++++++++++++++-- test/test_cost_model.py | 34 +++++++++++++++++++ 2 files changed, 104 insertions(+), 2 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 5ea83ef..a8f7d07 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -166,8 +166,8 @@ class CostModel(ABC): def process_coarsen_multipoles(self, traversal, m2m_cost): """Cost for upward propagation. - :param traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. - :param m2m_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :arg m2m_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (nlevels, nlevels), where the (i,j) entry represents the multipole-to-multipole cost from source level i to target level j. :return: a :class:`float`, the overall cost of upward propagation. @@ -242,6 +242,20 @@ class CostModel(ABC): """ pass + @abstractmethod + def process_refine_locals(self, traversal, l2l_cost): + """Cost of downward propagation. 
+ + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :arg l2l_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + of shape (nlevels-1,), where the ith entry represents the cost of + tranlating local expansion from level i to level i+1. + :return: a :class:`float`, the overall cost of downward propagation. + + Note: this method returns a number instead of an array. + """ + pass + @staticmethod @abstractmethod def aggregate(per_box_result): @@ -905,6 +919,48 @@ class CLCostModel(CostModel): # }}} + # {{{ propagate locals downward + + @memoize_method + def process_refine_locals_knl(self, box_id_dtype): + from pyopencl.reduction import ReductionKernel + return ReductionKernel( + self.queue.context, + np.float64, + neutral="0.0", + reduce_expr="a+b", + map_expr=r""" + (level_start_target_or_target_parent_box_nrs[i + 1] + - level_start_target_or_target_parent_box_nrs[i]) + * l2l_cost[i - 1] + """, + arguments=Template(r""" + ${box_id_t} *level_start_target_or_target_parent_box_nrs, + double *l2l_cost + """).render( + box_id_t=dtype_to_ctype(box_id_dtype) + ), + name="process_refine_locals" + ) + + def process_refine_locals(self, traversal, l2l_cost): + tree = traversal.tree + process_refine_locals_knl = self.process_refine_locals_knl(tree.box_id_dtype) + + level_start_target_or_target_parent_box_nrs = cl.array.to_device( + self.queue, traversal.level_start_target_or_target_parent_box_nrs + ) + + cost = process_refine_locals_knl( + level_start_target_or_target_parent_box_nrs, + l2l_cost, + range=range(1, tree.nlevels) + ).get() + + return cost + + # }}} + @staticmethod def aggregate(per_box_result): return cl.array.sum(per_box_result).get().reshape(-1)[0] @@ -1060,6 +1116,18 @@ class PythonCostModel(CostModel): return result + def process_refine_locals(self, traversal, l2l_cost): + tree = traversal.tree + result = 0.0 + + for target_lev in range(1, tree.nlevels): + start, stop = traversal.level_start_target_or_target_parent_box_nrs[ +
target_lev:target_lev+2] + source_lev = target_lev - 1 + result += (stop-start) * l2l_cost[source_lev] + + return result + @staticmethod def aggregate(per_box_result): return np.sum(per_box_result) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index b3ec6cf..3c93b1d 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -290,6 +290,40 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # }}} + # {{{ Test process_refine_locals + + l2l_cost = np.zeros(nlevels - 1, dtype=np.float64) + for ilevel in range(nlevels - 1): + l2l_cost[ilevel] = evaluate( + xlat_cost.l2l(ilevel, ilevel + 1), + context=constant_one_params + ) + l2l_cost_dev = cl.array.to_device(queue, l2l_cost) + + queue.finish() + start_time = time.time() + + cl_refine_locals_cost = cl_cost_model.process_refine_locals( + trav_dev, l2l_cost_dev + ) + + queue.finish() + logger.info("OpenCL time for refine_locals: {0}".format( + str(time.time() - start_time) + )) + + start_time = time.time() + python_refine_locals_cost = python_cost_model.process_refine_locals( + trav, l2l_cost + ) + logger.info("Python time for refine_locals: {0}".format( + str(time.time() - start_time) + )) + + assert cl_refine_locals_cost == python_refine_locals_cost + + # }}} + # {{{ Test process_eval_locals l2p_cost = np.zeros(nlevels, dtype=np.float64) -- GitLab From e1eeff4a004031c4274c87a71a2573b47d4d5999 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sat, 5 Jan 2019 18:15:03 +0800 Subject: [PATCH 20/50] Fix for python 2 --- boxtree/cost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index a8f7d07..3248a56 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -954,7 +954,7 @@ class CLCostModel(CostModel): cost = process_refine_locals_knl( level_start_target_or_target_parent_box_nrs, l2l_cost, - range=range(1, tree.nlevels) + range=slice(1, tree.nlevels) ).get() return cost -- GitLab From cdf82be69b54753c6af3f760ef4d9419491c3cdb Mon 
Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 6 Jan 2019 13:13:31 +0800 Subject: [PATCH 21/50] Add coarsen_multipoles and refine_locals to training and evaluation --- boxtree/cost.py | 39 ++++++++++++++++++++++++++++++++------- test/test_cost_model.py | 21 ++++++++++++++------- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 3248a56..39b762f 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -168,8 +168,8 @@ class CostModel(ABC): :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. :arg m2m_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` - of shape (nlevels, nlevels), where the (i,j) entry represents the - multipole-to-multipole cost from source level i to target level j. + of shape (nlevels-1,), where the ith entry represents the + multipole-to-multipole cost from source level i+1 to target level i. :return: a :class:`float`, the overall cost of upward propagation. Note: This method returns a number instead of an array. 
@@ -282,6 +282,10 @@ class CostModel(ABC): evaluate(xlat_cost.p2m(ilevel), context=context) for ilevel in range(nlevels) ], dtype=np.float64), + "m2m_cost": np.array([ + evaluate(xlat_cost.m2m(ilevel+1, ilevel), context=context) + for ilevel in range(nlevels-1) + ], dtype=np.float64), "c_p2p": evaluate(xlat_cost.direct(), context=context), "m2l_cost": np.array([ evaluate(xlat_cost.m2l(ilevel, ilevel), context=context) @@ -295,6 +299,10 @@ class CostModel(ABC): evaluate(xlat_cost.p2l(ilevel), context=context) for ilevel in range(nlevels) ], dtype=np.float64), + "l2l_cost": np.array([ + evaluate(xlat_cost.l2l(ilevel, ilevel+1), context=context) + for ilevel in range(nlevels-1) + ], dtype=np.float64), "l2p_cost": np.array([ evaluate(xlat_cost.l2p(ilevel), context=context) for ilevel in range(nlevels) @@ -365,6 +373,14 @@ class CostModel(ABC): tree.nlevels, xlat_cost, training_ctx ) + uncalibrated_times["c_p2m"][icase] = self.aggregate( + self.process_form_multipoles(traversal, translation_cost["p2m_cost"]) + ) + + uncalibrated_times["c_m2m"][icase] = self.process_coarsen_multipoles( + traversal, translation_cost["m2m_cost"] + ) + uncalibrated_times["c_p2p"][icase] = self.aggregate( self.process_direct(traversal, translation_cost["c_p2p"]) ) @@ -381,6 +397,10 @@ class CostModel(ABC): self.process_list4(traversal, translation_cost["p2l_cost"]) ) + uncalibrated_times["c_l2l"][icase] = self.process_refine_locals( + traversal, translation_cost["l2l_cost"] + ) + uncalibrated_times["c_l2p"][icase] = self.aggregate( self.process_eval_locals(traversal, translation_cost["l2p_cost"]) ) @@ -440,6 +460,10 @@ class CostModel(ABC): traversal, translation_cost["p2m_cost"] ) + result["coarsen_multipoles"] = self.process_coarsen_multipoles( + traversal, translation_cost["m2m_cost"] + ) + result["eval_direct"] = self.process_direct( traversal, translation_cost["c_p2p"] ) @@ -456,6 +480,10 @@ class CostModel(ABC): traversal, translation_cost["p2l_cost"] ) + result["refine_locals"] = 
self.process_refine_locals( + traversal, translation_cost["l2l_cost"] + ) + result["eval_locals"] = self.process_eval_locals( traversal, translation_cost["l2p_cost"] ) @@ -558,15 +586,12 @@ class CLCostModel(CostModel): if(target_level <= 1) { nm2m[i] = 0.0; } else { - ${box_level_t} source_level = target_level + 1; int nchild = 0; % for i in range(2**ndimensions): if(box_child_ids_${i}[box_idx]) nchild += 1; % endfor - nm2m[i] = nchild * m2m_cost[ - source_level * ${nlevels} + target_level - ]; + nm2m[i] = nchild * m2m_cost[target_level]; } """).render( ndimensions=ndimensions, @@ -1102,7 +1127,7 @@ class PythonCostModel(CostModel): # (because no level 1 box will be well-separated from another) for source_level in range(tree.nlevels-1, 2, -1): target_level = source_level - 1 - cost = m2m_cost[source_level, target_level] + cost = m2m_cost[target_level] nmultipoles = 0 start, stop = traversal.level_start_source_parent_box_nrs[ diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 3c93b1d..06cb24d 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -118,13 +118,12 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # {{{ Test process_coarsen_multipoles - m2m_cost = np.zeros((nlevels, nlevels), dtype=np.float64) - for source_level in range(nlevels): - for target_level in range(nlevels): - m2m_cost[source_level, target_level] = evaluate( - xlat_cost.m2m(source_level, target_level), - context=constant_one_params - ) + m2m_cost = np.zeros(nlevels - 1, dtype=np.float64) + for target_level in range(nlevels - 1): + m2m_cost[target_level] = evaluate( + xlat_cost.m2m(target_level + 1, target_level), + context=constant_one_params + ) m2m_cost_dev = cl.array.to_device(queue, m2m_cost) queue.finish() @@ -463,6 +462,14 @@ def test_estimate_calibration_params(ctx_factory): field, str(timing_results[2][field]["process_elapsed"]) )) + for field in ["coarsen_multipoles", "refine_locals"]: + logger.info("predicted time for {0}: 
{1}".format( + field, str(cl_predicted_time[field]) + )) + logger.info("actual time for {0}: {1}".format( + field, str(timing_results[2][field]["process_elapsed"]) + )) + def main(): nsouces = 100000 -- GitLab From 0649c58cdb8a808ab4c0f28bfb4f53fb556edea6 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 16 Jan 2019 23:06:57 -0600 Subject: [PATCH 22/50] Test cost model correctness using constone wrangler --- boxtree/cost.py | 12 ++++-- test/test_cost_model.py | 96 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 3 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 39b762f..01afc8d 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -982,13 +982,16 @@ class CLCostModel(CostModel): range=slice(1, tree.nlevels) ).get() - return cost + return cost.reshape(-1)[0] # }}} @staticmethod def aggregate(per_box_result): - return cl.array.sum(per_box_result).get().reshape(-1)[0] + if isinstance(per_box_result, float): + return per_box_result + else: + return cl.array.sum(per_box_result).get().reshape(-1)[0] def translation_cost_from_model(self, nlevels, xlat_cost, context): translation_costs = super(CLCostModel, self).translation_cost_from_model( @@ -1155,4 +1158,7 @@ class PythonCostModel(CostModel): @staticmethod def aggregate(per_box_result): - return np.sum(per_box_result) + if isinstance(per_box_result, float): + return per_box_result + else: + return np.sum(per_box_result) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 06cb24d..3061692 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -471,6 +471,101 @@ def test_estimate_calibration_params(ctx_factory): )) +class OpCountingTranslationCostModel(object): + """A translation cost model which assigns a cost of 1 to each operation.""" + + def __init__(self, dim, nlevels): + pass + + @staticmethod + def direct(): + return 1 + + @staticmethod + def p2l(level): + return 1 + + l2p = p2l + p2m = p2l + m2p = p2l + + @staticmethod + def
m2m(src_level, tgt_level): + return 1 + + l2l = m2m + m2l = m2m + + +@pytest.mark.opencl +@pytest.mark.parametrize( + ("nsources", "ntargets", "dims", "dtype"), [ + (5000, 5000, 3, np.float64) + ] +) +def test_cost_model_correctness(ctx_factory, nsources, ntargets, dims, dtype): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + from boxtree.tools import make_normal_particle_array as p_normal + sources = p_normal(queue, nsources, dims, dtype, seed=16) + targets = p_normal(queue, ntargets, dims, dtype, seed=19) + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(queue.context, seed=20) + target_radii = rng.uniform( + queue, ntargets, a=0, b=0.04, dtype=dtype + ).get() + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + tree, _ = tb( + queue, sources, targets=targets, target_radii=target_radii, + stick_out_factor=0.15, max_particles_in_box=30, debug=True + ) + + from boxtree.traversal import FMMTraversalBuilder + tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2) + trav_dev, _ = tg(queue, tree, debug=True) + trav = trav_dev.get(queue=queue) + + from boxtree.tools import ConstantOneExpansionWrangler + wrangler = ConstantOneExpansionWrangler(trav.tree) + + timing_data = {} + from boxtree.fmm import drive_fmm + src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype) + drive_fmm(trav, wrangler, src_weights, timing_data=timing_data) + + cost_model = CLCostModel( + queue, + translation_cost_model_factory=OpCountingTranslationCostModel + ) + + params = { + "c_p2m": 1.0, + "c_m2m": 1.0, + "c_p2p": 1.0, + "c_m2l": 1.0, + "c_m2p": 1.0, + "c_p2l": 1.0, + "c_l2l": 1.0, + "c_l2p": 1.0 + } + + level_to_order = np.array([1 for _ in range(tree.nlevels)]) + modeled_time = cost_model(trav_dev, level_to_order, params) + + mismatches = [] + for stage in timing_data: + if (timing_data[stage]["ops_elapsed"] + != cost_model.aggregate(modeled_time[stage])): + mismatches.append( + (stage, timing_data[stage]["ops_elapsed"], 
modeled_time[stage])) + + assert not mismatches, "\n".join(str(s) for s in mismatches) + + def main(): nsouces = 100000 ntargets = 100000 @@ -480,6 +575,7 @@ def main(): test_cost_counter(ctx_factory, nsouces, ntargets, ndims, dtype) test_estimate_calibration_params(ctx_factory) + test_cost_model_correctness(ctx_factory, nsouces, ntargets, ndims, dtype) if __name__ == "__main__": -- GitLab From 567618b795cedf1cad5004a1e8bcdd819d0717f3 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 16 Jan 2019 23:24:58 -0600 Subject: [PATCH 23/50] Evaluate cost model on new traversal object --- test/test_cost_model.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 3061692..21797a1 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -20,7 +20,7 @@ logger.setLevel(logging.INFO) @pytest.mark.opencl @pytest.mark.parametrize( ("nsources", "ntargets", "dims", "dtype"), [ - (5000, 5000, 3, np.float64) + (50000, 50000, 3, np.float64) ] ) def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): @@ -358,8 +358,8 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): def test_estimate_calibration_params(ctx_factory): from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler - nsources_list = [1000, 2000, 3000, 4000] - ntargets_list = [1000, 2000, 3000, 4000] + nsources_list = [1000, 2000, 3000, 4000, 5000] + ntargets_list = [1000, 2000, 3000, 4000, 5000] dims = 3 dtype = np.float64 @@ -435,13 +435,15 @@ def test_estimate_calibration_params(ctx_factory): python_cost_model = PythonCostModel(pde_aware_translation_cost_model) python_params = python_cost_model.estimate_calibration_params( - traversals, level_to_orders, timing_results, wall_time=wall_time + traversals[:-1], level_to_orders[:-1], timing_results[:-1], + wall_time=wall_time ) test_params_sanity(python_params) cl_cost_model = CLCostModel(queue, pde_aware_translation_cost_model) 
cl_params = cl_cost_model.estimate_calibration_params( - traversals_dev, level_to_orders, timing_results, wall_time=wall_time + traversals_dev[:-1], level_to_orders[:-1], timing_results[:-1], + wall_time=wall_time ) test_params_sanity(cl_params) @@ -449,25 +451,18 @@ def test_estimate_calibration_params(ctx_factory): test_params_equal(cl_params, python_params) cl_predicted_time = cl_cost_model( - traversals_dev[2], level_to_orders[2], cl_params + traversals_dev[-1], level_to_orders[-1], cl_params ) if sys.version_info >= (3, 0): for field in ["form_multipoles", "eval_direct", "multipole_to_local", - "eval_multipoles", "form_locals", "eval_locals"]: + "eval_multipoles", "form_locals", "eval_locals", + "coarsen_multipoles", "refine_locals"]: logger.info("predicted time for {0}: {1}".format( field, str(cl_cost_model.aggregate(cl_predicted_time[field])) )) logger.info("actual time for {0}: {1}".format( - field, str(timing_results[2][field]["process_elapsed"]) - )) - - for field in ["coarsen_multipoles", "refine_locals"]: - logger.info("predicted time for {0}: {1}".format( - field, str(cl_predicted_time[field]) - )) - logger.info("actual time for {0}: {1}".format( - field, str(timing_results[2][field]["process_elapsed"]) + field, str(timing_results[-1][field]["process_elapsed"]) )) -- GitLab From 22953978412bf0d32249738d40ae5e06a311ddcd Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sat, 19 Jan 2019 03:22:02 -0600 Subject: [PATCH 24/50] Address suggestions from @inducer and @mattwala --- boxtree/cost.py | 63 +++++++++++++++++++++++++---------------- test/test_cost_model.py | 45 +++++++++++++---------------- 2 files changed, 57 insertions(+), 51 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 01afc8d..99ae340 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -1,3 +1,14 @@ +""" +This module helps predict the running time of each step of FMM. 
There are two +implementations of the interface :class:`AbstractFMMCostModel`, namely +:class:`CLFMMCostModel` using OpenCL and :class:`PythonFMMCostModel` using pure +Python. + +An implementation of :class:`AbstractFMMCostModel` uses a +:class:`TranslationCostModel` to assign translation costs to a +:class:`FMMTraversalInfo` object. +""" + from __future__ import division, absolute_import __copyright__ = """ @@ -49,7 +60,7 @@ else: ABC = ABCMeta('ABC', (), {}) -class TranslationCostModel: +class TranslationCostModel(object): """Provides modeled costs for individual translations or evaluations.""" def __init__(self, ncoeffs_fmm_by_level, uses_point_and_shoot): @@ -135,19 +146,15 @@ def taylor_translation_cost_model(dim, nlevels): # }}} -class CostModel(ABC): +class AbstractFMMCostModel(ABC): def __init__(self, - translation_cost_model_factory=pde_aware_translation_cost_model, - calibration_params=None): + translation_cost_model_factory=pde_aware_translation_cost_model): """ :arg translation_cost_model_factory: a function, which takes tree dimension and the number of tree levels as arguments, returns an object of :class:`TranslationCostModel`. """ self.translation_cost_model_factory = translation_cost_model_factory - if calibration_params is None: - calibration_params = dict() - self.calibration_params = calibration_params @abstractmethod def process_form_multipoles(self, traversal, p2m_cost): @@ -172,7 +179,9 @@ class CostModel(ABC): multipole-to-multipole cost from source level i+1 to target level i. :return: a :class:`float`, the overall cost of upward propagation. - Note: This method returns a number instead of an array. + .. note:: This method returns a number instead of an array, because it is not + immediate clear how per-box cost of upward propagation will be useful for + distributed load balancing. """ pass @@ -252,7 +261,9 @@ class CostModel(ABC): tranlating local expansion from level i to level i+1.
:return: a :class:`float`, the overall cost of downward propagation. - Note: this method returns a number instead of an array. + .. note:: This method returns a number instead of an array, because it is not + immediate clear how per-box cost of downward propagation will be useful + for distributed load balancing. """ pass @@ -267,9 +278,9 @@ class CostModel(ABC): """ pass - def translation_cost_from_model(self, nlevels, xlat_cost, context): - """Evaluate translation cost from model. The result of this function can be - used for process_* methods in this class. + def cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): + """Evaluate translation cost factors from symbolic model. The result of this + function can be used for process_* methods in this class. :arg nlevels: the number of tree levels. :arg xlat_cost: a :class:`TranslationCostModel`. @@ -369,7 +380,7 @@ class CostModel(ABC): for ilevel in range(tree.nlevels): training_ctx["p_fmm_lev%d" % ilevel] = level_to_orders[icase][ilevel] - translation_cost = self.translation_cost_from_model( + translation_cost = self.cost_factors_for_kernels_from_model( tree.nlevels, xlat_cost, training_ctx ) @@ -452,7 +463,7 @@ class CostModel(ABC): tree.dimensions, tree.nlevels ) - translation_cost = self.translation_cost_from_model( + translation_cost = self.cost_factors_for_kernels_from_model( tree.nlevels, xlat_cost, params ) @@ -491,18 +502,15 @@ class CostModel(ABC): return result -class CLCostModel(CostModel): +class CLFMMCostModel(AbstractFMMCostModel): """ Note: For methods in this class, argument *traversal* should live on device memory. 
""" def __init__(self, queue, - translation_cost_model_factory=pde_aware_translation_cost_model, - calibration_params=None): + translation_cost_model_factory=pde_aware_translation_cost_model): self.queue = queue - super(CLCostModel, self).__init__( - translation_cost_model_factory, calibration_params - ) + super(CLFMMCostModel, self).__init__(translation_cost_model_factory) # {{{ form multipoles @@ -559,6 +567,7 @@ class CLCostModel(CostModel): # {{{ propagate multipoles upward + @memoize_method def process_coarsen_multipoles_knl(self, ndimensions, box_id_dtype, box_level_dtype, nlevels): return ElementwiseKernel( @@ -697,6 +706,7 @@ class CLCostModel(CostModel): # List 3 close if traversal.from_sep_close_smaller_starts is not None: + self.queue.finish() # Avoid potential race condition count_direct_interaction_knl( direct_by_itgt_box_dev, traversal.from_sep_close_smaller_starts, @@ -709,6 +719,7 @@ class CLCostModel(CostModel): # List 4 close if traversal.from_sep_close_bigger_starts is not None: + self.queue.finish() # Avoid potential race condition count_direct_interaction_knl( direct_by_itgt_box_dev, traversal.from_sep_close_bigger_starts, @@ -744,7 +755,7 @@ class CLCostModel(CostModel): ${box_id_t} end = from_sep_siblings_starts[i+1]; ${box_level_t} ilevel = box_levels[target_or_target_parent_boxes[i]]; - nm2l[i] += (end - start) * m2l_cost[ilevel]; + nm2l[i] = (end - start) * m2l_cost[ilevel]; """).render( box_id_t=dtype_to_ctype(box_id_dtype), box_level_t=dtype_to_ctype(box_level_dtype) @@ -911,7 +922,7 @@ class CLCostModel(CostModel): ${box_id_t} box_idx = target_boxes[i]; ${particle_id_t} ntargets = box_target_counts_nonchild[box_idx]; ${box_level_t} ilevel = box_levels[box_idx]; - neval_locals[i] += ntargets * l2p_cost[ilevel]; + neval_locals[i] = ntargets * l2p_cost[ilevel]; """).render( box_id_t=dtype_to_ctype(box_id_dtype), particle_id_t=dtype_to_ctype(particle_id_dtype), @@ -993,8 +1004,10 @@ class CLCostModel(CostModel): else: return 
cl.array.sum(per_box_result).get().reshape(-1)[0] - def translation_cost_from_model(self, nlevels, xlat_cost, context): - translation_costs = super(CLCostModel, self).translation_cost_from_model( + def cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): + translation_costs = super( + CLFMMCostModel, self + ).cost_factors_for_kernels_from_model( nlevels, xlat_cost, context ) @@ -1008,7 +1021,7 @@ class CLCostModel(CostModel): return translation_costs -class PythonCostModel(CostModel): +class PythonFMMCostModel(AbstractFMMCostModel): def process_form_multipoles(self, traversal, p2m_cost): tree = traversal.tree np2m = np.zeros(len(traversal.source_boxes), dtype=np.float64) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 21797a1..d0e5bac 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -6,7 +6,7 @@ import pytest from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) from pymbolic import evaluate -from boxtree.cost import CLCostModel, PythonCostModel +from boxtree.cost import CLFMMCostModel, PythonFMMCostModel from boxtree.cost import pde_aware_translation_cost_model import sys @@ -23,7 +23,7 @@ logger.setLevel(logging.INFO) (50000, 50000, 3, np.float64) ] ) -def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): +def test_compare_cl_and_py_cost_model(ctx_factory, nsources, ntargets, dims, dtype): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -59,8 +59,8 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): # {{{ Construct cost models - cl_cost_model = CLCostModel(queue, None) - python_cost_model = PythonCostModel(None) + cl_cost_model = CLFMMCostModel(queue, None) + python_cost_model = PythonFMMCostModel(None) constant_one_params = dict( c_l2l=1, @@ -112,7 +112,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): str(time.time() - start_time) )) - assert np.equal(cl_form_multipoles.get(), 
python_form_multipoles).all() + assert np.array_equal(cl_form_multipoles.get(), python_form_multipoles) # }}} @@ -171,7 +171,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): str(time.time() - start_time) )) - assert np.equal(cl_direct.get(), python_direct).all() + assert np.array_equal(cl_direct.get(), python_direct) # }}} @@ -225,7 +225,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): str(time.time() - start_time) )) - assert np.equal(cl_m2l_cost.get(), python_m2l_cost).all() + assert np.array_equal(cl_m2l_cost.get(), python_m2l_cost) # }}} @@ -255,7 +255,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): str(time.time() - start_time) )) - assert np.equal(cl_m2p_cost.get(), python_m2p_cost).all() + assert np.array_equal(cl_m2p_cost.get(), python_m2p_cost) # }}} @@ -285,7 +285,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): str(time.time() - start_time) )) - assert np.equal(cl_p2l_cost.get(), python_p2l_cost).all() + assert np.array_equal(cl_p2l_cost.get(), python_p2l_cost) # }}} @@ -349,7 +349,7 @@ def test_cost_counter(ctx_factory, nsources, ntargets, dims, dtype): str(time.time() - start_time) )) - assert np.equal(cl_l2p_cost.get(), python_l2p_cost).all() + assert np.array_equal(cl_l2p_cost.get(), python_l2p_cost) # }}} @@ -433,14 +433,14 @@ def test_estimate_calibration_params(ctx_factory): for name in param_names: assert test_params1[name] == test_params2[name] - python_cost_model = PythonCostModel(pde_aware_translation_cost_model) + python_cost_model = PythonFMMCostModel(pde_aware_translation_cost_model) python_params = python_cost_model.estimate_calibration_params( traversals[:-1], level_to_orders[:-1], timing_results[:-1], wall_time=wall_time ) test_params_sanity(python_params) - cl_cost_model = CLCostModel(queue, pde_aware_translation_cost_model) + cl_cost_model = CLFMMCostModel(queue, pde_aware_translation_cost_model) cl_params = 
cl_cost_model.estimate_calibration_params( traversals_dev[:-1], level_to_orders[:-1], timing_results[:-1], wall_time=wall_time @@ -498,7 +498,8 @@ class OpCountingTranslationCostModel(object): (5000, 5000, 3, np.float64) ] ) -def test_cost_model_correctness(ctx_factory, nsources, ntargets, dims, dtype): +def test_cost_model_gives_correct_op_counts_with_constantone_wrangler( + ctx_factory, nsources, ntargets, dims, dtype): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -532,7 +533,7 @@ def test_cost_model_correctness(ctx_factory, nsources, ntargets, dims, dtype): src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype) drive_fmm(trav, wrangler, src_weights, timing_data=timing_data) - cost_model = CLCostModel( + cost_model = CLFMMCostModel( queue, translation_cost_model_factory=OpCountingTranslationCostModel ) @@ -561,20 +562,12 @@ def test_cost_model_correctness(ctx_factory, nsources, ntargets, dims, dtype): assert not mismatches, "\n".join(str(s) for s in mismatches) -def main(): - nsouces = 100000 - ntargets = 100000 - ndims = 3 - dtype = np.float64 - ctx_factory = cl.create_some_context - - test_cost_counter(ctx_factory, nsouces, ntargets, ndims, dtype) - test_estimate_calibration_params(ctx_factory) - test_cost_model_correctness(ctx_factory, nsouces, ntargets, ndims, dtype) - +# You can test individual routines by typing +# $ python test_cost_model.py 'test_routine(cl.create_some_context)' if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) else: - main() + from pytest import main + main([__file__]) -- GitLab From cada3c1cffd7515b2ea8c5a951b4cae8b5c83c7f Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 21 Jan 2019 17:31:51 -0600 Subject: [PATCH 25/50] Move demo test case into example folder --- .gitlab-ci.yml | 14 ++++++ boxtree/cost.py | 2 +- examples/cost_model.py | 102 ++++++++++++++++++++++++++++++++++++++++ test/test_cost_model.py | 25 +++------- 4 files changed, 123 insertions(+), 20 deletions(-) create mode 100644 
examples/cost_model.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5e60636..4431550 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -80,6 +80,20 @@ Python 3.6 POCL: reports: junit: test/pytest.xml +Python 3.6 POCL Examples: + script: + - export PY_EXE=python3.6 + - export PYOPENCL_TEST=portable + - export EXTRA_INSTALL="pybind11 numpy mako matplotlib" + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-py-project-and-run-examples.sh + - ". ./build-py-project-and-run-examples.sh" + tags: + - python3.6 + - pocl + except: + - tags + + Documentation: script: - EXTRA_INSTALL="pybind11 numpy mako" diff --git a/boxtree/cost.py b/boxtree/cost.py index 99ae340..c63ef92 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -504,7 +504,7 @@ class AbstractFMMCostModel(ABC): class CLFMMCostModel(AbstractFMMCostModel): """ - Note: For methods in this class, argument *traversal* should live on device + .. note:: For methods in this class, argument *traversal* should live on device memory. 
""" def __init__(self, queue, diff --git a/examples/cost_model.py b/examples/cost_model.py new file mode 100644 index 0000000..c4532a5 --- /dev/null +++ b/examples/cost_model.py @@ -0,0 +1,102 @@ +import numpy as np +import pyopencl as cl +import sys + +import logging +import os +logging.basicConfig(level=os.environ.get("LOGLEVEL", "WARNING")) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def demo_cost_model(): + from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler + + nsources_list = [1000, 2000, 3000, 4000, 5000] + ntargets_list = [1000, 2000, 3000, 4000, 5000] + dims = 3 + dtype = np.float64 + + ctx = cl.create_some_context() + queue = cl.CommandQueue(ctx) + + traversals = [] + traversals_dev = [] + level_to_orders = [] + timing_results = [] + + def fmm_level_to_nterms(tree, ilevel): + return 10 + + for nsources, ntargets in zip(nsources_list, ntargets_list): + # {{{ Generate sources, targets and target_radii + + from boxtree.tools import make_normal_particle_array as p_normal + sources = p_normal(queue, nsources, dims, dtype, seed=15) + targets = p_normal(queue, ntargets, dims, dtype, seed=18) + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(queue.context, seed=22) + target_radii = rng.uniform( + queue, ntargets, a=0, b=0.05, dtype=dtype + ).get() + + # }}} + + # {{{ Generate tree and traversal + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + tree, _ = tb( + queue, sources, targets=targets, target_radii=target_radii, + stick_out_factor=0.15, max_particles_in_box=30, debug=True + ) + + from boxtree.traversal import FMMTraversalBuilder + tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2) + trav_dev, _ = tg(queue, tree, debug=True) + trav = trav_dev.get(queue=queue) + + traversals.append(trav) + traversals_dev.append(trav_dev) + + # }}} + + wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms) + level_to_orders.append(wrangler.level_nterms) + + timing_data = {} + from 
boxtree.fmm import drive_fmm + src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype) + drive_fmm(trav, wrangler, src_weights, timing_data=timing_data) + + timing_results.append(timing_data) + + assert sys.version_info >= (3, 0) + wall_time = False + + from boxtree.cost import CLFMMCostModel + from boxtree.cost import pde_aware_translation_cost_model + cl_cost_model = CLFMMCostModel(queue, pde_aware_translation_cost_model) + cl_params = cl_cost_model.estimate_calibration_params( + traversals_dev[:-1], level_to_orders[:-1], timing_results[:-1], + wall_time=wall_time + ) + + cl_predicted_time = cl_cost_model( + traversals_dev[-1], level_to_orders[-1], cl_params + ) + + for field in ["form_multipoles", "eval_direct", "multipole_to_local", + "eval_multipoles", "form_locals", "eval_locals", + "coarsen_multipoles", "refine_locals"]: + logger.info("predicted time for {0}: {1}".format( + field, str(cl_cost_model.aggregate(cl_predicted_time[field])) + )) + logger.info("actual time for {0}: {1}".format( + field, str(timing_results[-1][field]["process_elapsed"]) + )) + + +if __name__ == '__main__': + demo_cost_model() diff --git a/test/test_cost_model.py b/test/test_cost_model.py index d0e5bac..74084da 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -358,8 +358,8 @@ def test_compare_cl_and_py_cost_model(ctx_factory, nsources, ntargets, dims, dty def test_estimate_calibration_params(ctx_factory): from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler - nsources_list = [1000, 2000, 3000, 4000, 5000] - ntargets_list = [1000, 2000, 3000, 4000, 5000] + nsources_list = [1000, 2000, 3000, 4000] + ntargets_list = [1000, 2000, 3000, 4000] dims = 3 dtype = np.float64 @@ -424,12 +424,14 @@ def test_estimate_calibration_params(ctx_factory): wall_time = True def test_params_sanity(test_params): - param_names = ["c_p2p", "c_m2l", "c_m2p", "c_p2l", "c_l2p"] + param_names = ["c_p2m", "c_m2m", "c_p2p", "c_m2l", "c_m2p", "c_p2l", "c_l2l", + 
"c_l2p"] for name in param_names: assert isinstance(test_params[name], np.float64) def test_params_equal(test_params1, test_params2): - param_names = ["c_p2p", "c_m2l", "c_m2p", "c_p2l", "c_l2p"] + param_names = ["c_p2m", "c_m2m", "c_p2p", "c_m2l", "c_m2p", "c_p2l", "c_l2l", + "c_l2p"] for name in param_names: assert test_params1[name] == test_params2[name] @@ -450,21 +452,6 @@ def test_estimate_calibration_params(ctx_factory): if sys.version_info >= (3, 0): test_params_equal(cl_params, python_params) - cl_predicted_time = cl_cost_model( - traversals_dev[-1], level_to_orders[-1], cl_params - ) - - if sys.version_info >= (3, 0): - for field in ["form_multipoles", "eval_direct", "multipole_to_local", - "eval_multipoles", "form_locals", "eval_locals", - "coarsen_multipoles", "refine_locals"]: - logger.info("predicted time for {0}: {1}".format( - field, str(cl_cost_model.aggregate(cl_predicted_time[field])) - )) - logger.info("actual time for {0}: {1}".format( - field, str(timing_results[-1][field]["process_elapsed"]) - )) - class OpCountingTranslationCostModel(object): """A translation cost model which assigns at cost of 1 to each operation.""" -- GitLab From ad3389f7a87187374e9ba85563ef4160c54a894a Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 25 Jan 2019 10:26:25 -0600 Subject: [PATCH 26/50] TranslationCostModel -> FMMTranslationCostModel --- boxtree/cost.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index c63ef92..2db2176 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -60,7 +60,7 @@ else: ABC = ABCMeta('ABC', (), {}) -class TranslationCostModel(object): +class FMMTranslationCostModel(object): """Provides modeled costs for individual translations or evaluations.""" def __init__(self, ncoeffs_fmm_by_level, uses_point_and_shoot): @@ -125,7 +125,7 @@ def pde_aware_translation_cost_model(dim, nlevels): else: uses_point_and_shoot = False - return TranslationCostModel( + return 
FMMTranslationCostModel( ncoeffs_fmm_by_level=ncoeffs_fmm, uses_point_and_shoot=uses_point_and_shoot ) @@ -138,7 +138,7 @@ def taylor_translation_cost_model(dim, nlevels): p_fmm = np.array([var("p_fmm_lev%d" % i) for i in range(nlevels)]) ncoeffs_fmm = (p_fmm + 1) ** dim - return TranslationCostModel( + return FMMTranslationCostModel( ncoeffs_fmm_by_level=ncoeffs_fmm, uses_point_and_shoot=False ) -- GitLab From b54a2eee3140ff152db1040ad22c41f1b52a359f Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 29 Jan 2019 17:53:08 -0600 Subject: [PATCH 27/50] Move direct source counting to a separate function --- boxtree/cost.py | 117 ++++++++++++++++++++++++---------------- test/test_cost_model.py | 14 ++++- 2 files changed, 82 insertions(+), 49 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 2db2176..d81b520 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -186,10 +186,25 @@ class AbstractFMMCostModel(ABC): pass @abstractmethod - def process_direct(self, traversal, c_p2p): + def get_ndirect_sources_per_target_box(self, traversal): + """Collect the number of direct evaluation sources (list 1, list 3 close and + list 4 close) for each target box. + + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (ntarget_boxes,), with each entry representing the number of direct + evaluation sources for that target box. + """ + pass + + @abstractmethod + def process_direct(self, traversal, ndirect_sources_by_itgt_box, c_p2p): """Direct evaluation cost of each target box of *traversal*. :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :arg ndirect_sources_by_itgt_box: a :class:`numpy.ndarray` or + :class:`pyopencl.array.Array` of shape (ntarget_boxes,), with each entry + representing the number of direct evaluation sources for that target box. :arg c_p2p: calibration constant. 
:return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (ntarget_boxes,), with each entry represents the cost of the box. @@ -392,8 +407,14 @@ class AbstractFMMCostModel(ABC): traversal, translation_cost["m2m_cost"] ) + ndirect_sources_per_target_box = \ + self.get_ndirect_sources_per_target_box(traversal) + uncalibrated_times["c_p2p"][icase] = self.aggregate( - self.process_direct(traversal, translation_cost["c_p2p"]) + self.process_direct( + traversal, ndirect_sources_per_target_box, + translation_cost["c_p2p"] + ) ) uncalibrated_times["c_m2l"][icase] = self.aggregate( @@ -475,8 +496,12 @@ class AbstractFMMCostModel(ABC): traversal, translation_cost["m2m_cost"] ) + ndirect_sources_per_target_box = self.get_ndirect_sources_per_target_box( + traversal + ) + result["eval_direct"] = self.process_direct( - traversal, translation_cost["c_p2p"] + traversal, ndirect_sources_per_target_box, translation_cost["c_p2p"] ) result["multipole_to_local"] = self.process_list2( @@ -637,17 +662,14 @@ class CLFMMCostModel(AbstractFMMCostModel): # {{{ direct evaluation to point targets (lists 1, 3 close, 4 close) @memoize_method - def process_direct_knl(self, particle_id_dtype, box_id_dtype): + def _get_ndirect_sources_knl(self, particle_id_dtype, box_id_dtype): return ElementwiseKernel( self.queue.context, Template(""" - double *direct_by_itgt_box, + ${particle_id_t} *ndirect_sources_by_itgt_box, ${box_id_t} *source_boxes_starts, ${box_id_t} *source_boxes_lists, - ${particle_id_t} *box_source_counts_nonchild, - ${particle_id_t} *box_target_counts_nonchild, - ${box_id_t} *target_boxes, - double c_p2p + ${particle_id_t} *box_source_counts_nonchild """).render( particle_id_t=dtype_to_ctype(particle_id_dtype), box_id_t=dtype_to_ctype(box_id_dtype) @@ -667,70 +689,67 @@ class CLFMMCostModel(AbstractFMMCostModel): nsources += box_source_counts_nonchild[cur_source_box]; } - ${particle_id_t} ntargets = box_target_counts_nonchild[ - target_boxes[i] - ]; - - 
direct_by_itgt_box[i] += (nsources * ntargets * c_p2p); + ndirect_sources_by_itgt_box[i] += nsources; """).render( particle_id_t=dtype_to_ctype(particle_id_dtype), box_id_t=dtype_to_ctype(box_id_dtype) ), - name="process_direct" + name="get_ndirect_sources" ) - def process_direct(self, traversal, c_p2p): + def get_ndirect_sources_per_target_box(self, traversal): tree = traversal.tree ntarget_boxes = len(traversal.target_boxes) particle_id_dtype = tree.particle_id_dtype box_id_dtype = tree.box_id_dtype - count_direct_interaction_knl = self.process_direct_knl( + get_ndirect_sources_knl = self._get_ndirect_sources_knl( particle_id_dtype, box_id_dtype ) - direct_by_itgt_box_dev = cl.array.zeros( - self.queue, (ntarget_boxes,), dtype=np.float64 + ndirect_sources_by_itgt_box = cl.array.zeros( + self.queue, ntarget_boxes, dtype=particle_id_dtype ) # List 1 - count_direct_interaction_knl( - direct_by_itgt_box_dev, + get_ndirect_sources_knl( + ndirect_sources_by_itgt_box, traversal.neighbor_source_boxes_starts, traversal.neighbor_source_boxes_lists, - traversal.tree.box_source_counts_nonchild, - traversal.tree.box_target_counts_nonchild, - traversal.target_boxes, - c_p2p + tree.box_source_counts_nonchild ) # List 3 close if traversal.from_sep_close_smaller_starts is not None: - self.queue.finish() # Avoid potential race condition - count_direct_interaction_knl( - direct_by_itgt_box_dev, + self.queue.finish() + get_ndirect_sources_knl( + ndirect_sources_by_itgt_box, traversal.from_sep_close_smaller_starts, traversal.from_sep_close_smaller_lists, - traversal.tree.box_source_counts_nonchild, - traversal.tree.box_target_counts_nonchild, - traversal.target_boxes, - c_p2p + tree.box_source_counts_nonchild ) # List 4 close if traversal.from_sep_close_bigger_starts is not None: - self.queue.finish() # Avoid potential race condition - count_direct_interaction_knl( - direct_by_itgt_box_dev, + self.queue.finish() + get_ndirect_sources_knl( + ndirect_sources_by_itgt_box, 
traversal.from_sep_close_bigger_starts, traversal.from_sep_close_bigger_lists, - traversal.tree.box_source_counts_nonchild, - traversal.tree.box_target_counts_nonchild, - traversal.target_boxes, - c_p2p + tree.box_source_counts_nonchild ) - return direct_by_itgt_box_dev + return ndirect_sources_by_itgt_box + + def process_direct(self, traversal, ndirect_sources_by_itgt_box, c_p2p): + from pyopencl.array import take + ntargets_by_itgt_box = take( + traversal.tree.box_target_counts_nonchild, + traversal.target_boxes, + queue=self.queue + ) + + return ndirect_sources_by_itgt_box * ntargets_by_itgt_box * c_p2p # }}} @@ -1035,12 +1054,12 @@ class PythonFMMCostModel(AbstractFMMCostModel): return np2m - def process_direct(self, traversal, c_p2p): + def get_ndirect_sources_per_target_box(self, traversal): tree = traversal.tree ntarget_boxes = len(traversal.target_boxes) # target box index -> nsources - direct_by_itgt_box = np.zeros(ntarget_boxes, dtype=np.float64) + ndirect_sources_by_itgt_box = np.zeros(ntarget_boxes, dtype=np.float64) for itgt_box in range(ntarget_boxes): nsources = 0 @@ -1065,12 +1084,16 @@ class PythonFMMCostModel(AbstractFMMCostModel): for src_ibox in traversal.from_sep_close_bigger_lists[start:end]: nsources += tree.box_source_counts_nonchild[src_ibox] - ntargets = tree.box_target_counts_nonchild[ - traversal.target_boxes[itgt_box] - ] - direct_by_itgt_box[itgt_box] += (nsources * ntargets * c_p2p) + ndirect_sources_by_itgt_box[itgt_box] = nsources + + return ndirect_sources_by_itgt_box + + def process_direct(self, traversal, ndirect_sources_by_itgt_box, c_p2p): + ntargets_by_itgt_box = traversal.tree.box_target_counts_nonchild[ + traversal.target_boxes + ] - return direct_by_itgt_box + return ntargets_by_itgt_box * ndirect_sources_by_itgt_box * c_p2p def process_list2(self, traversal, m2l_cost): tree = traversal.tree diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 74084da..37e22b8 100644 --- a/test/test_cost_model.py +++ 
b/test/test_cost_model.py @@ -156,7 +156,12 @@ def test_compare_cl_and_py_cost_model(ctx_factory, nsources, ntargets, dims, dty queue.finish() start_time = time.time() - cl_direct = cl_cost_model.process_direct(trav_dev, 5.0) + cl_ndirect_sources_per_target_box = \ + cl_cost_model.get_ndirect_sources_per_target_box(trav_dev) + + cl_direct = cl_cost_model.process_direct( + trav_dev, cl_ndirect_sources_per_target_box, 5.0 + ) queue.finish() logger.info("OpenCL time for process_direct: {0}".format( @@ -165,7 +170,12 @@ def test_compare_cl_and_py_cost_model(ctx_factory, nsources, ntargets, dims, dty start_time = time.time() - python_direct = python_cost_model.process_direct(trav, 5.0) + python_ndirect_sources_per_target_box = \ + python_cost_model.get_ndirect_sources_per_target_box(trav) + + python_direct = python_cost_model.process_direct( + trav, python_ndirect_sources_per_target_box, 5.0 + ) logger.info("Python time for process_direct: {0}".format( str(time.time() - start_time) -- GitLab From 93baacb3960114d91616d91651bd37479e2e5df2 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 6 Feb 2019 10:18:44 -0600 Subject: [PATCH 28/50] c_p2p -> p2p_cost --- boxtree/cost.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index d81b520..54e058f 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -198,14 +198,15 @@ class AbstractFMMCostModel(ABC): pass @abstractmethod - def process_direct(self, traversal, ndirect_sources_by_itgt_box, c_p2p): + def process_direct(self, traversal, ndirect_sources_by_itgt_box, p2p_cost): """Direct evaluation cost of each target box of *traversal*. :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. :arg ndirect_sources_by_itgt_box: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (ntarget_boxes,), with each entry representing the number of direct evaluation sources for that target box. - :arg c_p2p: calibration constant. 
+ :arg p2p_cost: a constant representing the cost of one point-to-point + evaluation. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (ntarget_boxes,), with each entry represents the cost of the box. """ @@ -741,7 +742,7 @@ class CLFMMCostModel(AbstractFMMCostModel): return ndirect_sources_by_itgt_box - def process_direct(self, traversal, ndirect_sources_by_itgt_box, c_p2p): + def process_direct(self, traversal, ndirect_sources_by_itgt_box, p2p_cost): from pyopencl.array import take ntargets_by_itgt_box = take( traversal.tree.box_target_counts_nonchild, @@ -749,7 +750,7 @@ class CLFMMCostModel(AbstractFMMCostModel): queue=self.queue ) - return ndirect_sources_by_itgt_box * ntargets_by_itgt_box * c_p2p + return ndirect_sources_by_itgt_box * ntargets_by_itgt_box * p2p_cost # }}} @@ -1088,12 +1089,12 @@ class PythonFMMCostModel(AbstractFMMCostModel): return ndirect_sources_by_itgt_box - def process_direct(self, traversal, ndirect_sources_by_itgt_box, c_p2p): + def process_direct(self, traversal, ndirect_sources_by_itgt_box, p2p_cost): ntargets_by_itgt_box = traversal.tree.box_target_counts_nonchild[ traversal.target_boxes ] - return ntargets_by_itgt_box * ndirect_sources_by_itgt_box * c_p2p + return ntargets_by_itgt_box * ndirect_sources_by_itgt_box * p2p_cost def process_list2(self, traversal, m2l_cost): tree = traversal.tree -- GitLab From 1dd2d8a1e2a36611cfd08ec304182fb8d47cda0e Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 6 Feb 2019 13:53:54 -0600 Subject: [PATCH 29/50] Update CI config file --- .gitlab-ci.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2e2e537..ef77f40 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -80,11 +80,12 @@ Python 3 POCL: reports: junit: test/pytest.xml -Python 3.6 POCL Examples: +Python 3 POCL Examples: script: - - export PY_EXE=python3.6 + - test -n "$SKIP_EXAMPLES" && exit + - export PY_EXE=python3 - export PYOPENCL_TEST=portable - 
- export EXTRA_INSTALL="pybind11 numpy mako matplotlib" + - export EXTRA_INSTALL="pybind11 numpy mako pyvisfile matplotlib" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-py-project-and-run-examples.sh - ". ./build-py-project-and-run-examples.sh" tags: -- GitLab From 66b3a78b7cb269e93e999404d7d137853fc53cac Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 14 Feb 2019 11:28:49 -0600 Subject: [PATCH 30/50] Modify code for transferring translation cost to device --- boxtree/cost.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 54e058f..35dcff9 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -299,7 +299,7 @@ class AbstractFMMCostModel(ABC): function can be used for process_* methods in this class. :arg nlevels: the number of tree levels. - :arg xlat_cost: a :class:`TranslationCostModel`. + :arg xlat_cost: a :class:`FMMTranslationCostModel`. :arg context: a :class:`dict` of parameters passed as context when evaluating symbolic expressions in *xlat_cost*. :return: a :class:`dict`, the translation cost of each step in FMM. @@ -1024,13 +1024,10 @@ class CLFMMCostModel(AbstractFMMCostModel): else: return cl.array.sum(per_box_result).get().reshape(-1)[0] - def cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): - translation_costs = super( - CLFMMCostModel, self - ).cost_factors_for_kernels_from_model( - nlevels, xlat_cost, context - ) - + def translation_costs_to_dev(self, translation_costs): + """This helper function transfers all :class:`numpy.ndarray` fields in + *translation_costs* to device memory as :class:`pyopencl.array.Array`. 
+ """ for name in translation_costs: if not isinstance(translation_costs[name], np.ndarray): continue @@ -1040,6 +1037,13 @@ class CLFMMCostModel(AbstractFMMCostModel): return translation_costs + def cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): + translation_costs = AbstractFMMCostModel.cost_factors_for_kernels_from_model( + self, nlevels, xlat_cost, context + ) + + return self.translation_costs_to_dev(translation_costs) + class PythonFMMCostModel(AbstractFMMCostModel): def process_form_multipoles(self, traversal, p2m_cost): -- GitLab From 99ceaace17ad4e67b6166f1b789a5e0b1edaa56e Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 17 Feb 2019 19:02:24 -0600 Subject: [PATCH 31/50] Add box_target_counts_nonchild as an optional argument --- boxtree/cost.py | 73 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 35dcff9..218022a 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -198,7 +198,8 @@ class AbstractFMMCostModel(ABC): pass @abstractmethod - def process_direct(self, traversal, ndirect_sources_by_itgt_box, p2p_cost): + def process_direct(self, traversal, ndirect_sources_by_itgt_box, p2p_cost, + box_target_counts_nonchild=None): """Direct evaluation cost of each target box of *traversal*. :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. @@ -207,6 +208,11 @@ class AbstractFMMCostModel(ABC): representing the number of direct evaluation sources for that target box. :arg p2p_cost: a constant representing the cost of one point-to-point evaluation. + :arg box_target_counts_nonchild: a :class:`numpy.ndarray` or + :class:`pyopencl.array.Array` of shape (nboxes,), the number of targets + using direct evaluation in this box. For example, this is useful in QBX + by specifying the number of non-QBX targets. If None, all targets in + boxes are considered. 
:return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (ntarget_boxes,), with each entry represents the cost of the box. """ @@ -225,12 +231,17 @@ class AbstractFMMCostModel(ABC): pass @abstractmethod - def process_list3(self, traversal, m2p_cost): + def process_list3(self, traversal, m2p_cost, box_target_counts_nonchild=None): """ :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. :arg m2p_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (nlevels,) where the ith entry represents the evaluation cost from multipole expansion at level i to a point. + :arg box_target_counts_nonchild: a :class:`numpy.ndarray` or + :class:`pyopencl.array.Array` of shape (nboxes,), the number of targets + using multiple-to-point translations in this box. For example, this is + useful in QBX by specifying the number of non-QBX targets. If None, all + targets in boxes are considered. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (nboxes,), with each entry representing the cost of evaluating all targets inside this box from multipole expansions of list-3 boxes. @@ -259,7 +270,10 @@ class AbstractFMMCostModel(ABC): of shape (nlevels,) where the ith entry represents the cost of evaluating the potential of a target in a box of level i using the box's local expansion. - :arg box_target_counts_nonchild: if None, use + :arg box_target_counts_nonchild: a :class:`numpy.ndarray` or + :class:`pyopencl.array.Array` of shape (nboxes,), the number of targets + which need evaluation. For example, this is useful in QBX by specifying + the number of non-QBX targets. If None, use traversal.tree.box_target_counts_nonchild. 
:return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (ntarget_boxes,), the cost of evaluating the potentials of all targets @@ -464,7 +478,8 @@ class AbstractFMMCostModel(ABC): return result - def __call__(self, traversal, level_to_order, params): + def __call__(self, traversal, level_to_order, params, + box_target_counts_nonchild=None): """Predict cost of a new traversal object. :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. @@ -473,6 +488,11 @@ class AbstractFMMCostModel(ABC): of different levels. :arg params: the calibration parameters returned by *estimate_calibration_params*. + :arg box_target_counts_nonchild: a :class:`numpy.ndarray` or + :class:`pyopencl.array.Array` of shape (nboxes,), the number of targets + which need evaluation. For example, this is useful in QBX by specifying + the number of non-QBX targets. If None, use all targets are considered, + namely traversal.tree.box_target_counts_nonchild. :return: a :class:`dict`, the cost of fmm stages. 
""" tree = traversal.tree @@ -489,6 +509,9 @@ class AbstractFMMCostModel(ABC): tree.nlevels, xlat_cost, params ) + if box_target_counts_nonchild is None: + box_target_counts_nonchild = traversal.tree.box_target_counts_nonchild + result["form_multipoles"] = self.process_form_multipoles( traversal, translation_cost["p2m_cost"] ) @@ -502,7 +525,8 @@ class AbstractFMMCostModel(ABC): ) result["eval_direct"] = self.process_direct( - traversal, ndirect_sources_per_target_box, translation_cost["c_p2p"] + traversal, ndirect_sources_per_target_box, translation_cost["c_p2p"], + box_target_counts_nonchild=box_target_counts_nonchild ) result["multipole_to_local"] = self.process_list2( @@ -510,7 +534,8 @@ class AbstractFMMCostModel(ABC): ) result["eval_multipoles"] = self.process_list3( - traversal, translation_cost["m2p_cost"] + traversal, translation_cost["m2p_cost"], + box_target_counts_nonchild=box_target_counts_nonchild ) result["form_locals"] = self.process_list4( @@ -522,7 +547,8 @@ class AbstractFMMCostModel(ABC): ) result["eval_locals"] = self.process_eval_locals( - traversal, translation_cost["l2p_cost"] + traversal, translation_cost["l2p_cost"], + box_target_counts_nonchild=box_target_counts_nonchild ) return result @@ -742,10 +768,14 @@ class CLFMMCostModel(AbstractFMMCostModel): return ndirect_sources_by_itgt_box - def process_direct(self, traversal, ndirect_sources_by_itgt_box, p2p_cost): + def process_direct(self, traversal, ndirect_sources_by_itgt_box, p2p_cost, + box_target_counts_nonchild=None): + if box_target_counts_nonchild is None: + box_target_counts_nonchild = traversal.tree.box_target_counts_nonchild + from pyopencl.array import take ntargets_by_itgt_box = take( - traversal.tree.box_target_counts_nonchild, + box_target_counts_nonchild, traversal.target_boxes, queue=self.queue ) @@ -837,10 +867,13 @@ class CLFMMCostModel(AbstractFMMCostModel): name="process_list3" ) - def process_list3(self, traversal, m2p_cost): + def process_list3(self, traversal, 
m2p_cost, box_target_counts_nonchild=None): tree = traversal.tree nm2p = cl.array.zeros(self.queue, tree.nboxes, dtype=np.float64) + if box_target_counts_nonchild is None: + box_target_counts_nonchild = tree.box_target_counts_nonchild + process_list3_knl = self.process_list3_knl( tree.box_id_dtype, tree.particle_id_dtype ) @@ -850,7 +883,7 @@ class CLFMMCostModel(AbstractFMMCostModel): process_list3_knl( traversal.target_boxes_sep_smaller_by_source_level[ilevel], sep_smaller_list.starts, - tree.box_target_counts_nonchild, + box_target_counts_nonchild, m2p_cost[ilevel].get().reshape(-1)[0], nm2p, queue=self.queue @@ -956,6 +989,7 @@ class CLFMMCostModel(AbstractFMMCostModel): tree = traversal.tree ntarget_boxes = len(traversal.target_boxes) neval_locals = cl.array.zeros(self.queue, ntarget_boxes, dtype=np.float64) + if box_target_counts_nonchild is None: box_target_counts_nonchild = traversal.tree.box_target_counts_nonchild @@ -1093,10 +1127,12 @@ class PythonFMMCostModel(AbstractFMMCostModel): return ndirect_sources_by_itgt_box - def process_direct(self, traversal, ndirect_sources_by_itgt_box, p2p_cost): - ntargets_by_itgt_box = traversal.tree.box_target_counts_nonchild[ - traversal.target_boxes - ] + def process_direct(self, traversal, ndirect_sources_by_itgt_box, p2p_cost, + box_target_counts_nonchild=None): + if box_target_counts_nonchild is None: + box_target_counts_nonchild = traversal.tree.box_target_counts_nonchild + + ntargets_by_itgt_box = box_target_counts_nonchild[traversal.target_boxes] return ntargets_by_itgt_box * ndirect_sources_by_itgt_box * p2p_cost @@ -1113,15 +1149,18 @@ class PythonFMMCostModel(AbstractFMMCostModel): return nm2l - def process_list3(self, traversal, m2p_cost): + def process_list3(self, traversal, m2p_cost, box_target_counts_nonchild=None): tree = traversal.tree nm2p = np.zeros(tree.nboxes, dtype=np.float64) + if box_target_counts_nonchild is None: + box_target_counts_nonchild = tree.box_target_counts_nonchild + for ilevel, 
sep_smaller_list in enumerate( traversal.from_sep_smaller_by_level): for itgt_box, tgt_ibox in enumerate( traversal.target_boxes_sep_smaller_by_source_level[ilevel]): - ntargets = tree.box_target_counts_nonchild[tgt_ibox] + ntargets = box_target_counts_nonchild[tgt_ibox] start, end = sep_smaller_list.starts[itgt_box:itgt_box + 2] nm2p[tgt_ibox] += ntargets * (end - start) * m2p_cost[ilevel] -- GitLab From 48177186982836b9d6aaf40f83b3e9f07d815ea6 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 17 Feb 2019 22:02:35 -0600 Subject: [PATCH 32/50] Add ndirect_sources_per_target_box as argument for eval --- boxtree/cost.py | 5 +---- test/test_cost_model.py | 9 ++++++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 218022a..8d2940a 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -479,6 +479,7 @@ class AbstractFMMCostModel(ABC): return result def __call__(self, traversal, level_to_order, params, + ndirect_sources_per_target_box, box_target_counts_nonchild=None): """Predict cost of a new traversal object. 
@@ -520,10 +521,6 @@ class AbstractFMMCostModel(ABC): traversal, translation_cost["m2m_cost"] ) - ndirect_sources_per_target_box = self.get_ndirect_sources_per_target_box( - traversal - ) - result["eval_direct"] = self.process_direct( traversal, ndirect_sources_per_target_box, translation_cost["c_p2p"], box_target_counts_nonchild=box_target_counts_nonchild diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 37e22b8..f0b642e 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -547,7 +547,14 @@ def test_cost_model_gives_correct_op_counts_with_constantone_wrangler( } level_to_order = np.array([1 for _ in range(tree.nlevels)]) - modeled_time = cost_model(trav_dev, level_to_order, params) + + ndirect_sources_per_target_box = cost_model.get_ndirect_sources_per_target_box( + trav_dev + ) + + modeled_time = cost_model( + trav_dev, level_to_order, params, ndirect_sources_per_target_box + ) mismatches = [] for stage in timing_data: -- GitLab From d27b3f883417f1b2f0236c13ceae27e9d32e91a9 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 17 Feb 2019 22:17:38 -0600 Subject: [PATCH 33/50] Add some doc --- boxtree/cost.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/boxtree/cost.py b/boxtree/cost.py index 8d2940a..21000d6 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -489,6 +489,11 @@ class AbstractFMMCostModel(ABC): of different levels. :arg params: the calibration parameters returned by *estimate_calibration_params*. + :arg ndirect_sources_per_target_box: a :class:`numpy.ndarray` or + :class:`pyopencl.array.Array` of shape (ntarget_boxes,), the number of + direct evaluation sources (list 1, list 3 close, list 4 close) for each + target box. You may find :func:`get_ndirect_sources_per_target_box` + helpful. :arg box_target_counts_nonchild: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (nboxes,), the number of targets which need evaluation. 
For example, this is useful in QBX by specifying -- GitLab From fab9940131250e8c9f28efb383516050b408eded Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 17 Feb 2019 23:38:16 -0600 Subject: [PATCH 34/50] Update example --- examples/cost_model.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/cost_model.py b/examples/cost_model.py index c4532a5..8af59a1 100644 --- a/examples/cost_model.py +++ b/examples/cost_model.py @@ -83,8 +83,12 @@ def demo_cost_model(): wall_time=wall_time ) + ndirect_sources_per_target_box = \ + cl_cost_model.get_ndirect_sources_per_target_box(traversals_dev[-1]) + cl_predicted_time = cl_cost_model( - traversals_dev[-1], level_to_orders[-1], cl_params + traversals_dev[-1], level_to_orders[-1], cl_params, + ndirect_sources_per_target_box ) for field in ["form_multipoles", "eval_direct", "multipole_to_local", -- GitLab From 68b82792985d2af8387e3574169e3e9c1cffbd43 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 18 Feb 2019 13:30:33 -0600 Subject: [PATCH 35/50] Use modeled result in estimate_calibration_params instead recomputing --- boxtree/cost.py | 222 ++++++++++++++++------------------------ examples/cost_model.py | 30 ++++-- test/test_cost_model.py | 35 ++++++- 3 files changed, 141 insertions(+), 146 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 21000d6..fa9e18b 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -350,145 +350,18 @@ class AbstractFMMCostModel(ABC): ], dtype=np.float64) } - def estimate_calibration_params(self, traversals, level_to_orders, - timing_results, wall_time=False): - """ - :arg traversals: a :class:`list` of - :class:`boxtree.traversal.FMMTraversalInfo` objects. Note each traversal - object can reside on host or device depending on cost counting - implemenation instead of the expansion wrangler used for executing. - :arg level_to_orders: a :class:`list` of the same length as *traversals*. 
- Each entry is a :class:`numpy.ndarray` representing the expansion order - of different levels. - :arg timing_results: a :class:`list` of the same length as *traversals*. - Each entry is a :class:`dict` filled with timing data returned by - *boxtree.fmm.drive_fmm* - :arg wall_time: a :class:`bool`, whether to use wall time or processor time. - :return: a :class:`dict` of calibration parameters. - """ - nresults = len(traversals) - assert len(level_to_orders) == nresults - assert len(timing_results) == nresults - - _FMM_STAGE_TO_CALIBRATION_PARAMETER = { - "form_multipoles": "c_p2m", - "coarsen_multipoles": "c_m2m", - "eval_direct": "c_p2p", - "multipole_to_local": "c_m2l", - "eval_multipoles": "c_m2p", - "form_locals": "c_p2l", - "refine_locals": "c_l2l", - "eval_locals": "c_l2p" - } - - params = set(_FMM_STAGE_TO_CALIBRATION_PARAMETER.values()) - - uncalibrated_times = {} - actual_times = {} - - for param in params: - uncalibrated_times[param] = np.zeros(nresults) - actual_times[param] = np.zeros(nresults) - - for icase, traversal in enumerate(traversals): - tree = traversal.tree - - xlat_cost = self.translation_cost_model_factory( - tree.dimensions, tree.nlevels - ) - - training_ctx = dict( - c_l2l=1, - c_l2p=1, - c_m2l=1, - c_m2m=1, - c_m2p=1, - c_p2l=1, - c_p2m=1, - c_p2p=1 - ) - for ilevel in range(tree.nlevels): - training_ctx["p_fmm_lev%d" % ilevel] = level_to_orders[icase][ilevel] - - translation_cost = self.cost_factors_for_kernels_from_model( - tree.nlevels, xlat_cost, training_ctx - ) - - uncalibrated_times["c_p2m"][icase] = self.aggregate( - self.process_form_multipoles(traversal, translation_cost["p2m_cost"]) - ) - - uncalibrated_times["c_m2m"][icase] = self.process_coarsen_multipoles( - traversal, translation_cost["m2m_cost"] - ) - - ndirect_sources_per_target_box = \ - self.get_ndirect_sources_per_target_box(traversal) - - uncalibrated_times["c_p2p"][icase] = self.aggregate( - self.process_direct( - traversal, ndirect_sources_per_target_box, - 
translation_cost["c_p2p"] - ) - ) - - uncalibrated_times["c_m2l"][icase] = self.aggregate( - self.process_list2(traversal, translation_cost["m2l_cost"]) - ) - - uncalibrated_times["c_m2p"][icase] = self.aggregate( - self.process_list3(traversal, translation_cost["m2p_cost"]) - ) - - uncalibrated_times["c_p2l"][icase] = self.aggregate( - self.process_list4(traversal, translation_cost["p2l_cost"]) - ) - - uncalibrated_times["c_l2l"][icase] = self.process_refine_locals( - traversal, translation_cost["l2l_cost"] - ) - - uncalibrated_times["c_l2p"][icase] = self.aggregate( - self.process_eval_locals(traversal, translation_cost["l2p_cost"]) - ) - - if wall_time: - field = "wall_elapsed" - else: - field = "process_elapsed" - - for icase, timing_result in enumerate(timing_results): - for param, time in timing_result.items(): - calibration_param = ( - _FMM_STAGE_TO_CALIBRATION_PARAMETER[param]) - actual_times[calibration_param][icase] = time[field] - - result = {} - - for param in params: - uncalibrated = uncalibrated_times[param] - actual = actual_times[param] - - if np.allclose(uncalibrated, 0): - result[param] = float("NaN") - continue - - result[param] = ( - actual.dot(uncalibrated) / uncalibrated.dot(uncalibrated)) - - return result - - def __call__(self, traversal, level_to_order, params, - ndirect_sources_per_target_box, - box_target_counts_nonchild=None): + def get_fmm_modeled_cost(self, traversal, level_to_order, params, + ndirect_sources_per_target_box, + box_target_counts_nonchild=None): """Predict cost of a new traversal object. :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. :arg level_to_order: a :class:`numpy.ndarray` of shape (traversal.tree.nlevels,) representing the expansion orders of different levels. - :arg params: the calibration parameters returned by - *estimate_calibration_params*. + :arg params: the calibration parameters. For evaluation, use parameters + returned by *estimate_calibration_params*. 
For training, specify None + will make all cost modifier 1. :arg ndirect_sources_per_target_box: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (ntarget_boxes,), the number of direct evaluation sources (list 1, list 3 close, list 4 close) for each @@ -504,6 +377,18 @@ class AbstractFMMCostModel(ABC): tree = traversal.tree result = {} + if params is None: + params = dict( + c_l2l=1.0, + c_l2p=1.0, + c_m2l=1.0, + c_m2m=1.0, + c_m2p=1.0, + c_p2l=1.0, + c_p2m=1.0, + c_p2p=1.0, + ) + for ilevel in range(tree.nlevels): params["p_fmm_lev%d" % ilevel] = level_to_order[ilevel] @@ -555,6 +440,77 @@ class AbstractFMMCostModel(ABC): return result + def __call__(self, *args, **kwargs): + """Shortcut for :func:`get_fmm_modeled_cost`. + """ + return self.get_fmm_modeled_cost(*args, **kwargs) + + def estimate_calibration_params(self, model_results, timing_results, + wall_time=False): + """ + :arg model_results: a :class:`list` of the modeled cost for each step of FMM, + returned by :func:`get_fmm_modeled_cost`. + :arg timing_results: a :class:`list` of the same length as *model_results*. + Each entry is a :class:`dict` filled with timing data returned by + *boxtree.fmm.drive_fmm* + :arg wall_time: a :class:`bool`, whether to use wall time or processor time. + :return: a :class:`dict` of calibration parameters. 
+ """ + nresults = len(model_results) + assert len(timing_results) == nresults + + _FMM_STAGE_TO_CALIBRATION_PARAMETER = { + "form_multipoles": "c_p2m", + "coarsen_multipoles": "c_m2m", + "eval_direct": "c_p2p", + "multipole_to_local": "c_m2l", + "eval_multipoles": "c_m2p", + "form_locals": "c_p2l", + "refine_locals": "c_l2l", + "eval_locals": "c_l2p" + } + + params = set(_FMM_STAGE_TO_CALIBRATION_PARAMETER.values()) + + uncalibrated_times = {} + actual_times = {} + + for param in params: + uncalibrated_times[param] = np.zeros(nresults) + actual_times[param] = np.zeros(nresults) + + for icase, model_result in enumerate(model_results): + for stage_name, param_name in \ + _FMM_STAGE_TO_CALIBRATION_PARAMETER.items(): + uncalibrated_times[param_name][icase] = \ + self.aggregate(model_result[stage_name]) + + if wall_time: + field = "wall_elapsed" + else: + field = "process_elapsed" + + for icase, timing_result in enumerate(timing_results): + for stage_name, time in timing_result.items(): + param_name = ( + _FMM_STAGE_TO_CALIBRATION_PARAMETER[stage_name]) + actual_times[param_name][icase] = time[field] + + result = {} + + for param in params: + uncalibrated = uncalibrated_times[param] + actual = actual_times[param] + + if np.allclose(uncalibrated, 0): + result[param] = float("NaN") + continue + + result[param] = ( + actual.dot(uncalibrated) / uncalibrated.dot(uncalibrated)) + + return result + class CLFMMCostModel(AbstractFMMCostModel): """ diff --git a/examples/cost_model.py b/examples/cost_model.py index 8af59a1..65fda99 100644 --- a/examples/cost_model.py +++ b/examples/cost_model.py @@ -77,17 +77,29 @@ def demo_cost_model(): from boxtree.cost import CLFMMCostModel from boxtree.cost import pde_aware_translation_cost_model - cl_cost_model = CLFMMCostModel(queue, pde_aware_translation_cost_model) - cl_params = cl_cost_model.estimate_calibration_params( - traversals_dev[:-1], level_to_orders[:-1], timing_results[:-1], - wall_time=wall_time + cost_model = 
CLFMMCostModel(queue, pde_aware_translation_cost_model) + + model_results = [] + for icase in range(len(traversals)-1): + traversal = traversals_dev[icase] + + ndirect_sources_per_target_box = ( + cost_model.get_ndirect_sources_per_target_box(traversal)) + + model_results.append(cost_model.get_fmm_modeled_cost( + traversals_dev[icase], level_to_orders[icase], None, + ndirect_sources_per_target_box) + ) + + params = cost_model.estimate_calibration_params( + model_results, timing_results[:-1], wall_time=wall_time ) - ndirect_sources_per_target_box = \ - cl_cost_model.get_ndirect_sources_per_target_box(traversals_dev[-1]) + ndirect_sources_per_target_box = ( + cost_model.get_ndirect_sources_per_target_box(traversals_dev[-1])) - cl_predicted_time = cl_cost_model( - traversals_dev[-1], level_to_orders[-1], cl_params, + predicted_time = cost_model( + traversals_dev[-1], level_to_orders[-1], params, ndirect_sources_per_target_box ) @@ -95,7 +107,7 @@ def demo_cost_model(): "eval_multipoles", "form_locals", "eval_locals", "coarsen_multipoles", "refine_locals"]: logger.info("predicted time for {0}: {1}".format( - field, str(cl_cost_model.aggregate(cl_predicted_time[field])) + field, str(cost_model.aggregate(predicted_time[field])) )) logger.info("actual time for {0}: {1}".format( field, str(timing_results[-1][field]["process_elapsed"]) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index f0b642e..8bb7a1b 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -446,16 +446,43 @@ def test_estimate_calibration_params(ctx_factory): assert test_params1[name] == test_params2[name] python_cost_model = PythonFMMCostModel(pde_aware_translation_cost_model) + + python_model_results = [] + + for icase in range(len(traversals)-1): + traversal = traversals[icase] + level_to_order = level_to_orders[icase] + + ndirect_sources_per_target_box = ( + python_cost_model.get_ndirect_sources_per_target_box(traversal)) + + 
python_model_results.append(python_cost_model.get_fmm_modeled_cost( + traversal, level_to_order, None, ndirect_sources_per_target_box + )) + python_params = python_cost_model.estimate_calibration_params( - traversals[:-1], level_to_orders[:-1], timing_results[:-1], - wall_time=wall_time + python_model_results, timing_results[:-1], wall_time=wall_time ) + test_params_sanity(python_params) cl_cost_model = CLFMMCostModel(queue, pde_aware_translation_cost_model) + + cl_model_results = [] + + for icase in range(len(traversals_dev)-1): + traversal = traversals_dev[icase] + level_to_order = level_to_orders[icase] + + ndirect_sources_per_target_box = ( + cl_cost_model.get_ndirect_sources_per_target_box(traversal)) + + cl_model_results.append(cl_cost_model.get_fmm_modeled_cost( + traversal, level_to_order, None, ndirect_sources_per_target_box + )) + cl_params = cl_cost_model.estimate_calibration_params( - traversals_dev[:-1], level_to_orders[:-1], timing_results[:-1], - wall_time=wall_time + cl_model_results, timing_results[:-1], wall_time=wall_time ) test_params_sanity(cl_params) -- GitLab From 2b2dfb2810e3294d24b9ffec8df06422bb7d92a5 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 18 Feb 2019 13:52:56 -0600 Subject: [PATCH 36/50] Allow supplying additional stages to parameter estimation --- boxtree/cost.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index fa9e18b..0d41c0c 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -446,7 +446,8 @@ class AbstractFMMCostModel(ABC): return self.get_fmm_modeled_cost(*args, **kwargs) def estimate_calibration_params(self, model_results, timing_results, - wall_time=False): + wall_time=False, + additional_stage_to_param_names=()): """ :arg model_results: a :class:`list` of the modeled cost for each step of FMM, returned by :func:`get_fmm_modeled_cost`. 
@@ -454,6 +455,9 @@ class AbstractFMMCostModel(ABC): Each entry is a :class:`dict` filled with timing data returned by *boxtree.fmm.drive_fmm* :arg wall_time: a :class:`bool`, whether to use wall time or processor time. + :arg additional_stage_to_param_names: a :class:`dict` for mapping stage names + to parameter names. This is useful for supplying additional stages of + QBX. :return: a :class:`dict` of calibration parameters. """ nresults = len(model_results) @@ -470,7 +474,10 @@ class AbstractFMMCostModel(ABC): "eval_locals": "c_l2p" } - params = set(_FMM_STAGE_TO_CALIBRATION_PARAMETER.values()) + stage_to_param_names = _FMM_STAGE_TO_CALIBRATION_PARAMETER.copy() + stage_to_param_names.update(additional_stage_to_param_names) + + params = set(stage_to_param_names.values()) uncalibrated_times = {} actual_times = {} @@ -480,8 +487,7 @@ class AbstractFMMCostModel(ABC): actual_times[param] = np.zeros(nresults) for icase, model_result in enumerate(model_results): - for stage_name, param_name in \ - _FMM_STAGE_TO_CALIBRATION_PARAMETER.items(): + for stage_name, param_name in stage_to_param_names.items(): uncalibrated_times[param_name][icase] = \ self.aggregate(model_result[stage_name]) @@ -492,8 +498,7 @@ class AbstractFMMCostModel(ABC): for icase, timing_result in enumerate(timing_results): for stage_name, time in timing_result.items(): - param_name = ( - _FMM_STAGE_TO_CALIBRATION_PARAMETER[stage_name]) + param_name = stage_to_param_names[stage_name] actual_times[param_name][icase] = time[field] result = {} -- GitLab From be370967d14aa35be9f2965c00b3b88b2b5ff530 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 19 Feb 2019 13:56:16 -0600 Subject: [PATCH 37/50] Attach calibration params to each cost model object --- boxtree/cost.py | 73 +++++++++++++++++++++++++++++------------ examples/cost_model.py | 12 ++++--- test/test_cost_model.py | 46 +++++++++----------------- 3 files changed, 76 insertions(+), 55 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 
0d41c0c..2836e8b 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -61,7 +61,13 @@ else: class FMMTranslationCostModel(object): - """Provides modeled costs for individual translations or evaluations.""" + """Provides modeled costs for individual translations or evaluations. + + .. note:: Current implementation assumes the calibration parameters are linear + in the modeled cost. For example, + `var("c_p2l") * self.ncoeffs_fmm_by_level[level]` is valid, but + `var("c_p2l") ** 2 * self.ncoeffs_fmm_by_level[level]` is not. + """ def __init__(self, ncoeffs_fmm_by_level, uses_point_and_shoot): self.ncoeffs_fmm_by_level = ncoeffs_fmm_by_level @@ -148,14 +154,27 @@ def taylor_translation_cost_model(dim, nlevels): class AbstractFMMCostModel(ABC): def __init__(self, + calibration_params, translation_cost_model_factory=pde_aware_translation_cost_model): """ + :arg calibration_params: the calibration parameters. For evaluation, use + parameters returned by :func:`estimate_calibration_params`. For training, + use :func:`get_constantone_calibration_params` to make all cost modifiers + 1. :arg translation_cost_model_factory: a function, which takes tree dimension and the number of tree levels as arguments, returns an object of :class:`TranslationCostModel`. """ + self.calibration_params = calibration_params self.translation_cost_model_factory = translation_cost_model_factory + def with_calibration_params(self, calibration_params): + """Return a copy of *self* with a new set of calibration parameters.""" + return type(self)( + calibration_params, + translation_cost_model_factory=self.translation_cost_model_factory + ) + @abstractmethod def process_form_multipoles(self, traversal, p2m_cost): """Cost for forming multipole expansions of each box. 
@@ -350,7 +369,7 @@ class AbstractFMMCostModel(ABC): ], dtype=np.float64) } - def get_fmm_modeled_cost(self, traversal, level_to_order, params, + def get_fmm_modeled_cost(self, traversal, level_to_order, ndirect_sources_per_target_box, box_target_counts_nonchild=None): """Predict cost of a new traversal object. @@ -359,9 +378,6 @@ class AbstractFMMCostModel(ABC): :arg level_to_order: a :class:`numpy.ndarray` of shape (traversal.tree.nlevels,) representing the expansion orders of different levels. - :arg params: the calibration parameters. For evaluation, use parameters - returned by *estimate_calibration_params*. For training, specify None - will make all cost modifier 1. :arg ndirect_sources_per_target_box: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (ntarget_boxes,), the number of direct evaluation sources (list 1, list 3 close, list 4 close) for each @@ -375,20 +391,9 @@ class AbstractFMMCostModel(ABC): :return: a :class:`dict`, the cost of fmm stages. """ tree = traversal.tree + params = self.calibration_params.copy() result = {} - if params is None: - params = dict( - c_l2l=1.0, - c_l2p=1.0, - c_m2l=1.0, - c_m2m=1.0, - c_m2p=1.0, - c_p2l=1.0, - c_p2m=1.0, - c_p2p=1.0, - ) - for ilevel in range(tree.nlevels): params["p_fmm_lev%d" % ilevel] = level_to_order[ilevel] @@ -445,6 +450,19 @@ class AbstractFMMCostModel(ABC): """ return self.get_fmm_modeled_cost(*args, **kwargs) + @staticmethod + def get_constantone_calibration_params(): + return dict( + c_l2l=1.0, + c_l2p=1.0, + c_m2l=1.0, + c_m2m=1.0, + c_m2p=1.0, + c_p2l=1.0, + c_p2m=1.0, + c_p2p=1.0, + ) + def estimate_calibration_params(self, model_results, timing_results, wall_time=False, additional_stage_to_param_names=()): @@ -458,7 +476,9 @@ class AbstractFMMCostModel(ABC): :arg additional_stage_to_param_names: a :class:`dict` for mapping stage names to parameter names. This is useful for supplying additional stages of QBX. - :return: a :class:`dict` of calibration parameters. 
+ :return: a :class:`dict` of calibration parameters. If there is no model + result for a particular stage, the estimated calibration parameter for + that stage is NaN. """ nresults = len(model_results) assert len(timing_results) == nresults @@ -488,8 +508,9 @@ class AbstractFMMCostModel(ABC): for icase, model_result in enumerate(model_results): for stage_name, param_name in stage_to_param_names.items(): - uncalibrated_times[param_name][icase] = \ - self.aggregate(model_result[stage_name]) + if stage_name in model_result: + uncalibrated_times[param_name][icase] = ( + self.aggregate(model_result[stage_name])) if wall_time: field = "wall_elapsed" @@ -523,9 +544,19 @@ class CLFMMCostModel(AbstractFMMCostModel): memory. """ def __init__(self, queue, + calibration_params, translation_cost_model_factory=pde_aware_translation_cost_model): self.queue = queue - super(CLFMMCostModel, self).__init__(translation_cost_model_factory) + AbstractFMMCostModel.__init__( + self, calibration_params, translation_cost_model_factory + ) + + def with_calibration_params(self, calibration_params): + """Return a copy of *self* with a new set of calibration parameters.""" + return type(self)( + self.queue, calibration_params, + translation_cost_model_factory=self.translation_cost_model_factory + ) # {{{ form multipoles diff --git a/examples/cost_model.py b/examples/cost_model.py index 65fda99..5ec213d 100644 --- a/examples/cost_model.py +++ b/examples/cost_model.py @@ -77,7 +77,10 @@ def demo_cost_model(): from boxtree.cost import CLFMMCostModel from boxtree.cost import pde_aware_translation_cost_model - cost_model = CLFMMCostModel(queue, pde_aware_translation_cost_model) + cost_model = CLFMMCostModel( + queue, CLFMMCostModel.get_constantone_calibration_params(), + pde_aware_translation_cost_model + ) model_results = [] for icase in range(len(traversals)-1): @@ -87,7 +90,7 @@ def demo_cost_model(): cost_model.get_ndirect_sources_per_target_box(traversal)) 
model_results.append(cost_model.get_fmm_modeled_cost( - traversals_dev[icase], level_to_orders[icase], None, + traversals_dev[icase], level_to_orders[icase], ndirect_sources_per_target_box) ) @@ -95,12 +98,13 @@ def demo_cost_model(): model_results, timing_results[:-1], wall_time=wall_time ) + cost_model = cost_model.with_calibration_params(params) + ndirect_sources_per_target_box = ( cost_model.get_ndirect_sources_per_target_box(traversals_dev[-1])) predicted_time = cost_model( - traversals_dev[-1], level_to_orders[-1], params, - ndirect_sources_per_target_box + traversals_dev[-1], level_to_orders[-1], ndirect_sources_per_target_box ) for field in ["form_multipoles", "eval_direct", "multipole_to_local", diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 8bb7a1b..06cf790 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -59,19 +59,10 @@ def test_compare_cl_and_py_cost_model(ctx_factory, nsources, ntargets, dims, dty # {{{ Construct cost models - cl_cost_model = CLFMMCostModel(queue, None) - python_cost_model = PythonFMMCostModel(None) - - constant_one_params = dict( - c_l2l=1, - c_l2p=1, - c_m2l=1, - c_m2m=1, - c_m2p=1, - c_p2l=1, - c_p2m=1, - c_p2p=1 - ) + cl_cost_model = CLFMMCostModel(queue, None, None) + python_cost_model = PythonFMMCostModel(None, None) + + constant_one_params = cl_cost_model.get_constantone_calibration_params().copy() for ilevel in range(trav.tree.nlevels): constant_one_params["p_fmm_lev%d" % ilevel] = 10 @@ -445,7 +436,10 @@ def test_estimate_calibration_params(ctx_factory): for name in param_names: assert test_params1[name] == test_params2[name] - python_cost_model = PythonFMMCostModel(pde_aware_translation_cost_model) + python_cost_model = PythonFMMCostModel( + PythonFMMCostModel.get_constantone_calibration_params(), + pde_aware_translation_cost_model + ) python_model_results = [] @@ -457,7 +451,7 @@ def test_estimate_calibration_params(ctx_factory): 
python_cost_model.get_ndirect_sources_per_target_box(traversal)) python_model_results.append(python_cost_model.get_fmm_modeled_cost( - traversal, level_to_order, None, ndirect_sources_per_target_box + traversal, level_to_order, ndirect_sources_per_target_box )) python_params = python_cost_model.estimate_calibration_params( @@ -466,7 +460,10 @@ def test_estimate_calibration_params(ctx_factory): test_params_sanity(python_params) - cl_cost_model = CLFMMCostModel(queue, pde_aware_translation_cost_model) + cl_cost_model = CLFMMCostModel( + queue, CLFMMCostModel.get_constantone_calibration_params(), + pde_aware_translation_cost_model + ) cl_model_results = [] @@ -478,7 +475,7 @@ def test_estimate_calibration_params(ctx_factory): cl_cost_model.get_ndirect_sources_per_target_box(traversal)) cl_model_results.append(cl_cost_model.get_fmm_modeled_cost( - traversal, level_to_order, None, ndirect_sources_per_target_box + traversal, level_to_order, ndirect_sources_per_target_box )) cl_params = cl_cost_model.estimate_calibration_params( @@ -558,21 +555,10 @@ def test_cost_model_gives_correct_op_counts_with_constantone_wrangler( drive_fmm(trav, wrangler, src_weights, timing_data=timing_data) cost_model = CLFMMCostModel( - queue, + queue, CLFMMCostModel.get_constantone_calibration_params(), translation_cost_model_factory=OpCountingTranslationCostModel ) - params = { - "c_p2m": 1.0, - "c_m2m": 1.0, - "c_p2p": 1.0, - "c_m2l": 1.0, - "c_m2p": 1.0, - "c_p2l": 1.0, - "c_l2l": 1.0, - "c_l2p": 1.0 - } - level_to_order = np.array([1 for _ in range(tree.nlevels)]) ndirect_sources_per_target_box = cost_model.get_ndirect_sources_per_target_box( @@ -580,7 +566,7 @@ def test_cost_model_gives_correct_op_counts_with_constantone_wrangler( ) modeled_time = cost_model( - trav_dev, level_to_order, params, ndirect_sources_per_target_box + trav_dev, level_to_order, ndirect_sources_per_target_box ) mismatches = [] -- GitLab From 0f8479cd83b18cd39a265b29cc4701f403369378 Mon Sep 17 00:00:00 2001 From: Hao 
Gao Date: Tue, 19 Feb 2019 22:37:28 -0600 Subject: [PATCH 38/50] cost_factors_for_kernels_from_model->fmm_cost_factors_for_kernels_from_model --- boxtree/cost.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 2836e8b..1bc6449 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -327,7 +327,7 @@ class AbstractFMMCostModel(ABC): """ pass - def cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): + def fmm_cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): """Evaluate translation cost factors from symbolic model. The result of this function can be used for process_* methods in this class. @@ -401,7 +401,7 @@ class AbstractFMMCostModel(ABC): tree.dimensions, tree.nlevels ) - translation_cost = self.cost_factors_for_kernels_from_model( + translation_cost = self.fmm_cost_factors_for_kernels_from_model( tree.nlevels, xlat_cost, params ) @@ -1065,9 +1065,11 @@ class CLFMMCostModel(AbstractFMMCostModel): return translation_costs - def cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): - translation_costs = AbstractFMMCostModel.cost_factors_for_kernels_from_model( - self, nlevels, xlat_cost, context + def fmm_cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): + translation_costs = ( + AbstractFMMCostModel.fmm_cost_factors_for_kernels_from_model( + self, nlevels, xlat_cost, context + ) ) return self.translation_costs_to_dev(translation_costs) -- GitLab From 48c142d89670dfcb77bf71580ea6a4395f92ddbf Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 20 Feb 2019 15:19:12 -0600 Subject: [PATCH 39/50] Add aggregate_stage_costs_per_box --- boxtree/cost.py | 53 +++++++++++++++++++++++++++++++++++++++++ test/test_cost_model.py | 16 +++++++++++++ 2 files changed, 69 insertions(+) diff --git a/boxtree/cost.py b/boxtree/cost.py index 1bc6449..cb6eadf 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -450,6 +450,21 @@ class 
AbstractFMMCostModel(ABC): """ return self.get_fmm_modeled_cost(*args, **kwargs) + @abstractmethod + def aggregate_stage_costs_per_box(self, traversal, cost_result): + """Given per-stage costs, this method calculates the sum of costs from all + stages for each box. This is used for load balancing in distributed + implementation. + + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :arg cost_result: modeled cost of each stage by + :func:`get_fmm_modeled_cost`. + :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (nboxes,), where the ith entry represents the cost of all stages for box + i. + """ + pass + @staticmethod def get_constantone_calibration_params(): return dict( @@ -1074,6 +1089,25 @@ class CLFMMCostModel(AbstractFMMCostModel): return self.translation_costs_to_dev(translation_costs) + def aggregate_stage_costs_per_box(self, traversal, cost_result): + tree = traversal.tree + nboxes = tree.nboxes + source_boxes = traversal.source_boxes + target_boxes = traversal.target_boxes + target_or_target_parent_boxes = traversal.target_or_target_parent_boxes + + cost_per_box = cl.array.zeros(self.queue, (nboxes,), dtype=np.float64) + + cost_per_box[source_boxes] += cost_result["form_multipoles"] + cost_per_box[target_boxes] += cost_result["eval_direct"] + cost_per_box[target_or_target_parent_boxes] += \ + cost_result["multipole_to_local"] + cost_per_box += cost_result["eval_multipoles"] + cost_per_box[target_or_target_parent_boxes] += cost_result["form_locals"] + cost_per_box[target_boxes] += cost_result["eval_locals"] + + return cost_per_box + class PythonFMMCostModel(AbstractFMMCostModel): def process_form_multipoles(self, traversal, p2m_cost): @@ -1238,3 +1272,22 @@ class PythonFMMCostModel(AbstractFMMCostModel): return per_box_result else: return np.sum(per_box_result) + + def aggregate_stage_costs_per_box(self, traversal, cost_result): + tree = traversal.tree + nboxes = tree.nboxes + source_boxes = 
traversal.source_boxes + target_boxes = traversal.target_boxes + target_or_target_parent_boxes = traversal.target_or_target_parent_boxes + + cost_per_box = np.zeros(nboxes, dtype=np.float64) + + cost_per_box[source_boxes] += cost_result["form_multipoles"] + cost_per_box[target_boxes] += cost_result["eval_direct"] + cost_per_box[target_or_target_parent_boxes] += \ + cost_result["multipole_to_local"] + cost_per_box += cost_result["eval_multipoles"] + cost_per_box[target_or_target_parent_boxes] += cost_result["form_locals"] + cost_per_box[target_boxes] += cost_result["eval_locals"] + + return cost_per_box diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 06cf790..5329341 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -578,6 +578,22 @@ def test_cost_model_gives_correct_op_counts_with_constantone_wrangler( assert not mismatches, "\n".join(str(s) for s in mismatches) + # {{{ Test aggregate_stage_costs_per_box + + total_cost = 0.0 + for stage in timing_data: + total_cost += timing_data[stage]["ops_elapsed"] + + per_box_cost = cost_model.aggregate_stage_costs_per_box(trav_dev, modeled_time) + total_aggregate_cost = cost_model.aggregate(per_box_cost) + assert total_cost == ( + total_aggregate_cost + + modeled_time["coarsen_multipoles"] + + modeled_time["refine_locals"] + ) + + # }}} + # You can test individual routines by typing # $ python test_cost_model.py 'test_routine(cl.create_some_context)' -- GitLab From 2c57eb173c41c6b400d7157ac7fbbfb95ee21a77 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 4 Mar 2019 17:33:02 -0600 Subject: [PATCH 40/50] Remove __init__ from AbstractFMMCostModel --- boxtree/cost.py | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index cb6eadf..a0b77a7 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -153,21 +153,6 @@ def taylor_translation_cost_model(dim, nlevels): class AbstractFMMCostModel(ABC): - 
def __init__(self, - calibration_params, - translation_cost_model_factory=pde_aware_translation_cost_model): - """ - :arg calibration_params: the calibration parameters. For evaluation, use - parameters returned by :func:`estimate_calibration_params`. For training, - use :func:`get_constantone_calibration_params` to make all cost modifiers - 1. - :arg translation_cost_model_factory: a function, which takes tree dimension - and the number of tree levels as arguments, returns an object of - :class:`TranslationCostModel`. - """ - self.calibration_params = calibration_params - self.translation_cost_model_factory = translation_cost_model_factory - def with_calibration_params(self, calibration_params): """Return a copy of *self* with a new set of calibration parameters.""" return type(self)( @@ -561,10 +546,20 @@ class CLFMMCostModel(AbstractFMMCostModel): def __init__(self, queue, calibration_params, translation_cost_model_factory=pde_aware_translation_cost_model): + """ + :arg queue: a :class:`pyopencl.CommandQueue` object on which the execution + of this object runs. + :arg calibration_params: the calibration parameters. For evaluation, use + parameters returned by :func:`estimate_calibration_params`. For training, + use :func:`get_constantone_calibration_params` to make all cost modifiers + 1. + :arg translation_cost_model_factory: a function, which takes tree dimension + and the number of tree levels as arguments, returns an object of + :class:`TranslationCostModel`. 
+ """ self.queue = queue - AbstractFMMCostModel.__init__( - self, calibration_params, translation_cost_model_factory - ) + self.calibration_params = calibration_params + self.translation_cost_model_factory = translation_cost_model_factory def with_calibration_params(self, calibration_params): """Return a copy of *self* with a new set of calibration parameters.""" @@ -1110,6 +1105,21 @@ class CLFMMCostModel(AbstractFMMCostModel): class PythonFMMCostModel(AbstractFMMCostModel): + def __init__(self, + calibration_params, + translation_cost_model_factory=pde_aware_translation_cost_model): + """ + :arg calibration_params: the calibration parameters. For evaluation, use + parameters returned by :func:`estimate_calibration_params`. For training, + use :func:`get_constantone_calibration_params` to make all cost modifiers + 1. + :arg translation_cost_model_factory: a function, which takes tree dimension + and the number of tree levels as arguments, returns an object of + :class:`TranslationCostModel`. + """ + self.calibration_params = calibration_params + self.translation_cost_model_factory = translation_cost_model_factory + def process_form_multipoles(self, traversal, p2m_cost): tree = traversal.tree np2m = np.zeros(len(traversal.source_boxes), dtype=np.float64) -- GitLab From 52d81b776e4bd0ff3e6adb811603c362a7a4195a Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 7 Mar 2019 17:30:39 -0600 Subject: [PATCH 41/50] Add rst cost model doc --- boxtree/cost.py | 6 +++--- doc/cost.rst | 29 +++++++++++++++++++++++++++++ doc/index.rst | 1 + 3 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 doc/cost.rst diff --git a/boxtree/cost.py b/boxtree/cost.py index a0b77a7..85f62bc 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -1,12 +1,12 @@ """ This module helps predict the running time of each step of FMM. 
There are two -implementations of the interface :class`AbstractFMMCostModel`, namely +implementations of the interface :class:`AbstractFMMCostModel`, namely :class:`CLFMMCostModel` using OpenCL and :class:`PythonFMMCostModel` using pure Python. An implementation of :class:`AbstractFMMCostModel` uses a -:class:`TranslationCostModel` to assign translation costs to a -:class:`FMMTraversalInfo` object. +:class:`FMMTranslationCostModel` to assign translation costs to a +:class:`boxtree.traversal.FMMTraversalInfo` object. """ from __future__ import division, absolute_import diff --git a/doc/cost.rst b/doc/cost.rst new file mode 100644 index 0000000..02acdf9 --- /dev/null +++ b/doc/cost.rst @@ -0,0 +1,29 @@ +FMM cost model +============== + +.. module:: boxtree.cost + +.. automodule:: boxtree.cost + +Translation cost model +---------------------- + +.. autoclass:: FMMTranslationCostModel + +.. autofunction:: pde_aware_translation_cost_model + +.. autofunction:: taylor_translation_cost_model + +Cost model interface +-------------------- + +.. autoclass:: AbstractFMMCostModel + :members: + :member-order: bysource + +Cost model implementation +------------------------- + +.. autoclass:: CLFMMCostModel + +.. 
autoclass:: PythonFMMCostModel \ No newline at end of file diff --git a/doc/index.rst b/doc/index.rst index 9d86c88..edff8a3 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -40,6 +40,7 @@ Overview traversal fmm lookup + cost misc Indices and tables -- GitLab From 8b969c661b78a2547665488ee9728e35e4dbb075 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 8 Mar 2019 16:52:10 -0600 Subject: [PATCH 42/50] Change wall_time in estimate_calibration_params from bool to str name --- boxtree/cost.py | 12 ++++-------- examples/cost_model.py | 4 ++-- test/test_cost_model.py | 8 ++++---- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 85f62bc..7b905cf 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -464,7 +464,7 @@ class AbstractFMMCostModel(ABC): ) def estimate_calibration_params(self, model_results, timing_results, - wall_time=False, + time_field_name="wall_elapsed", additional_stage_to_param_names=()): """ :arg model_results: a :class:`list` of the modeled cost for each step of FMM, @@ -472,7 +472,8 @@ class AbstractFMMCostModel(ABC): :arg timing_results: a :class:`list` of the same length as *model_results*. Each entry is a :class:`dict` filled with timing data returned by *boxtree.fmm.drive_fmm* - :arg wall_time: a :class:`bool`, whether to use wall time or processor time. + :arg time_field_name: a :class:`str`, the field name from the timing result. + Usually this can be "wall_elapsed" or "process_elapsed". :arg additional_stage_to_param_names: a :class:`dict` for mapping stage names to parameter names. This is useful for supplying additional stages of QBX. 
@@ -512,15 +513,10 @@ class AbstractFMMCostModel(ABC): uncalibrated_times[param_name][icase] = ( self.aggregate(model_result[stage_name])) - if wall_time: - field = "wall_elapsed" - else: - field = "process_elapsed" - for icase, timing_result in enumerate(timing_results): for stage_name, time in timing_result.items(): param_name = stage_to_param_names[stage_name] - actual_times[param_name][icase] = time[field] + actual_times[param_name][icase] = time[time_field_name] result = {} diff --git a/examples/cost_model.py b/examples/cost_model.py index 5ec213d..7370ead 100644 --- a/examples/cost_model.py +++ b/examples/cost_model.py @@ -73,7 +73,7 @@ def demo_cost_model(): timing_results.append(timing_data) assert sys.version_info >= (3, 0) - wall_time = False + time_field_name = "process_elapsed" from boxtree.cost import CLFMMCostModel from boxtree.cost import pde_aware_translation_cost_model @@ -95,7 +95,7 @@ def demo_cost_model(): ) params = cost_model.estimate_calibration_params( - model_results, timing_results[:-1], wall_time=wall_time + model_results, timing_results[:-1], time_field_name=time_field_name ) cost_model = cost_model.with_calibration_params(params) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 5329341..05e496f 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -420,9 +420,9 @@ def test_estimate_calibration_params(ctx_factory): timing_results.append(timing_data) if sys.version_info >= (3, 0): - wall_time = False + time_field_name = "process_elapsed" else: - wall_time = True + time_field_name = "wall_elapsed" def test_params_sanity(test_params): param_names = ["c_p2m", "c_m2m", "c_p2p", "c_m2l", "c_m2p", "c_p2l", "c_l2l", @@ -455,7 +455,7 @@ def test_estimate_calibration_params(ctx_factory): )) python_params = python_cost_model.estimate_calibration_params( - python_model_results, timing_results[:-1], wall_time=wall_time + python_model_results, timing_results[:-1], time_field_name=time_field_name ) 
test_params_sanity(python_params) @@ -479,7 +479,7 @@ def test_estimate_calibration_params(ctx_factory): )) cl_params = cl_cost_model.estimate_calibration_params( - cl_model_results, timing_results[:-1], wall_time=wall_time + cl_model_results, timing_results[:-1], time_field_name=time_field_name ) test_params_sanity(cl_params) -- GitLab From 27281e10fc5d4df3e12d15d37300e0462629a37c Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 8 Mar 2019 17:02:55 -0600 Subject: [PATCH 43/50] Use instance method instead of static method for aggregate --- boxtree/cost.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 7b905cf..0a93c77 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -301,9 +301,8 @@ class AbstractFMMCostModel(ABC): """ pass - @staticmethod @abstractmethod - def aggregate(per_box_result): + def aggregate(self, per_box_result): """Sum all entries of *per_box_result* into a number. :arg per_box_result: an object of :class:`numpy.ndarray` or @@ -1051,8 +1050,7 @@ class CLFMMCostModel(AbstractFMMCostModel): # }}} - @staticmethod - def aggregate(per_box_result): + def aggregate(self, per_box_result): if isinstance(per_box_result, float): return per_box_result else: @@ -1272,8 +1270,7 @@ class PythonFMMCostModel(AbstractFMMCostModel): return result - @staticmethod - def aggregate(per_box_result): + def aggregate(self, per_box_result): if isinstance(per_box_result, float): return per_box_result else: -- GitLab From 037ae54f20abbf6b0da90157db25a0356302b12b Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 11 Mar 2019 14:42:00 -0500 Subject: [PATCH 44/50] Use descriptive name SUPPORTS_PROCESS_TIME --- examples/cost_model.py | 8 +++++++- test/test_cost_model.py | 6 ++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/examples/cost_model.py b/examples/cost_model.py index 7370ead..4945c1f 100644 --- a/examples/cost_model.py +++ b/examples/cost_model.py @@ -8,8 +8,15 @@ 
logging.basicConfig(level=os.environ.get("LOGLEVEL", "WARNING")) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) +SUPPORTS_PROCESS_TIME = (sys.version_info >= (3, 3)) + def demo_cost_model(): + if not SUPPORTS_PROCESS_TIME: + raise NotImplementedError( + "Currently this script use process time which only works on Python>=3.3" + ) + from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler nsources_list = [1000, 2000, 3000, 4000, 5000] @@ -72,7 +79,6 @@ def demo_cost_model(): timing_results.append(timing_data) - assert sys.version_info >= (3, 0) time_field_name = "process_elapsed" from boxtree.cost import CLFMMCostModel diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 05e496f..e5b5d59 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -16,6 +16,8 @@ logging.basicConfig(level=os.environ.get("LOGLEVEL", "WARNING")) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) +SUPPORTS_PROCESS_TIME = (sys.version_info >= (3, 3)) + @pytest.mark.opencl @pytest.mark.parametrize( @@ -419,7 +421,7 @@ def test_estimate_calibration_params(ctx_factory): timing_results.append(timing_data) - if sys.version_info >= (3, 0): + if SUPPORTS_PROCESS_TIME: time_field_name = "process_elapsed" else: time_field_name = "wall_elapsed" @@ -483,7 +485,7 @@ def test_estimate_calibration_params(ctx_factory): ) test_params_sanity(cl_params) - if sys.version_info >= (3, 0): + if SUPPORTS_PROCESS_TIME: test_params_equal(cl_params, python_params) -- GitLab From c065aac3c7f2ab45b1ed339005e010c3c54b0bb1 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 11 Mar 2019 15:07:09 -0500 Subject: [PATCH 45/50] Simplify top-level interface of cost model --- boxtree/cost.py | 17 +++++++++++++++-- examples/cost_model.py | 18 +++--------------- test/test_cost_model.py | 22 +++------------------- 3 files changed, 21 insertions(+), 36 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 0a93c77..b535f25 100644 --- a/boxtree/cost.py 
+++ b/boxtree/cost.py @@ -429,10 +429,23 @@ class AbstractFMMCostModel(ABC): return result - def __call__(self, *args, **kwargs): + def __call__(self, traversal, level_to_order): """Shortcut for :func:`get_fmm_modeled_cost`. + + :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. + :arg level_to_order: a :class:`numpy.ndarray` of shape + (traversal.tree.nlevels,) representing the expansion orders + of different levels. + + :return: a :class:`dict`, the cost of fmm stages. """ - return self.get_fmm_modeled_cost(*args, **kwargs) + ndirect_sources_per_target_box = ( + self.get_ndirect_sources_per_target_box(traversal) + ) + + return self.get_fmm_modeled_cost( + traversal, level_to_order, ndirect_sources_per_target_box + ) @abstractmethod def aggregate_stage_costs_per_box(self, traversal, cost_result): diff --git a/examples/cost_model.py b/examples/cost_model.py index 4945c1f..b313e0d 100644 --- a/examples/cost_model.py +++ b/examples/cost_model.py @@ -14,7 +14,7 @@ SUPPORTS_PROCESS_TIME = (sys.version_info >= (3, 3)) def demo_cost_model(): if not SUPPORTS_PROCESS_TIME: raise NotImplementedError( - "Currently this script use process time which only works on Python>=3.3" + "Currently this script uses process time which only works on Python>=3.3" ) from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler @@ -91,14 +91,7 @@ def demo_cost_model(): model_results = [] for icase in range(len(traversals)-1): traversal = traversals_dev[icase] - - ndirect_sources_per_target_box = ( - cost_model.get_ndirect_sources_per_target_box(traversal)) - - model_results.append(cost_model.get_fmm_modeled_cost( - traversals_dev[icase], level_to_orders[icase], - ndirect_sources_per_target_box) - ) + model_results.append(cost_model(traversal, level_to_orders[icase])) params = cost_model.estimate_calibration_params( model_results, timing_results[:-1], time_field_name=time_field_name @@ -106,12 +99,7 @@ def demo_cost_model(): cost_model = 
cost_model.with_calibration_params(params) - ndirect_sources_per_target_box = ( - cost_model.get_ndirect_sources_per_target_box(traversals_dev[-1])) - - predicted_time = cost_model( - traversals_dev[-1], level_to_orders[-1], ndirect_sources_per_target_box - ) + predicted_time = cost_model(traversals_dev[-1], level_to_orders[-1]) for field in ["form_multipoles", "eval_direct", "multipole_to_local", "eval_multipoles", "form_locals", "eval_locals", diff --git a/test/test_cost_model.py b/test/test_cost_model.py index e5b5d59..32b3bf8 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -449,12 +449,7 @@ def test_estimate_calibration_params(ctx_factory): traversal = traversals[icase] level_to_order = level_to_orders[icase] - ndirect_sources_per_target_box = ( - python_cost_model.get_ndirect_sources_per_target_box(traversal)) - - python_model_results.append(python_cost_model.get_fmm_modeled_cost( - traversal, level_to_order, ndirect_sources_per_target_box - )) + python_model_results.append(python_cost_model(traversal, level_to_order)) python_params = python_cost_model.estimate_calibration_params( python_model_results, timing_results[:-1], time_field_name=time_field_name @@ -473,12 +468,7 @@ def test_estimate_calibration_params(ctx_factory): traversal = traversals_dev[icase] level_to_order = level_to_orders[icase] - ndirect_sources_per_target_box = ( - cl_cost_model.get_ndirect_sources_per_target_box(traversal)) - - cl_model_results.append(cl_cost_model.get_fmm_modeled_cost( - traversal, level_to_order, ndirect_sources_per_target_box - )) + cl_model_results.append(cl_cost_model(traversal, level_to_order)) cl_params = cl_cost_model.estimate_calibration_params( cl_model_results, timing_results[:-1], time_field_name=time_field_name @@ -563,13 +553,7 @@ def test_cost_model_gives_correct_op_counts_with_constantone_wrangler( level_to_order = np.array([1 for _ in range(tree.nlevels)]) - ndirect_sources_per_target_box = 
cost_model.get_ndirect_sources_per_target_box( - trav_dev - ) - - modeled_time = cost_model( - trav_dev, level_to_order, ndirect_sources_per_target_box - ) + modeled_time = cost_model(trav_dev, level_to_order) mismatches = [] for stage in timing_data: -- GitLab From 615eb8fc787f071b974388f6aee8404bc82140ff Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 11 Mar 2019 21:45:36 -0500 Subject: [PATCH 46/50] Improve documentation --- boxtree/cost.py | 4 +++- examples/cost_model.py | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index b535f25..e52ebbe 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -430,7 +430,9 @@ class AbstractFMMCostModel(ABC): return result def __call__(self, traversal, level_to_order): - """Shortcut for :func:`get_fmm_modeled_cost`. + """Top-level entry point for predicting cost of a new traversal object. + + Also see :func:`get_fmm_modeled_cost` for more customization. :arg traversal: a :class:`boxtree.traversal.FMMTraversalInfo` object. 
:arg level_to_order: a :class:`numpy.ndarray` of shape diff --git a/examples/cost_model.py b/examples/cost_model.py index b313e0d..2b349b7 100644 --- a/examples/cost_model.py +++ b/examples/cost_model.py @@ -4,10 +4,16 @@ import sys import logging import os + +# Set the root logger level to WARNING logging.basicConfig(level=os.environ.get("LOGLEVEL", "WARNING")) + logger = logging.getLogger(__name__) + +# Set the logger level of this module to INFO logger.setLevel(logging.INFO) +# `process_elapsed` in `ProcessTimer` is only supported for Python >= 3.3 SUPPORTS_PROCESS_TIME = (sys.version_info >= (3, 3)) -- GitLab From d3eb2fe767b481b15cefdfbc4a540a4b073395f5 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 11 Mar 2019 21:50:27 -0500 Subject: [PATCH 47/50] Improve doc --- examples/cost_model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/cost_model.py b/examples/cost_model.py index 2b349b7..e3d0a11 100644 --- a/examples/cost_model.py +++ b/examples/cost_model.py @@ -5,12 +5,13 @@ import sys import logging import os -# Set the root logger level to WARNING +# Configure the root logger logging.basicConfig(level=os.environ.get("LOGLEVEL", "WARNING")) logger = logging.getLogger(__name__) -# Set the logger level of this module to INFO +# Set the logger level of this module to INFO so that logging outputs of this module +# are shown logger.setLevel(logging.INFO) # `process_elapsed` in `ProcessTimer` is only supported for Python >= 3.3 -- GitLab From 150a62c1a8a05be2cf1f81aa731ac7b80084d99d Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 24 Mar 2019 10:26:55 -0500 Subject: [PATCH 48/50] Improve doc for pde_aware_translation_cost_model --- boxtree/cost.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/boxtree/cost.py b/boxtree/cost.py index e52ebbe..6bcec55 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -122,6 +122,9 @@ class FMMTranslationCostModel(object): def pde_aware_translation_cost_model(dim, nlevels): """Create a 
cost model for FMM translation operators that make use of the knowledge that the potential satisfies a PDE. + + For example, this factory is used for complex Taylor and Fourier-Bessel + expansions in 2D, and spherical harmonics (with point-and-shoot) in 3D. """ p_fmm = np.array([var("p_fmm_lev%d" % i) for i in range(nlevels)]) ncoeffs_fmm = (p_fmm + 1) ** (dim - 1) -- GitLab From 86f34686369ebd1f13c2398e59e35ae7a9de4834 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 19 Aug 2019 15:43:16 -0700 Subject: [PATCH 49/50] Supply calibration parameters instead of storing in model --- boxtree/cost.py | 46 ++++++++++++----------------------------- examples/cost_model.py | 14 ++++++------- test/test_cost_model.py | 28 +++++++++++++++---------- 3 files changed, 36 insertions(+), 52 deletions(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index 6bcec55..d96572b 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -156,13 +156,6 @@ def taylor_translation_cost_model(dim, nlevels): class AbstractFMMCostModel(ABC): - def with_calibration_params(self, calibration_params): - """Return a copy of *self* with a new set of calibration parameters.""" - return type(self)( - calibration_params, - translation_cost_model_factory=self.translation_cost_model_factory - ) - @abstractmethod def process_form_multipoles(self, traversal, p2m_cost): """Cost for forming multipole expansions of each box. @@ -358,6 +351,7 @@ class AbstractFMMCostModel(ABC): def get_fmm_modeled_cost(self, traversal, level_to_order, ndirect_sources_per_target_box, + calibration_params, box_target_counts_nonchild=None): """Predict cost of a new traversal object. @@ -370,26 +364,27 @@ class AbstractFMMCostModel(ABC): direct evaluation sources (list 1, list 3 close, list 4 close) for each target box. You may find :func:`get_ndirect_sources_per_target_box` helpful. + :arg calibration_params: a :class:`dict` of calibration parameters. These + parameters can be got from `estimate_calibration_params`. 
:arg box_target_counts_nonchild: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape (nboxes,), the number of targets which need evaluation. For example, this is useful in QBX by specifying - the number of non-QBX targets. If None, use all targets are considered, + the number of non-QBX targets. If None, all targets are considered, namely traversal.tree.box_target_counts_nonchild. :return: a :class:`dict`, the cost of fmm stages. """ tree = traversal.tree - params = self.calibration_params.copy() result = {} for ilevel in range(tree.nlevels): - params["p_fmm_lev%d" % ilevel] = level_to_order[ilevel] + calibration_params["p_fmm_lev%d" % ilevel] = level_to_order[ilevel] xlat_cost = self.translation_cost_model_factory( tree.dimensions, tree.nlevels ) translation_cost = self.fmm_cost_factors_for_kernels_from_model( - tree.nlevels, xlat_cost, params + tree.nlevels, xlat_cost, calibration_params ) if box_target_counts_nonchild is None: @@ -432,7 +427,7 @@ class AbstractFMMCostModel(ABC): return result - def __call__(self, traversal, level_to_order): + def __call__(self, traversal, level_to_order, calibration_params): """Top-level entry point for predicting cost of a new traversal object. Also see :func:`get_fmm_modeled_cost` for more customization. @@ -441,6 +436,8 @@ class AbstractFMMCostModel(ABC): :arg level_to_order: a :class:`numpy.ndarray` of shape (traversal.tree.nlevels,) representing the expansion orders of different levels. + :arg calibration_params: a :class:`dict` of calibration parameters. These + parameters can be got from `estimate_calibration_params`. :return: a :class:`dict`, the cost of fmm stages. 
""" @@ -449,7 +446,8 @@ class AbstractFMMCostModel(ABC): ) return self.get_fmm_modeled_cost( - traversal, level_to_order, ndirect_sources_per_target_box + traversal, level_to_order, ndirect_sources_per_target_box, + calibration_params ) @abstractmethod @@ -485,7 +483,8 @@ class AbstractFMMCostModel(ABC): additional_stage_to_param_names=()): """ :arg model_results: a :class:`list` of the modeled cost for each step of FMM, - returned by :func:`get_fmm_modeled_cost`. + returned by :func:`get_fmm_modeled_cost` with constant 1 calibration + parameters. :arg timing_results: a :class:`list` of the same length as *model_results*. Each entry is a :class:`dict` filled with timing data returned by *boxtree.fmm.drive_fmm* @@ -557,30 +556,17 @@ class CLFMMCostModel(AbstractFMMCostModel): memory. """ def __init__(self, queue, - calibration_params, translation_cost_model_factory=pde_aware_translation_cost_model): """ :arg queue: a :class:`pyopencl.CommandQueue` object on which the execution of this object runs. - :arg calibration_params: the calibration parameters. For evaluation, use - parameters returned by :func:`estimate_calibration_params`. For training, - use :func:`get_constantone_calibration_params` to make all cost modifiers - 1. :arg translation_cost_model_factory: a function, which takes tree dimension and the number of tree levels as arguments, returns an object of :class:`TranslationCostModel`. 
""" self.queue = queue - self.calibration_params = calibration_params self.translation_cost_model_factory = translation_cost_model_factory - def with_calibration_params(self, calibration_params): - """Return a copy of *self* with a new set of calibration parameters.""" - return type(self)( - self.queue, calibration_params, - translation_cost_model_factory=self.translation_cost_model_factory - ) - # {{{ form multipoles @memoize_method @@ -1118,18 +1104,12 @@ class CLFMMCostModel(AbstractFMMCostModel): class PythonFMMCostModel(AbstractFMMCostModel): def __init__(self, - calibration_params, translation_cost_model_factory=pde_aware_translation_cost_model): """ - :arg calibration_params: the calibration parameters. For evaluation, use - parameters returned by :func:`estimate_calibration_params`. For training, - use :func:`get_constantone_calibration_params` to make all cost modifiers - 1. :arg translation_cost_model_factory: a function, which takes tree dimension and the number of tree levels as arguments, returns an object of :class:`TranslationCostModel`. 
""" - self.calibration_params = calibration_params self.translation_cost_model_factory = translation_cost_model_factory def process_form_multipoles(self, traversal, p2m_cost): diff --git a/examples/cost_model.py b/examples/cost_model.py index e3d0a11..8ebac05 100644 --- a/examples/cost_model.py +++ b/examples/cost_model.py @@ -90,23 +90,21 @@ def demo_cost_model(): from boxtree.cost import CLFMMCostModel from boxtree.cost import pde_aware_translation_cost_model - cost_model = CLFMMCostModel( - queue, CLFMMCostModel.get_constantone_calibration_params(), - pde_aware_translation_cost_model - ) + cost_model = CLFMMCostModel(queue, pde_aware_translation_cost_model) model_results = [] for icase in range(len(traversals)-1): traversal = traversals_dev[icase] - model_results.append(cost_model(traversal, level_to_orders[icase])) + model_results.append( + cost_model(traversal, level_to_orders[icase], + CLFMMCostModel.get_constantone_calibration_params()) + ) params = cost_model.estimate_calibration_params( model_results, timing_results[:-1], time_field_name=time_field_name ) - cost_model = cost_model.with_calibration_params(params) - - predicted_time = cost_model(traversals_dev[-1], level_to_orders[-1]) + predicted_time = cost_model(traversals_dev[-1], level_to_orders[-1], params) for field in ["form_multipoles", "eval_direct", "multipole_to_local", "eval_multipoles", "form_locals", "eval_locals", diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 32b3bf8..e575d10 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -61,8 +61,8 @@ def test_compare_cl_and_py_cost_model(ctx_factory, nsources, ntargets, dims, dty # {{{ Construct cost models - cl_cost_model = CLFMMCostModel(queue, None, None) - python_cost_model = PythonFMMCostModel(None, None) + cl_cost_model = CLFMMCostModel(queue, None) + python_cost_model = PythonFMMCostModel(None) constant_one_params = cl_cost_model.get_constantone_calibration_params().copy() for ilevel in 
range(trav.tree.nlevels): @@ -438,10 +438,7 @@ def test_estimate_calibration_params(ctx_factory): for name in param_names: assert test_params1[name] == test_params2[name] - python_cost_model = PythonFMMCostModel( - PythonFMMCostModel.get_constantone_calibration_params(), - pde_aware_translation_cost_model - ) + python_cost_model = PythonFMMCostModel(pde_aware_translation_cost_model) python_model_results = [] @@ -449,7 +446,10 @@ def test_estimate_calibration_params(ctx_factory): traversal = traversals[icase] level_to_order = level_to_orders[icase] - python_model_results.append(python_cost_model(traversal, level_to_order)) + python_model_results.append(python_cost_model( + traversal, level_to_order, + PythonFMMCostModel.get_constantone_calibration_params() + )) python_params = python_cost_model.estimate_calibration_params( python_model_results, timing_results[:-1], time_field_name=time_field_name @@ -458,7 +458,7 @@ def test_estimate_calibration_params(ctx_factory): test_params_sanity(python_params) cl_cost_model = CLFMMCostModel( - queue, CLFMMCostModel.get_constantone_calibration_params(), + queue, pde_aware_translation_cost_model ) @@ -468,7 +468,10 @@ def test_estimate_calibration_params(ctx_factory): traversal = traversals_dev[icase] level_to_order = level_to_orders[icase] - cl_model_results.append(cl_cost_model(traversal, level_to_order)) + cl_model_results.append(cl_cost_model( + traversal, level_to_order, + CLFMMCostModel.get_constantone_calibration_params() + )) cl_params = cl_cost_model.estimate_calibration_params( cl_model_results, timing_results[:-1], time_field_name=time_field_name @@ -547,13 +550,16 @@ def test_cost_model_gives_correct_op_counts_with_constantone_wrangler( drive_fmm(trav, wrangler, src_weights, timing_data=timing_data) cost_model = CLFMMCostModel( - queue, CLFMMCostModel.get_constantone_calibration_params(), + queue, translation_cost_model_factory=OpCountingTranslationCostModel ) level_to_order = np.array([1 for _ in 
range(tree.nlevels)]) - modeled_time = cost_model(trav_dev, level_to_order) + modeled_time = cost_model( + trav_dev, level_to_order, + CLFMMCostModel.get_constantone_calibration_params() + ) mismatches = [] for stage in timing_data: -- GitLab From bdd1685f53b7fb98aca9ab218a6c4fea768da5ae Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 9 Sep 2019 17:04:08 -0500 Subject: [PATCH 50/50] Use 0.0 instead of NaN for stage not present --- boxtree/cost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/cost.py b/boxtree/cost.py index d96572b..033739b 100644 --- a/boxtree/cost.py +++ b/boxtree/cost.py @@ -541,7 +541,7 @@ class AbstractFMMCostModel(ABC): actual = actual_times[param] if np.allclose(uncalibrated, 0): - result[param] = float("NaN") + result[param] = 0.0 continue result[param] = ( -- GitLab