From 8bc5d340b74bc36eaaef88f13720f959654d0db6 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 25 Jan 2019 10:34:25 -0600 Subject: [PATCH 01/55] Temporarily disable CI --- .gitlab-ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3792baaf..c8b3f17b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -18,6 +18,7 @@ Python 2.7 POCL: - large-node except: - tags + - cl-cost-model Python 3.6 POCL: script: @@ -33,6 +34,7 @@ Python 3.6 POCL: - large-node except: - tags + - cl-cost-model Python 3.6 POCL Examples: script: @@ -48,6 +50,7 @@ Python 3.6 POCL Examples: - large-node except: - tags + - cl-cost-model Python 3.6 Conda: script: @@ -62,6 +65,7 @@ Python 3.6 Conda: - large-node except: - tags + - cl-cost-model Python 3.6 Conda Apple: script: @@ -82,6 +86,7 @@ Python 3.6 Conda Apple: - apple except: - tags + - cl-cost-model retry: 2 Documentation: -- GitLab From 1ea422f3af975e39771f98009dc37dfc38484bca Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 25 Jan 2019 10:36:53 -0600 Subject: [PATCH 02/55] Add skeleton of cost model --- pytential/qbx/cost.py | 844 ++---------------------------------------- 1 file changed, 35 insertions(+), 809 deletions(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index b638d3c1..1f5303d8 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -3,6 +3,7 @@ from __future__ import division, absolute_import __copyright__ = """ Copyright (C) 2013 Andreas Kloeckner Copyright (C) 2018 Matt Wala +Copyright (C) 2019 Hao Gao """ __license__ = """ @@ -25,47 +26,29 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -from collections import OrderedDict - -import logging - import numpy as np -import pyopencl as cl from six.moves import range -import sympy as sp - -from pytools import log_process from pymbolic import var -logger = logging.getLogger(__name__) - - -__doc__ = """ -.. autoclass:: CostModel -.. autoclass:: ParametrizedCosts +from boxtree.cost import ( + FMMTranslationCostModel, AbstractFMMCostModel +) +from abc import abstractmethod -.. autoclass:: TranslationCostModel - -.. autofunction:: pde_aware_translation_cost_model -.. autofunction:: taylor_translation_cost_model - -.. 
autofunction:: estimate_calibration_params -""" +import logging +logger = logging.getLogger(__name__) # {{{ translation cost model -class TranslationCostModel(object): +class QBXTranslationCostModel(FMMTranslationCostModel): """Provides modeled costs for individual translations or evaluations.""" def __init__(self, ncoeffs_qbx, ncoeffs_fmm_by_level, uses_point_and_shoot): self.ncoeffs_qbx = ncoeffs_qbx - self.ncoeffs_fmm_by_level = ncoeffs_fmm_by_level - self.uses_point_and_shoot = uses_point_and_shoot - - @staticmethod - def direct(): - return var("c_p2p") + FMMTranslationCostModel.__init__( + self, ncoeffs_fmm_by_level, uses_point_and_shoot + ) def p2qbxl(self): return var("c_p2qbxl") * self.ncoeffs_qbx @@ -78,33 +61,6 @@ class TranslationCostModel(object): def qbxl2p(self): return var("c_qbxl2p") * self.ncoeffs_qbx - def p2l(self, level): - return var("c_p2l") * self.ncoeffs_fmm_by_level[level] - - def l2p(self, level): - return var("c_l2p") * self.ncoeffs_fmm_by_level[level] - - def p2m(self, level): - return var("c_p2m") * self.ncoeffs_fmm_by_level[level] - - def m2p(self, level): - return var("c_m2p") * self.ncoeffs_fmm_by_level[level] - - def m2m(self, src_level, tgt_level): - return var("c_m2m") * self.e2e_cost( - self.ncoeffs_fmm_by_level[src_level], - self.ncoeffs_fmm_by_level[tgt_level]) - - def l2l(self, src_level, tgt_level): - return var("c_l2l") * self.e2e_cost( - self.ncoeffs_fmm_by_level[src_level], - self.ncoeffs_fmm_by_level[tgt_level]) - - def m2l(self, src_level, tgt_level): - return var("c_m2l") * self.e2e_cost( - self.ncoeffs_fmm_by_level[src_level], - self.ncoeffs_fmm_by_level[tgt_level]) - def m2qbxl(self, level): return var("c_m2qbxl") * self.e2e_cost( self.ncoeffs_fmm_by_level[level], @@ -115,18 +71,6 @@ class TranslationCostModel(object): self.ncoeffs_fmm_by_level[level], self.ncoeffs_qbx) - def e2e_cost(self, nsource_coeffs, ntarget_coeffs): - if self.uses_point_and_shoot: - return ( - # Rotate the coordinate system to be z axis aligned. - nsource_coeffs ** (3 / 2) - # Translate the expansion along the z axis. - + nsource_coeffs ** (1 / 2) * ntarget_coeffs - # Rotate the coordinate system back. - + ntarget_coeffs ** (3 / 2)) - - return nsource_coeffs * ntarget_coeffs - # }}} @@ -147,7 +91,7 @@ def pde_aware_translation_cost_model(dim, nlevels): if dim == 3: uses_point_and_shoot = True - return TranslationCostModel( + return QBXTranslationCostModel( ncoeffs_qbx=ncoeffs_qbx, ncoeffs_fmm_by_level=ncoeffs_fmm, uses_point_and_shoot=uses_point_and_shoot) @@ -163,7 +107,7 @@ def taylor_translation_cost_model(dim, nlevels): ncoeffs_fmm = (p_fmm + 1) ** dim ncoeffs_qbx = (p_qbx + 1) ** dim - return TranslationCostModel( + return QBXTranslationCostModel( ncoeffs_qbx=ncoeffs_qbx, ncoeffs_fmm_by_level=ncoeffs_fmm, uses_point_and_shoot=False) @@ -171,755 +115,37 @@ def taylor_translation_cost_model(dim, nlevels): # }}} -# {{{ parameterized costs returned by cost model - -class ParametrizedCosts(object): - """A container for data returned by the cost model. - - This holds both symbolic costs as well as parameter values. To obtain a - prediction of the running time, use :meth:`get_predicted_times`. - - .. attribute:: raw_costs - - A dictionary mapping algorithmic stage names to symbolic costs. - - .. attribute:: params - - A dictionary mapping names of symbolic parameters to values. Parameters - appear in *raw_costs* and may include values such as QBX or FMM order - as well as calibration constants. - - .. automethod:: copy - .. automethod:: with_params - .. 
automethod:: get_predicted_times - """ - - def __init__(self, raw_costs, params): - self.raw_costs = OrderedDict(raw_costs) - self.params = params - - def with_params(self, params): - """Return a copy of *self* with parameters updated to include *params*.""" - new_params = self.params.copy() - new_params.update(params) - return type(self)( - raw_costs=self.raw_costs.copy(), - params=new_params) - - def copy(self): - return self.with_params({}) - - def __str__(self): - return "".join([ - type(self).__name__, - "(raw_costs=", - str(self.raw_costs), - ", params=", - str(self.params), - ")"]) - - def __repr__(self): - return "".join([ - type(self).__name__, - "(raw_costs=", - repr(self.raw_costs), - ", params=", - repr(self.params), - ")"]) - - def get_predicted_times(self, merge_close_lists=False): - """Return a dictionary mapping stage names to predicted time in seconds. - - :arg merge_close_lists: If *True*, the returned estimate combines - the cost of "close" lists (Lists 1, 3 close, and 4 close). If - *False*, the time of each "close" list is reported separately. - """ - from pymbolic import evaluate - from functools import partial - - get_time = partial(evaluate, context=self.params) - - result = OrderedDict() - - for name, val in self.raw_costs.items(): - if merge_close_lists: - for suffix in ("_list1", "_list3", "_list4"): - if name.endswith(suffix): - name = name[:-len(suffix)] - break - - result[name] = get_time(val) + result.get(name, 0) - - return result - -# }}} - - # {{{ cost model -class CostModel(object): - """ - .. automethod:: with_calibration_params - .. automethod:: __call__ - - The cost model relies on a translation cost model. See - :class:`TranslationCostModel` for the translation cost model interface. - """ - - def __init__(self, - translation_cost_model_factory=pde_aware_translation_cost_model, - calibration_params=None): - """ - :arg translation_cost_model_factory: A callable which, given arguments - (*dim*, *nlevels*), returns a translation cost model. - """ - self.translation_cost_model_factory = translation_cost_model_factory - if calibration_params is None: - calibration_params = dict() - self.calibration_params = calibration_params - - def with_calibration_params(self, calibration_params): - """Return a copy of *self* with a new set of calibration parameters.""" - return type(self)( - translation_cost_model_factory=self.translation_cost_model_factory, - calibration_params=calibration_params) - - # {{{ form multipoles - - def process_form_multipoles(self, xlat_cost, traversal, tree): - result = 0 - - for level in range(tree.nlevels): - src_count = 0 - start, stop = traversal.level_start_source_box_nrs[level:level + 2] - for src_ibox in traversal.source_boxes[start:stop]: - nsrcs = tree.box_source_counts_nonchild[src_ibox] - src_count += nsrcs - result += src_count * xlat_cost.p2m(level) - - return dict(form_multipoles=result) - - # }}} - - # {{{ propagate multipoles upward - - def process_coarsen_multipoles(self, xlat_cost, traversal, tree): - result = 0 - - # nlevels-1 is the last valid level index - # nlevels-2 is the last valid level that could have children - # - # 3 is the last relevant source_level. - # 2 is the last relevant target_level. 
- # (because no level 1 box will be well-separated from another) - for source_level in range(tree.nlevels-1, 2, -1): - target_level = source_level - 1 - cost = xlat_cost.m2m(source_level, target_level) - - nmultipoles = 0 - start, stop = traversal.level_start_source_parent_box_nrs[ - target_level:target_level+2] - for ibox in traversal.source_parent_boxes[start:stop]: - for child in tree.box_child_ids[:, ibox]: - if child: - nmultipoles += 1 - - result += cost * nmultipoles - - return dict(coarsen_multipoles=result) - - # }}} - - # {{{ collect direct interaction data - - @staticmethod - def _collect_direction_interaction_data(traversal, tree): - ntarget_boxes = len(traversal.target_boxes) - - # target box index -> nsources - nlist1_srcs_by_itgt_box = np.zeros(ntarget_boxes, dtype=np.intp) - nlist3close_srcs_by_itgt_box = np.zeros(ntarget_boxes, dtype=np.intp) - nlist4close_srcs_by_itgt_box = np.zeros(ntarget_boxes, dtype=np.intp) - - for itgt_box in range(ntarget_boxes): - nlist1_srcs = 0 - start, end = traversal.neighbor_source_boxes_starts[itgt_box:itgt_box+2] - for src_ibox in traversal.neighbor_source_boxes_lists[start:end]: - nlist1_srcs += tree.box_source_counts_nonchild[src_ibox] - - nlist1_srcs_by_itgt_box[itgt_box] = nlist1_srcs - - nlist3close_srcs = 0 - # Could be None, if not using targets with extent. - if traversal.from_sep_close_smaller_starts is not None: - start, end = ( - traversal.from_sep_close_smaller_starts[itgt_box:itgt_box+2]) - for src_ibox in traversal.from_sep_close_smaller_lists[start:end]: - nlist3close_srcs += tree.box_source_counts_nonchild[src_ibox] - - nlist3close_srcs_by_itgt_box[itgt_box] = nlist3close_srcs - - nlist4close_srcs = 0 - # Could be None, if not using targets with extent. - if traversal.from_sep_close_bigger_starts is not None: - start, end = ( - traversal.from_sep_close_bigger_starts[itgt_box:itgt_box+2]) - for src_ibox in traversal.from_sep_close_bigger_lists[start:end]: - nlist4close_srcs += tree.box_source_counts_nonchild[src_ibox] - - nlist4close_srcs_by_itgt_box[itgt_box] = nlist4close_srcs - - result = {} - result["nlist1_srcs_by_itgt_box"] = nlist1_srcs_by_itgt_box - result["nlist3close_srcs_by_itgt_box"] = nlist3close_srcs_by_itgt_box - result["nlist4close_srcs_by_itgt_box"] = nlist4close_srcs_by_itgt_box - - return result - - # }}} - - # {{{ direct evaluation to point targets (lists 1, 3 close, 4 close) - - def process_direct(self, xlat_cost, traversal, direct_interaction_data, - box_target_counts_nonchild): - nlist1_srcs_by_itgt_box = ( - direct_interaction_data["nlist1_srcs_by_itgt_box"]) - nlist3close_srcs_by_itgt_box = ( - direct_interaction_data["nlist3close_srcs_by_itgt_box"]) - nlist4close_srcs_by_itgt_box = ( - direct_interaction_data["nlist4close_srcs_by_itgt_box"]) - - # list -> number of source-target interactions - npart_direct_list1 = 0 - npart_direct_list3 = 0 - npart_direct_list4 = 0 - - for itgt_box, tgt_ibox in enumerate(traversal.target_boxes): - ntargets = box_target_counts_nonchild[tgt_ibox] - - npart_direct_list1 += ntargets * nlist1_srcs_by_itgt_box[itgt_box] - npart_direct_list3 += ntargets * nlist3close_srcs_by_itgt_box[itgt_box] - npart_direct_list4 += ntargets * nlist4close_srcs_by_itgt_box[itgt_box] - - result = {} - result["eval_direct_list1"] = npart_direct_list1 * xlat_cost.direct() - result["eval_direct_list3"] = npart_direct_list3 * xlat_cost.direct() - result["eval_direct_list4"] = npart_direct_list4 * xlat_cost.direct() - - return result - - # }}} - - # {{{ translate separated siblings' ("list 2") 
mpoles to local - - def process_list2(self, xlat_cost, traversal, tree): - nm2l_by_level = np.zeros(tree.nlevels, dtype=np.intp) - - for itgt_box, tgt_ibox in enumerate(traversal.target_or_target_parent_boxes): - start, end = traversal.from_sep_siblings_starts[itgt_box:itgt_box+2] - - level = tree.box_levels[tgt_ibox] - nm2l_by_level[level] += end-start - - result = sum( - cost * xlat_cost.m2l(ilevel, ilevel) - for ilevel, cost in enumerate(nm2l_by_level)) - - return dict(multipole_to_local=result) - - # }}} - - # {{{ evaluate sep. smaller mpoles ("list 3") at particles - - def process_list3(self, xlat_cost, traversal, tree, box_target_counts_nonchild): - nmp_eval_by_source_level = np.zeros(tree.nlevels, dtype=np.intp) - - assert tree.nlevels == len(traversal.from_sep_smaller_by_level) - - for ilevel, sep_smaller_list in enumerate( - traversal.from_sep_smaller_by_level): - for itgt_box, tgt_ibox in enumerate( - traversal.target_boxes_sep_smaller_by_source_level[ilevel]): - ntargets = box_target_counts_nonchild[tgt_ibox] - start, end = sep_smaller_list.starts[itgt_box:itgt_box+2] - nmp_eval_by_source_level[ilevel] += ntargets * (end-start) - - result = sum( - cost * xlat_cost.m2p(ilevel) - for ilevel, cost in enumerate(nmp_eval_by_source_level)) - - return dict(eval_multipoles=result) - - # }}} - - # {{{ form locals for separated bigger source boxes ("list 4") - - def process_list4(self, xlat_cost, traversal, tree): - nform_local_by_source_level = np.zeros(tree.nlevels, dtype=np.intp) - - for itgt_box in range(len(traversal.target_or_target_parent_boxes)): - start, end = traversal.from_sep_bigger_starts[itgt_box:itgt_box+2] - for src_ibox in traversal.from_sep_bigger_lists[start:end]: - nsources = tree.box_source_counts_nonchild[src_ibox] - level = tree.box_levels[src_ibox] - nform_local_by_source_level[level] += nsources - - result = sum( - cost * xlat_cost.p2l(ilevel) - for ilevel, cost in enumerate(nform_local_by_source_level)) - - return dict(form_locals=result) - - # }}} - - # {{{ propogate locals downward - - def process_refine_locals(self, xlat_cost, traversal, tree): - result = 0 - - for target_lev in range(1, tree.nlevels): - start, stop = traversal.level_start_target_or_target_parent_box_nrs[ - target_lev:target_lev+2] - source_lev = target_lev - 1 - result += (stop-start) * xlat_cost.l2l(source_lev, target_lev) - - return dict(refine_locals=result) - - # }}} - - # {{{ evaluate local expansions at non-qbx targets - - def process_eval_locals(self, xlat_cost, traversal, tree, nqbtl): - ntargets_by_level = np.zeros(tree.nlevels, dtype=np.intp) - - for target_lev in range(tree.nlevels): - start, stop = traversal.level_start_target_box_nrs[ - target_lev:target_lev+2] - for tgt_ibox in traversal.target_boxes[start:stop]: - ntargets_by_level[target_lev] += ( - nqbtl.box_target_counts_nonchild[tgt_ibox]) - - result = sum( - cost * xlat_cost.l2p(ilevel) - for ilevel, cost in enumerate(ntargets_by_level)) - - return dict(eval_locals=result) - - # }}} - - # {{{ collect data about direct interactions with qbx centers - - @staticmethod - def _collect_qbxl_direct_interaction_data(direct_interaction_data, - global_qbx_centers, qbx_center_to_target_box, center_to_targets_starts): - nlist1_srcs_by_itgt_box = ( - direct_interaction_data["nlist1_srcs_by_itgt_box"]) - nlist3close_srcs_by_itgt_box = ( - direct_interaction_data["nlist3close_srcs_by_itgt_box"]) - nlist4close_srcs_by_itgt_box = ( - direct_interaction_data["nlist4close_srcs_by_itgt_box"]) - - # center -> nsources - np2qbxl_list1_by_center 
= np.zeros(len(global_qbx_centers), dtype=np.intp) - np2qbxl_list3_by_center = np.zeros(len(global_qbx_centers), dtype=np.intp) - np2qbxl_list4_by_center = np.zeros(len(global_qbx_centers), dtype=np.intp) - - # center -> number of associated targets - nqbxl2p_by_center = np.zeros(len(global_qbx_centers), dtype=np.intp) - - for itgt_center, tgt_icenter in enumerate(global_qbx_centers): - start, end = center_to_targets_starts[tgt_icenter:tgt_icenter+2] - nqbxl2p_by_center[itgt_center] = end - start - - itgt_box = qbx_center_to_target_box[tgt_icenter] - np2qbxl_list1_by_center[itgt_center] = ( - nlist1_srcs_by_itgt_box[itgt_box]) - np2qbxl_list3_by_center[itgt_center] = ( - nlist3close_srcs_by_itgt_box[itgt_box]) - np2qbxl_list4_by_center[itgt_center] = ( - nlist4close_srcs_by_itgt_box[itgt_box]) - - result = {} - result["np2qbxl_list1_by_center"] = np2qbxl_list1_by_center - result["np2qbxl_list3_by_center"] = np2qbxl_list3_by_center - result["np2qbxl_list4_by_center"] = np2qbxl_list4_by_center - result["nqbxl2p_by_center"] = nqbxl2p_by_center - - return result - - # }}} - - # {{{ eval target specific qbx expansions +class AbstractQBXCostModel(AbstractFMMCostModel): + def __init__( + self, + translation_cost_model_factory=pde_aware_translation_cost_model): + AbstractFMMCostModel.__init__( + self, translation_cost_model_factory + ) - def process_eval_target_specific_qbxl(self, xlat_cost, direct_interaction_data, - global_qbx_centers, qbx_center_to_target_box, center_to_targets_starts): - - counts = self._collect_qbxl_direct_interaction_data( - direct_interaction_data, global_qbx_centers, - qbx_center_to_target_box, center_to_targets_starts) - - result = {} - result["eval_target_specific_qbx_locals_list1"] = ( - sum(counts["np2qbxl_list1_by_center"] * counts["nqbxl2p_by_center"]) - * xlat_cost.p2p_tsqbx()) - result["eval_target_specific_qbx_locals_list3"] = ( - sum(counts["np2qbxl_list3_by_center"] * counts["nqbxl2p_by_center"]) - * xlat_cost.p2p_tsqbx()) - result["eval_target_specific_qbx_locals_list4"] = ( - sum(counts["np2qbxl_list4_by_center"] * counts["nqbxl2p_by_center"]) - * xlat_cost.p2p_tsqbx()) - - return result - - # }}} - - # {{{ form global qbx locals - - def process_form_qbxl(self, xlat_cost, direct_interaction_data, - global_qbx_centers, qbx_center_to_target_box, center_to_targets_starts): - - counts = self._collect_qbxl_direct_interaction_data( - direct_interaction_data, global_qbx_centers, - qbx_center_to_target_box, center_to_targets_starts) - - result = {} - result["form_global_qbx_locals_list1"] = ( - sum(counts["np2qbxl_list1_by_center"]) * xlat_cost.p2qbxl()) - result["form_global_qbx_locals_list3"] = ( - sum(counts["np2qbxl_list3_by_center"]) * xlat_cost.p2qbxl()) - result["form_global_qbx_locals_list4"] = ( - sum(counts["np2qbxl_list4_by_center"]) * xlat_cost.p2qbxl()) - - return result - - # }}} - - # {{{ translate from list 3 multipoles to qbx local expansions - - def process_m2qbxl(self, xlat_cost, traversal, tree, global_qbx_centers, - qbx_center_to_target_box_source_level): - nm2qbxl_by_source_level = np.zeros(tree.nlevels, dtype=np.intp) - - assert tree.nlevels == len(traversal.from_sep_smaller_by_level) - - for isrc_level, ssn in enumerate(traversal.from_sep_smaller_by_level): - for tgt_icenter in global_qbx_centers: - icontaining_tgt_box = qbx_center_to_target_box_source_level[ - isrc_level][tgt_icenter] - - if icontaining_tgt_box == -1: - continue - - start, stop = ( - ssn.starts[icontaining_tgt_box], - ssn.starts[icontaining_tgt_box+1]) - - 
nm2qbxl_by_source_level[isrc_level] += stop-start - - result = sum( - cost * xlat_cost.m2qbxl(ilevel) - for ilevel, cost in enumerate(nm2qbxl_by_source_level)) - - return dict(translate_box_multipoles_to_qbx_local=result) - - # }}} - - # {{{ translate from box locals to qbx local expansions - - def process_l2qbxl(self, xlat_cost, traversal, tree, global_qbx_centers, - qbx_center_to_target_box): - nl2qbxl_by_level = np.zeros(tree.nlevels, dtype=np.intp) - - for tgt_icenter in global_qbx_centers: - itgt_box = qbx_center_to_target_box[tgt_icenter] - tgt_ibox = traversal.target_boxes[itgt_box] - level = tree.box_levels[tgt_ibox] - nl2qbxl_by_level[level] += 1 - - result = sum( - cost * xlat_cost.l2qbxl(ilevel) - for ilevel, cost in enumerate(nl2qbxl_by_level)) - - return dict(translate_box_local_to_qbx_local=result) - - # }}} - - # {{{ evaluate qbx local expansions - - def process_eval_qbxl(self, xlat_cost, global_qbx_centers, - center_to_targets_starts): - result = 0 - - for src_icenter in global_qbx_centers: - start, end = center_to_targets_starts[src_icenter:src_icenter+2] - result += (end - start) - - result *= xlat_cost.qbxl2p() - - return dict(eval_qbx_expansions=result) - - # }}} - - @log_process(logger, "model cost") - def __call__(self, geo_data, kernel, kernel_arguments): - """Analyze the given geometry and return cost data. - - :returns: An instance of :class:`ParametrizedCosts`. - """ - # FIXME: This should suport target filtering. - - result = OrderedDict() - - lpot_source = geo_data.lpot_source - - use_tsqbx = lpot_source._use_target_specific_qbx - - with cl.CommandQueue(geo_data.cl_context) as queue: - tree = geo_data.tree().get(queue) - traversal = geo_data.traversal(merge_close_lists=False).get(queue) - nqbtl = geo_data.non_qbx_box_target_lists().get(queue) - - box_target_counts_nonchild = nqbtl.box_target_counts_nonchild - - params = dict( - nlevels=tree.nlevels, - nboxes=tree.nboxes, - nsources=tree.nsources, - ntargets=tree.ntargets, - ncenters=geo_data.ncenters, - p_qbx=lpot_source.qbx_order, - ) - - for ilevel in range(tree.nlevels): - params["p_fmm_lev%d" % ilevel] = ( - lpot_source.fmm_level_to_order( - kernel.get_base_kernel(), kernel_arguments, tree, ilevel)) - - params.update(self.calibration_params) - - xlat_cost = ( - self.translation_cost_model_factory(tree.dimensions, tree.nlevels)) - - # {{{ construct local multipoles - - result.update(self.process_form_multipoles(xlat_cost, traversal, tree)) - - # }}} - - # {{{ propagate multipoles upward - - result.update(self.process_coarsen_multipoles(xlat_cost, traversal, tree)) - - # }}} - - direct_interaction_data = ( - self._collect_direction_interaction_data(traversal, tree)) - - # {{{ direct evaluation to point targets (lists 1, 3 close, 4 close) - - result.update(self.process_direct( - xlat_cost, traversal, direct_interaction_data, - box_target_counts_nonchild)) - - # }}} - - # {{{ translate separated siblings' ("list 2") mpoles to local - - result.update(self.process_list2(xlat_cost, traversal, tree)) - - # }}} - - # {{{ evaluate sep. 
smaller mpoles ("list 3") at particles - - result.update(self.process_list3( - xlat_cost, traversal, tree, box_target_counts_nonchild)) - - # }}} - - # {{{ form locals for separated bigger source boxes ("list 4") - - result.update(self.process_list4(xlat_cost, traversal, tree)) - - # }}} - - # {{{ propagate local_exps downward - - result.update(self.process_refine_locals(xlat_cost, traversal, tree)) - - # }}} - - # {{{ evaluate locals - - result.update(self.process_eval_locals(xlat_cost, traversal, tree, nqbtl)) - - # }}} - - global_qbx_centers = geo_data.global_qbx_centers() - - qbx_center_to_target_box = geo_data.qbx_center_to_target_box() - center_to_targets_starts = geo_data.center_to_tree_targets().starts - qbx_center_to_target_box_source_level = np.empty( - (tree.nlevels,), dtype=object) - - for src_level in range(tree.nlevels): - qbx_center_to_target_box_source_level[src_level] = ( - geo_data.qbx_center_to_target_box_source_level(src_level)) - - with cl.CommandQueue(geo_data.cl_context) as queue: - global_qbx_centers = global_qbx_centers.get( - queue=queue) - qbx_center_to_target_box = qbx_center_to_target_box.get( - queue=queue) - center_to_targets_starts = center_to_targets_starts.get( - queue=queue) - for src_level in range(tree.nlevels): - qbx_center_to_target_box_source_level[src_level] = ( - qbx_center_to_target_box_source_level[src_level] - .get(queue=queue)) - - # {{{ form global qbx locals or evaluate target specific qbx expansions - - if use_tsqbx: - result.update(self.process_eval_target_specific_qbxl( - xlat_cost, direct_interaction_data, global_qbx_centers, - qbx_center_to_target_box, center_to_targets_starts)) - else: - result.update(self.process_form_qbxl( - xlat_cost, direct_interaction_data, global_qbx_centers, - qbx_center_to_target_box, center_to_targets_starts)) - - # }}} - - # {{{ translate from list 3 multipoles to qbx local expansions - - result.update(self.process_m2qbxl( - xlat_cost, traversal, tree, global_qbx_centers, - qbx_center_to_target_box_source_level)) - - # }}} - - # {{{ translate from box local expansions to qbx local expansions - - result.update(self.process_l2qbxl( - xlat_cost, traversal, tree, global_qbx_centers, - qbx_center_to_target_box)) - - # }}} - - # {{{ evaluate qbx local expansions - - result.update(self.process_eval_qbxl( - xlat_cost, global_qbx_centers, center_to_targets_starts)) - - # }}} - - return ParametrizedCosts(result, params) - -# }}} - - -# {{{ calibrate cost model - -def _collect(expr, variables): - """Collect terms with respect to a list of variables. - - This applies :func:`sympy.simplify.collect` to the a :mod:`pymbolic` expression - with respect to the iterable of names in *variables*. - - Returns a dictionary mapping variable names to terms. 
""" - from pymbolic.interop.sympy import PymbolicToSympyMapper, SympyToPymbolicMapper - p2s = PymbolicToSympyMapper() - s2p = SympyToPymbolicMapper() - - from sympy.simplify import collect - sympy_variables = [sp.var(v) for v in variables] - collect_result = collect(p2s(expr), sympy_variables, evaluate=False) - - result = {} - for v in variables: - try: - result[v] = s2p(collect_result[sp.var(v)]) - except KeyError: - continue - - return result - - -_FMM_STAGE_TO_CALIBRATION_PARAMETER = { - "form_multipoles": "c_p2m", - "coarsen_multipoles": "c_m2m", - "eval_direct": "c_p2p", - "multipole_to_local": "c_m2l", - "eval_multipoles": "c_m2p", - "form_locals": "c_p2l", - "refine_locals": "c_l2l", - "eval_locals": "c_l2p", - "form_global_qbx_locals": "c_p2qbxl", - "translate_box_multipoles_to_qbx_local": "c_m2qbxl", - "translate_box_local_to_qbx_local": "c_l2qbxl", - "eval_qbx_expansions": "c_qbxl2p", - "eval_target_specific_qbx_locals": "c_p2p_tsqbx", - } - - -def estimate_calibration_params(model_results, timing_results): - """Given a set of model results and matching timing results, estimate the best - calibration parameters for the model. + @abstractmethod + def process_eval_target_specific_qbxl(self): + pass """ - params = set(_FMM_STAGE_TO_CALIBRATION_PARAMETER.values()) - - nresults = len(model_results) - - if nresults != len(timing_results): - raise ValueError("must have same number of model and timing results") - - uncalibrated_times = {} - actual_times = {} - - for param in params: - uncalibrated_times[param] = np.zeros(nresults) - actual_times[param] = np.zeros(nresults) - - from pymbolic import evaluate - - for i, model_result in enumerate(model_results): - context = model_result.params.copy() - for param in params: - context[param] = var(param) - - # Represents the total modeled cost, but leaves the calibration - # parameters symbolic. - total_modeled_cost = evaluate( - sum(model_result.raw_costs.values()), - context=context) - - collected_times = _collect(total_modeled_cost, params) - - for param, time in collected_times.items(): - uncalibrated_times[param][i] = time - - for i, timing_result in enumerate(timing_results): - for param, time in timing_result.items(): - calibration_param = ( - _FMM_STAGE_TO_CALIBRATION_PARAMETER[param]) - actual_times[calibration_param][i] = time["process_elapsed"] - - result = {} - - for param in params: - uncalibrated = uncalibrated_times[param] - actual = actual_times[param] + @abstractmethod + def process_form_qbxl(self): + pass - if np.allclose(uncalibrated, 0): - result[param] = float("NaN") - continue + @abstractmethod + def process_m2qbxl(self): + pass - result[param] = ( - actual.dot(uncalibrated) / uncalibrated.dot(uncalibrated)) + @abstractmethod + def process_l2qbxl(self): + pass - return result + @abstractmethod + def process_eval_qbxl(self): + pass # }}} -- GitLab From f9d51961cd0b46d9ac1975d76da2adf85911d1ab Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 3 Feb 2019 22:28:14 -0600 Subject: [PATCH 03/55] Add process_form_qbxl --- pytential/qbx/cost.py | 90 ++++++- test/test_cost_model.py | 561 +++++----------------------------------- 2 files changed, 154 insertions(+), 497 deletions(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 1f5303d8..f1517ce8 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -27,11 +27,13 @@ THE SOFTWARE. 
""" import numpy as np +import pyopencl as cl +import pyopencl.array # noqa: F401 from six.moves import range from pymbolic import var from boxtree.cost import ( - FMMTranslationCostModel, AbstractFMMCostModel + FMMTranslationCostModel, AbstractFMMCostModel, PythonFMMCostModel, CLFMMCostModel ) from abc import abstractmethod @@ -127,26 +129,100 @@ class AbstractQBXCostModel(AbstractFMMCostModel): """ @abstractmethod - def process_eval_target_specific_qbxl(self): + def process_m2qbxl(self): pass - """ @abstractmethod - def process_form_qbxl(self): + def process_l2qbxl(self): pass @abstractmethod - def process_m2qbxl(self): + def process_eval_qbxl(self): pass + """ + @abstractmethod - def process_l2qbxl(self): + def process_form_qbxl(self, p2qbxl_cost, geo_data, + ndirect_sources_per_target_box): pass @abstractmethod - def process_eval_qbxl(self): + def process_eval_target_specific_qbxl(self, p2p_tsqbx_cost, geo_data, + ndirect_sources_per_target_box): + pass + + +class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): + def __init__(self, queue, + translation_cost_model_factory=pde_aware_translation_cost_model): + self.queue = queue + AbstractQBXCostModel.__init__(self, translation_cost_model_factory) + + def process_form_qbxl(self, p2qbxl_cost, geo_data, + ndirect_sources_per_target_box): + # TODO: convert this implementation to OpenCL + # TODO: probably need an OpenCL histogram implementation + traversal = geo_data.traversal() + ntarget_boxes = traversal.target_boxes.shape[0] + qbx_center_to_target_box = geo_data.qbx_center_to_target_box().get( + self.queue + ) + global_qbx_centers = geo_data.global_qbx_centers().get(self.queue) + + ncenters_per_tgt_box = np.zeros( + ntarget_boxes, dtype=traversal.tree.particle_id_dtype + ) + + for itgt_center, tgt_icenter in enumerate(global_qbx_centers): + itgt_box = qbx_center_to_target_box[tgt_icenter] + ncenters_per_tgt_box[itgt_box] += 1 + + ncenters_per_tgt_box_dev = cl.array.to_device( + self.queue, ncenters_per_tgt_box + ) + + return (ncenters_per_tgt_box_dev + * ndirect_sources_per_target_box + * p2qbxl_cost) + + def process_eval_target_specific_qbxl(self, p2p_tsqbx_cost, geo_data, + ndirect_sources_per_target_box): pass + +class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): + def process_form_qbxl(self, p2qbxl_cost, geo_data, + ndirect_sources_per_target_box): + global_qbx_centers = geo_data.global_qbx_centers() + qbx_center_to_target_box = geo_data.qbx_center_to_target_box() + traversal = geo_data.traversal() + + np2qbxl = np.zeros(len(traversal.target_boxes), dtype=np.float64) + + for itgt_center, tgt_icenter in enumerate(global_qbx_centers): + itgt_box = qbx_center_to_target_box[tgt_icenter] + np2qbxl[itgt_box] += ndirect_sources_per_target_box[itgt_box] + + return np2qbxl * p2qbxl_cost + + def process_eval_target_specific_qbxl(self, p2p_tsqbx_cost, geo_data, + ndirect_sources_per_target_box): + center_to_targets_starts = geo_data.center_to_tree_targets().starts + global_qbx_centers = geo_data.global_qbx_centers() + qbx_center_to_target_box = geo_data.qbx_center_to_target_box() + traversal = geo_data.traversal() + + neval_tsqbx = np.zeros(len(traversal.target_boxes), dtype=np.float64) + for itgt_center, tgt_icenter in enumerate(global_qbx_centers): + start, end = center_to_targets_starts[tgt_icenter:tgt_icenter + 2] + itgt_box = qbx_center_to_target_box[tgt_icenter] + neval_tsqbx[itgt_box] += ( + ndirect_sources_per_target_box[itgt_box] * (end - start) + ) + + return neval_tsqbx * p2p_tsqbx_cost + # }}} # vim: 
foldmethod=marker diff --git a/test/test_cost_model.py b/test/test_cost_model.py index e8ac4f37..71d0b572 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -1,6 +1,9 @@ from __future__ import division, print_function -__copyright__ = "Copyright (C) 2018 Matt Wala" +__copyright__ = """ + Copyright (C) 2018 Matt Wala + Copyright (C) 2019 Hao Gao +""" __license__ = """ Permission is hereby granted, free of charge, to any person obtaining a copy @@ -22,525 +25,103 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import numpy as np -import numpy.linalg as la # noqa - -from boxtree.tools import ConstantOneExpansionWrangler -import pyopencl as cl -import pyopencl.clmath # noqa import pytest from pyopencl.tools import ( # noqa - pytest_generate_tests_for_pyopencl as pytest_generate_tests) - -from pytools import one -from sumpy.kernel import LaplaceKernel, HelmholtzKernel + pytest_generate_tests_for_pyopencl as pytest_generate_tests) -from pytential import bind, sym, norm # noqa -from pytential.qbx.cost import CostModel +import numpy as np +import pyopencl as cl +from pytential.qbx import QBXLayerPotentialSource +from pytential.target import PointsTarget +from pytential.qbx.cost import CLQBXCostModel, PythonQBXCostModel +import time +import logging +import os +logging.basicConfig(level=os.environ.get("LOGLEVEL", "WARNING")) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) -# {{{ global params -TARGET_ORDER = 8 -OVSMP_FACTOR = 5 -TCF = 0.9 -QBX_ORDER = 5 -FMM_ORDER = 10 +@pytest.mark.opencl +def test_compare_cl_and_py_cost_model(ctx_factory): + nelements = 120 + target_order = 16 + fmm_order = 5 + qbx_order = fmm_order -DEFAULT_LPOT_KWARGS = { - "_box_extent_norm": "l2", - "_from_sep_smaller_crit": "static_l2", - } + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + from meshmode.mesh.generation import make_curve_mesh, starfish + mesh = make_curve_mesh(starfish, np.linspace(0, 1, nelements), target_order) -def get_lpot_source(queue, dim): from meshmode.discretization import Discretization - from meshmode.discretization.poly_element import ( - InterpolatoryQuadratureSimplexGroupFactory) - - target_order = TARGET_ORDER - - if dim == 2: - from meshmode.mesh.generation import starfish, make_curve_mesh - mesh = make_curve_mesh(starfish, np.linspace(0, 1, 50), order=target_order) - elif dim == 3: - from meshmode.mesh.generation import generate_torus - mesh = generate_torus(2, 1, order=target_order) - else: - raise ValueError("unsupported dimension: %d" % dim) - + from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory pre_density_discr = Discretization( - queue.context, mesh, - InterpolatoryQuadratureSimplexGroupFactory(target_order)) - - lpot_kwargs = DEFAULT_LPOT_KWARGS.copy() - lpot_kwargs.update( - _expansion_stick_out_factor=TCF, - fmm_order=FMM_ORDER, qbx_order=QBX_ORDER - ) - - from pytential.qbx import QBXLayerPotentialSource - lpot_source = QBXLayerPotentialSource( - pre_density_discr, OVSMP_FACTOR*target_order, - **lpot_kwargs) - - lpot_source, _ = lpot_source.with_refinement() - - return lpot_source - - -def get_density(queue, lpot_source): - density_discr = lpot_source.density_discr - nodes = density_discr.nodes().with_queue(queue) - return cl.clmath.sin(10 * nodes[0]) - -# }}} - - -# {{{ test timing data gathering - -def test_timing_data_gathering(ctx_getter): - pytest.importorskip("pyfmmlib") - - cl_ctx = ctx_getter() - queue = cl.CommandQueue(cl_ctx, - 
properties=cl.command_queue_properties.PROFILING_ENABLE) - - lpot_source = get_lpot_source(queue, 2) - sigma = get_density(queue, lpot_source) - - sigma_sym = sym.var("sigma") - k_sym = LaplaceKernel(lpot_source.ambient_dim) - sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) - - op_S = bind(lpot_source, sym_op_S) - - timing_data = {} - op_S.eval(queue, dict(sigma=sigma), timing_data=timing_data) - assert timing_data - print(timing_data) - -# }}} - - -# {{{ test cost model - -@pytest.mark.parametrize("dim", (2, 3)) -def test_cost_model(ctx_getter, dim): - cl_ctx = ctx_getter() - queue = cl.CommandQueue(cl_ctx) - - lpot_source = get_lpot_source(queue, dim) - sigma = get_density(queue, lpot_source) - - sigma_sym = sym.var("sigma") - k_sym = LaplaceKernel(lpot_source.ambient_dim) - - sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) - op_S = bind(lpot_source, sym_op_S) - perf_S = op_S.get_modeled_cost(queue, sigma=sigma) - assert len(perf_S) == 1 - - sym_op_S_plus_D = ( - sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) - + sym.D(k_sym, sigma_sym)) - op_S_plus_D = bind(lpot_source, sym_op_S_plus_D) - perf_S_plus_D = op_S_plus_D.get_modeled_cost(queue, sigma=sigma) - assert len(perf_S_plus_D) == 2 - -# }}} - - -# {{{ test cost model parameter gathering - -def test_cost_model_parameter_gathering(ctx_getter): - cl_ctx = ctx_getter() - queue = cl.CommandQueue(cl_ctx) - - from sumpy.expansion.level_to_order import SimpleExpansionOrderFinder - - fmm_level_to_order = SimpleExpansionOrderFinder(tol=1e-5) - - lpot_source = get_lpot_source(queue, 2).copy( - fmm_level_to_order=fmm_level_to_order) - - sigma = get_density(queue, lpot_source) - - sigma_sym = sym.var("sigma") - k_sym = HelmholtzKernel(2, "k") - k = 2 - - sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1, k=sym.var("k")) - op_S = bind(lpot_source, sym_op_S) - - perf_S = one(op_S.get_modeled_cost(queue, sigma=sigma, k=k).values()) - - geo_data = lpot_source.qbx_fmm_geometry_data( - target_discrs_and_qbx_sides=((lpot_source.density_discr, 1),)) - - tree = geo_data.tree() - - assert perf_S.params["p_qbx"] == QBX_ORDER - assert perf_S.params["nlevels"] == tree.nlevels - assert perf_S.params["nsources"] == tree.nsources - assert perf_S.params["ntargets"] == tree.ntargets - assert perf_S.params["ncenters"] == geo_data.ncenters - - for level in range(tree.nlevels): - assert ( - perf_S.params["p_fmm_lev%d" % level] - == fmm_level_to_order(k_sym, {"k": 2}, tree, level)) - -# }}} - - -# {{{ constant one wrangler - -class ConstantOneQBXExpansionWrangler(ConstantOneExpansionWrangler): - - def __init__(self, queue, geo_data, use_target_specific_qbx): - from pytential.qbx.utils import ToHostTransferredGeoDataWrapper - geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) - - self.geo_data = geo_data - self.trav = geo_data.traversal() - self.use_target_specific_qbx = use_target_specific_qbx - - ConstantOneExpansionWrangler.__init__(self, geo_data.tree()) - - def _get_target_slice(self, ibox): - non_qbx_box_target_lists = self.geo_data.non_qbx_box_target_lists() - pstart = non_qbx_box_target_lists.box_target_starts[ibox] - return slice( - pstart, pstart - + non_qbx_box_target_lists.box_target_counts_nonchild[ibox]) - - def output_zeros(self): - non_qbx_box_target_lists = self.geo_data.non_qbx_box_target_lists() - return np.zeros(non_qbx_box_target_lists.nfiltered_targets) - - def full_output_zeros(self): - from pytools.obj_array import make_obj_array - return make_obj_array([np.zeros(self.tree.ntargets)]) - - def 
qbx_local_expansion_zeros(self): - return np.zeros(self.geo_data.ncenters) - - def reorder_potentials(self, potentials): - raise NotImplementedError("reorder_potentials should not " - "be called on a QBXExpansionWrangler") - - def form_global_qbx_locals(self, src_weights): - local_exps = self.qbx_local_expansion_zeros() - ops = 0 - - if self.use_target_specific_qbx: - return local_exps, self.timing_future(ops) - - global_qbx_centers = self.geo_data.global_qbx_centers() - qbx_center_to_target_box = self.geo_data.qbx_center_to_target_box() - - for tgt_icenter in global_qbx_centers: - itgt_box = qbx_center_to_target_box[tgt_icenter] - - start, end = ( - self.trav.neighbor_source_boxes_starts[itgt_box:itgt_box + 2]) + ctx, mesh, + InterpolatoryQuadratureSimplexGroupFactory(target_order) + ) - src_sum = 0 - for src_ibox in self.trav.neighbor_source_boxes_lists[start:end]: - src_pslice = self._get_source_slice(src_ibox) - ops += src_pslice.stop - src_pslice.start - src_sum += np.sum(src_weights[src_pslice]) + qbx, _ = QBXLayerPotentialSource( + pre_density_discr, 4 * target_order, + qbx_order, + fmm_order=fmm_order + ).with_refinement() - local_exps[tgt_icenter] = src_sum + coords = np.linspace(-1.5, 1.5, num=50) + x_coords, y_coords = np.meshgrid(coords, coords) + target_discr = PointsTarget(np.vstack( + (x_coords.reshape(-1), y_coords.reshape(-1)) + )) + target_discrs_and_qbx_sides = tuple([(target_discr, 0)]) - return local_exps, self.timing_future(ops) + geo_data_dev = qbx.qbx_fmm_geometry_data(target_discrs_and_qbx_sides) - def translate_box_multipoles_to_qbx_local(self, multipole_exps): - local_exps = self.qbx_local_expansion_zeros() - ops = 0 + from pytential.qbx.utils import ToHostTransferredGeoDataWrapper + geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data_dev) - global_qbx_centers = self.geo_data.global_qbx_centers() + cl_cost_model = CLQBXCostModel(queue) + python_cost_model = PythonQBXCostModel() - for isrc_level, ssn in enumerate(self.trav.from_sep_smaller_by_level): - for tgt_icenter in global_qbx_centers: - icontaining_tgt_box = ( - self.geo_data - .qbx_center_to_target_box_source_level(isrc_level) - [tgt_icenter]) + # {{{ Test process_form_qbxl - if icontaining_tgt_box == -1: - continue + cl_ndirect_sources_per_target_box = \ + cl_cost_model.get_ndirect_sources_per_target_box(geo_data_dev.traversal()) - start, stop = ( - ssn.starts[icontaining_tgt_box], - ssn.starts[icontaining_tgt_box+1]) + queue.finish() + start_time = time.time() - for src_ibox in ssn.lists[start:stop]: - local_exps[tgt_icenter] += multipole_exps[src_ibox] - ops += 1 + cl_p2qbxl = cl_cost_model.process_form_qbxl( + 5.0, geo_data_dev, cl_ndirect_sources_per_target_box + ) - return local_exps, self.timing_future(ops) + queue.finish() + logger.info("OpenCL time for process_form_qbxl: {0}".format( + str(time.time() - start_time) + )) - def translate_box_local_to_qbx_local(self, local_exps): - qbx_expansions = self.qbx_local_expansion_zeros() - ops = 0 + python_ndirect_sources_per_target_box = \ + python_cost_model.get_ndirect_sources_per_target_box(geo_data.traversal()) - global_qbx_centers = self.geo_data.global_qbx_centers() - qbx_center_to_target_box = self.geo_data.qbx_center_to_target_box() + start_time = time.time() - for tgt_icenter in global_qbx_centers: - isrc_box = qbx_center_to_target_box[tgt_icenter] - src_ibox = self.trav.target_boxes[isrc_box] - qbx_expansions[tgt_icenter] += local_exps[src_ibox] - ops += 1 + python_p2qbxl = python_cost_model.process_form_qbxl( + 5.0, geo_data, 
python_ndirect_sources_per_target_box + ) - return qbx_expansions, self.timing_future(ops) + logger.info("Python time for process_form_qbxl: {0}".format( + str(time.time() - start_time) + )) - def eval_qbx_expansions(self, qbx_expansions): - output = self.full_output_zeros() - ops = 0 - - global_qbx_centers = self.geo_data.global_qbx_centers() - center_to_tree_targets = self.geo_data.center_to_tree_targets() - - for src_icenter in global_qbx_centers: - start, end = ( - center_to_tree_targets.starts[src_icenter:src_icenter+2]) - for icenter_tgt in range(start, end): - center_itgt = center_to_tree_targets.lists[icenter_tgt] - output[0][center_itgt] += qbx_expansions[src_icenter] - ops += 1 - - return output, self.timing_future(ops) - - def eval_target_specific_qbx_locals(self, src_weights): - pot = self.full_output_zeros() - ops = 0 - - if not self.use_target_specific_qbx: - return pot, self.timing_future(ops) - - global_qbx_centers = self.geo_data.global_qbx_centers() - center_to_tree_targets = self.geo_data.center_to_tree_targets() - qbx_center_to_target_box = self.geo_data.qbx_center_to_target_box() - - for ictr in global_qbx_centers: - tgt_ibox = qbx_center_to_target_box[ictr] - - ictr_tgt_start, ictr_tgt_end = center_to_tree_targets.starts[ictr:ictr+2] - - for ictr_tgt in range(ictr_tgt_start, ictr_tgt_end): - ctr_itgt = center_to_tree_targets.lists[ictr_tgt] - - isrc_box_start, isrc_box_end = ( - self.trav.neighbor_source_boxes_starts[tgt_ibox:tgt_ibox+2]) - - for isrc_box in range(isrc_box_start, isrc_box_end): - src_ibox = self.trav.neighbor_source_boxes_lists[isrc_box] - - isrc_start = self.tree.box_source_starts[src_ibox] - isrc_end = (isrc_start - + self.tree.box_source_counts_nonchild[src_ibox]) - - pot[0][ctr_itgt] += sum(src_weights[isrc_start:isrc_end]) - ops += isrc_end - isrc_start - - return pot, self.timing_future(ops) - -# }}} - - -# {{{ verify cost model - -class OpCountingTranslationCostModel(object): - """A translation cost model which assigns at cost of 1 to each operation.""" - - def __init__(self, dim, nlevels): - pass - - @staticmethod - def direct(): - return 1 - - p2qbxl = direct - p2p_tsqbx = direct - qbxl2p = direct - - @staticmethod - def p2l(level): - return 1 - - l2p = p2l - p2m = p2l - m2p = p2l - m2qbxl = p2l - l2qbxl = p2l - - @staticmethod - def m2m(src_level, tgt_level): - return 1 - - l2l = m2m - m2l = m2m - - -@pytest.mark.parametrize("dim, off_surface, use_target_specific_qbx", ( - (2, False, False), - (2, True, False), - (3, False, False), - (3, False, True), - (3, True, False), - (3, True, True))) -def test_cost_model_correctness(ctx_getter, dim, off_surface, - use_target_specific_qbx): - cl_ctx = ctx_getter() - queue = cl.CommandQueue(cl_ctx) - - perf_model = ( - CostModel( - translation_cost_model_factory=OpCountingTranslationCostModel)) - - lpot_source = get_lpot_source(queue, dim).copy( - cost_model=perf_model, - _use_target_specific_qbx=use_target_specific_qbx) - - # Construct targets. - if off_surface: - from pytential.target import PointsTarget - from boxtree.tools import make_uniform_particle_array - ntargets = 10 ** 3 - targets = PointsTarget( - make_uniform_particle_array(queue, ntargets, dim, np.float)) - target_discrs_and_qbx_sides = ((targets, 0),) - qbx_forced_limit = None - else: - targets = lpot_source.density_discr - target_discrs_and_qbx_sides = ((targets, 1),) - qbx_forced_limit = 1 - - # Construct bound op, run cost model. 
- sigma_sym = sym.var("sigma") - k_sym = LaplaceKernel(lpot_source.ambient_dim) - sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=qbx_forced_limit) - - op_S = bind((lpot_source, targets), sym_op_S) - sigma = get_density(queue, lpot_source) - - from pytools import one - perf_S = one(op_S.get_modeled_cost(queue, sigma=sigma).values()) - - # Run FMM with ConstantOneWrangler. This can't be done with pytential's - # high-level interface, so call the FMM driver directly. - from pytential.qbx.fmm import drive_fmm - geo_data = lpot_source.qbx_fmm_geometry_data( - target_discrs_and_qbx_sides=target_discrs_and_qbx_sides) - - wrangler = ConstantOneQBXExpansionWrangler( - queue, geo_data, use_target_specific_qbx) - nnodes = lpot_source.quad_stage2_density_discr.nnodes - src_weights = np.ones(nnodes) - - timing_data = {} - potential = drive_fmm(wrangler, src_weights, timing_data, - traversal=wrangler.trav)[0][geo_data.ncenters:] - - # Check constant one wrangler for correctness. - assert (potential == nnodes).all() - - modeled_time = perf_S.get_predicted_times(merge_close_lists=True) - - # Check that the cost model matches the timing data returned by the - # constant one wrangler. - mismatches = [] - for stage in timing_data: - if timing_data[stage]["ops_elapsed"] != modeled_time[stage]: - mismatches.append( - (stage, timing_data[stage]["ops_elapsed"], modeled_time[stage])) - - assert not mismatches, "\n".join(str(s) for s in mismatches) - -# }}} - - -# {{{ test order varying by level - -CONSTANT_ONE_PARAMS = dict( - c_l2l=1, - c_l2p=1, - c_l2qbxl=1, - c_m2l=1, - c_m2m=1, - c_m2p=1, - c_m2qbxl=1, - c_p2l=1, - c_p2m=1, - c_p2p=1, - c_p2qbxl=1, - c_qbxl2p=1, - c_p2p_tsqbx=1, - ) - - -def test_cost_model_order_varying_by_level(ctx_getter): - cl_ctx = ctx_getter() - queue = cl.CommandQueue(cl_ctx) - - # {{{ constant level to order - - def level_to_order_constant(kernel, kernel_args, tree, level): - return 1 - - lpot_source = get_lpot_source(queue, 2).copy( - cost_model=CostModel( - calibration_params=CONSTANT_ONE_PARAMS), - fmm_level_to_order=level_to_order_constant) - - sigma_sym = sym.var("sigma") - - k_sym = LaplaceKernel(2) - sym_op = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) - - sigma = get_density(queue, lpot_source) - - perf_constant = one( - bind(lpot_source, sym_op) - .get_modeled_cost(queue, sigma=sigma).values()) - - # }}} - - # {{{ varying level to order - - varying_order_params = perf_constant.params.copy() - - nlevels = perf_constant.params["nlevels"] - for level in range(nlevels): - varying_order_params["p_fmm_lev%d" % level] = nlevels - level - - perf_varying = perf_constant.with_params(varying_order_params) + assert np.array_equal(cl_p2qbxl.get(), python_p2qbxl) # }}} - # This only checks to ensure that the costs are different. The varying-level - # case should have larger cost. 
- - assert ( - sum(perf_varying.get_predicted_times().values()) - > sum(perf_constant.get_predicted_times().values())) - -# }}} - - -# You can test individual routines by typing -# $ python test_cost_model.py 'test_routine()' if __name__ == "__main__": - import sys - if len(sys.argv) > 1: - exec(sys.argv[1]) - else: - from pytest import main - main([__file__]) - + ctx_factory = cl.create_some_context + test_compare_cl_and_py_cost_model(ctx_factory) # vim: foldmethod=marker -- GitLab From 4fce00da6270947048610af7411d3676c2742f5f Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 6 Feb 2019 09:49:44 -0600 Subject: [PATCH 04/55] Add process_m2qbxl to PythonQBXCostModel --- .gitlab-ci.yml | 2 +- pytential/qbx/cost.py | 44 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c8b3f17b..6e9d488a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -104,6 +104,6 @@ Flake8: - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh - ". ./prepare-and-run-flake8.sh pytential test examples" tags: - - python3.5 + - python3 except: - tags diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index f1517ce8..8486a568 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -128,9 +128,6 @@ class AbstractQBXCostModel(AbstractFMMCostModel): ) """ - @abstractmethod - def process_m2qbxl(self): - pass @abstractmethod def process_l2qbxl(self): @@ -152,6 +149,17 @@ class AbstractQBXCostModel(AbstractFMMCostModel): ndirect_sources_per_target_box): pass + @abstractmethod + def process_m2qbxl(self, geo_data, m2qbxl_cost): + """ + :arg geo_data: TODO + :arg m2qbxl_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + of shape (nlevels,) where the ith entry represents the evaluation cost + from multipole expansion at level i to a QBX center. 
+ :return: + """ + pass + class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): def __init__(self, queue, @@ -223,6 +231,36 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): return neval_tsqbx * p2p_tsqbx_cost + def process_m2qbxl(self, geo_data, m2qbxl_cost): + traversal = geo_data.traversal() + global_qbx_centers = geo_data.global_qbx_centers() + qbx_center_to_target_box_source_level = \ + geo_data.qbx_center_to_target_box_source_level() + qbx_center_to_target_box = geo_data.qbx_center_to_target_box() + + ntarget_boxes = len(traversal.target_boxes) + nm2qbxl = np.zeros(ntarget_boxes, dtype=np.float64) + + for isrc_level, sep_smaller_list in enumerate( + traversal.from_sep_smaller_by_level): + for tgt_icenter in global_qbx_centers: + icontaining_tgt_box = qbx_center_to_target_box_source_level[ + isrc_level][tgt_icenter] + + if icontaining_tgt_box == -1: + continue + + start = sep_smaller_list.starts[icontaining_tgt_box] + stop = sep_smaller_list.starts[icontaining_tgt_box+1] + + containing_tgt_box = qbx_center_to_target_box(tgt_icenter) + + nm2qbxl[containing_tgt_box] += ( + (stop - start) * m2qbxl_cost[isrc_level]) + + return nm2qbxl + + # }}} # vim: foldmethod=marker -- GitLab From 718a32ecaef9fad504b20a2bd27c1aa9233405b6 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 8 Feb 2019 21:16:07 -0600 Subject: [PATCH 05/55] Add OpenCL implementation of process_form_qbxl --- pytential/qbx/__init__.py | 14 ++-- pytential/qbx/cost.py | 156 +++++++++++++++++++++++++++++--------- test/test_cost_model.py | 2 +- 3 files changed, 130 insertions(+), 42 deletions(-) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index 413b9122..c2aa9e95 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -506,9 +506,9 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): @memoize_method def _expansion_radii(self, last_dim_length): with cl.CommandQueue(self.cl_context) as queue: - return (self._coarsest_quad_resolution(last_dim_length) - .with_queue(queue) - * 0.5 * self._dim_fudge_factor()).with_queue(None) + return (self._coarsest_quad_resolution(last_dim_length) + .with_queue(queue) + * 0.5 * self._dim_fudge_factor()).with_queue(None) # _expansion_radii should not be needed for the fine discretization @@ -536,10 +536,10 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): @memoize_method def _close_target_tunnel_radius(self, last_dim_length): with cl.CommandQueue(self.cl_context) as queue: - return ( - self._expansion_radii(last_dim_length).with_queue(queue) - * 0.5 - ).with_queue(None) + return ( + self._expansion_radii(last_dim_length).with_queue(queue) + * 0.5 + ).with_queue(None) @memoize_method def _coarsest_quad_resolution(self, last_dim_length="npanels"): diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 8486a568..016bc225 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -26,11 +26,16 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
""" +from six.moves import range import numpy as np import pyopencl as cl import pyopencl.array # noqa: F401 -from six.moves import range +from pyopencl.array import take +from pyopencl.elementwise import ElementwiseKernel +from pyopencl.tools import dtype_to_ctype +from mako.template import Template from pymbolic import var +from pytools import memoize_method from boxtree.cost import ( FMMTranslationCostModel, AbstractFMMCostModel, PythonFMMCostModel, CLFMMCostModel @@ -58,20 +63,20 @@ class QBXTranslationCostModel(FMMTranslationCostModel): def p2p_tsqbx(self): # This term should be linear in the QBX order, which is the # square root of the number of QBX coefficients. - return var("c_p2p_tsqbx") * self.ncoeffs_qbx ** (1/2) + return var("c_p2p_tsqbx") * self.ncoeffs_qbx ** (1 / 2) def qbxl2p(self): return var("c_qbxl2p") * self.ncoeffs_qbx def m2qbxl(self, level): return var("c_m2qbxl") * self.e2e_cost( - self.ncoeffs_fmm_by_level[level], - self.ncoeffs_qbx) + self.ncoeffs_fmm_by_level[level], + self.ncoeffs_qbx) def l2qbxl(self, level): return var("c_l2qbxl") * self.e2e_cost( - self.ncoeffs_fmm_by_level[level], - self.ncoeffs_qbx) + self.ncoeffs_fmm_by_level[level], + self.ncoeffs_qbx) # }}} @@ -94,9 +99,9 @@ def pde_aware_translation_cost_model(dim, nlevels): uses_point_and_shoot = True return QBXTranslationCostModel( - ncoeffs_qbx=ncoeffs_qbx, - ncoeffs_fmm_by_level=ncoeffs_fmm, - uses_point_and_shoot=uses_point_and_shoot) + ncoeffs_qbx=ncoeffs_qbx, + ncoeffs_fmm_by_level=ncoeffs_fmm, + uses_point_and_shoot=uses_point_and_shoot) def taylor_translation_cost_model(dim, nlevels): @@ -110,9 +115,9 @@ def taylor_translation_cost_model(dim, nlevels): ncoeffs_qbx = (p_qbx + 1) ** dim return QBXTranslationCostModel( - ncoeffs_qbx=ncoeffs_qbx, - ncoeffs_fmm_by_level=ncoeffs_fmm, - uses_point_and_shoot=False) + ncoeffs_qbx=ncoeffs_qbx, + ncoeffs_fmm_by_level=ncoeffs_fmm, + uses_point_and_shoot=False) # }}} @@ -167,30 +172,111 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): self.queue = queue AbstractQBXCostModel.__init__(self, translation_cost_model_factory) - def process_form_qbxl(self, p2qbxl_cost, geo_data, - ndirect_sources_per_target_box): - # TODO: convert this implementation to OpenCL - # TODO: probably need an OpenCL histogram implementation - traversal = geo_data.traversal() - ntarget_boxes = traversal.target_boxes.shape[0] - qbx_center_to_target_box = geo_data.qbx_center_to_target_box().get( - self.queue + @memoize_method + def _fill_array_with_index_knl(self, idx_dtype, array_dtype): + return ElementwiseKernel( + self.queue.context, + Template(r""" + ${idx_t} *index, + ${array_t} *array, + ${array_t} val + """).render( + idx_t=dtype_to_ctype(idx_dtype), + array_t=dtype_to_ctype(array_dtype) + ), + Template(r""" + array[index[i]] = val; + """).render(), + name="fill_array_with_index" ) - global_qbx_centers = geo_data.global_qbx_centers().get(self.queue) - ncenters_per_tgt_box = np.zeros( - ntarget_boxes, dtype=traversal.tree.particle_id_dtype + def _fill_array_with_index(self, array, index, value): + idx_dtype = index.dtype + array_dtype = array.dtype + knl = self._fill_array_with_index_knl(idx_dtype, array_dtype) + knl(index, array, value, queue=self.queue) + + @memoize_method + def count_global_qbx_centers_knl(self, box_id_dtype, particle_id_dtype): + return ElementwiseKernel( + self.queue.context, + Template(r""" + ${particle_id_t} *nqbx_centers_itgt_box, + char *global_qbx_center_mask, + ${box_id_t} *target_boxes, + ${particle_id_t} *box_target_starts, + 
${particle_id_t} *box_target_counts_nonchild + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype) + ), + Template(r""" + ${box_id_t} global_box_id = target_boxes[i]; + ${particle_id_t} start = box_target_starts[global_box_id]; + ${particle_id_t} end = start + box_target_counts_nonchild[ + global_box_id + ]; + + ${particle_id_t} nqbx_centers = 0; + for(${particle_id_t} iparticle = start; iparticle < end; iparticle++) + if(global_qbx_center_mask[iparticle]) + nqbx_centers++; + + nqbx_centers_itgt_box[i] = nqbx_centers; + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype) + ), + name="count_global_qbx_centers" ) - for itgt_center, tgt_icenter in enumerate(global_qbx_centers): - itgt_box = qbx_center_to_target_box[tgt_icenter] - ncenters_per_tgt_box[itgt_box] += 1 + def get_nqbx_centers_per_tgt_box(self, geo_data): + """ + :arg geo_data: TODO + :return: a :class:`pyopencl.array.Array` of shape (ntarget_boxes,) where the + ith entry represents the number of `geo_data.global_qbx_centers` in + target_boxes[i]. + """ + traversal = geo_data.traversal() + tree = geo_data.tree() + global_qbx_centers = geo_data.global_qbx_centers() - ncenters_per_tgt_box_dev = cl.array.to_device( - self.queue, ncenters_per_tgt_box + # Build a mask of whether a target is a global qbx center + global_qbx_centers_tree_order = take( + tree.sorted_target_ids, global_qbx_centers, queue=self.queue + ) + global_qbx_center_mask = cl.array.zeros( + self.queue, tree.ntargets, dtype=np.int8 + ) + self._fill_array_with_index( + global_qbx_center_mask, global_qbx_centers_tree_order, 1 ) - return (ncenters_per_tgt_box_dev + # Each target box enumerate its target list and count the number of global + # qbx centers + ntarget_boxes = len(traversal.target_boxes) + nqbx_centers_itgt_box = cl.array.empty( + self.queue, ntarget_boxes, dtype=tree.particle_id_dtype + ) + + count_global_qbx_centers_knl = self.count_global_qbx_centers_knl( + tree.box_id_dtype, tree.particle_id_dtype + ) + count_global_qbx_centers_knl( + nqbx_centers_itgt_box, + global_qbx_center_mask, + traversal.target_boxes, + tree.box_target_starts, + tree.box_target_counts_nonchild + ) + + return nqbx_centers_itgt_box + + def process_form_qbxl(self, p2qbxl_cost, geo_data, + ndirect_sources_per_target_box): + nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(geo_data) + + return (nqbx_centers_itgt_box * ndirect_sources_per_target_box * p2qbxl_cost) @@ -198,6 +284,9 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): ndirect_sources_per_target_box): pass + def process_m2qbxl(self, geo_data, m2qbxl_cost): + pass + class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): def process_form_qbxl(self, p2qbxl_cost, geo_data, @@ -208,7 +297,7 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): np2qbxl = np.zeros(len(traversal.target_boxes), dtype=np.float64) - for itgt_center, tgt_icenter in enumerate(global_qbx_centers): + for tgt_icenter in global_qbx_centers: itgt_box = qbx_center_to_target_box[tgt_icenter] np2qbxl[itgt_box] += ndirect_sources_per_target_box[itgt_box] @@ -226,7 +315,7 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): start, end = center_to_targets_starts[tgt_icenter:tgt_icenter + 2] itgt_box = qbx_center_to_target_box[tgt_icenter] neval_tsqbx[itgt_box] += ( - ndirect_sources_per_target_box[itgt_box] * (end - start) + ndirect_sources_per_target_box[itgt_box] * (end - start) ) return 
neval_tsqbx * p2p_tsqbx_cost @@ -251,16 +340,15 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): continue start = sep_smaller_list.starts[icontaining_tgt_box] - stop = sep_smaller_list.starts[icontaining_tgt_box+1] + stop = sep_smaller_list.starts[icontaining_tgt_box + 1] containing_tgt_box = qbx_center_to_target_box(tgt_icenter) nm2qbxl[containing_tgt_box] += ( - (stop - start) * m2qbxl_cost[isrc_level]) + (stop - start) * m2qbxl_cost[isrc_level]) return nm2qbxl - # }}} # vim: foldmethod=marker diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 71d0b572..f3229639 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -45,7 +45,7 @@ logger.setLevel(logging.INFO) @pytest.mark.opencl def test_compare_cl_and_py_cost_model(ctx_factory): - nelements = 120 + nelements = 1280 target_order = 16 fmm_order = 5 qbx_order = fmm_order -- GitLab From d51e806faf6fdb8dc2cdaef7789b05fbc6b6c6dd Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 10 Feb 2019 15:57:46 -0600 Subject: [PATCH 06/55] Add OpenCL implementation of process_m2qbxl and corresponding test case --- pytential/qbx/cost.py | 78 ++++++++++++++++++++++++++++++++++++----- test/test_cost_model.py | 68 ++++++++++++++++++++++++++++++++++- 2 files changed, 136 insertions(+), 10 deletions(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 016bc225..339158ca 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -157,11 +157,15 @@ class AbstractQBXCostModel(AbstractFMMCostModel): @abstractmethod def process_m2qbxl(self, geo_data, m2qbxl_cost): """ - :arg geo_data: TODO + :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or + similar object in the host memory. :arg m2qbxl_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` - of shape (nlevels,) where the ith entry represents the evaluation cost + of shape (nlevels,) where the ith entry represents the translation cost from multipole expansion at level i to a QBX center. - :return: + :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (ntarget_boxes,), with the ith entry representing the cost of translating + multipole expansions of list 3 boxes at all source levels to all QBX + centers in target_boxes[i]. """ pass @@ -230,12 +234,13 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): name="count_global_qbx_centers" ) + @memoize_method def get_nqbx_centers_per_tgt_box(self, geo_data): """ :arg geo_data: TODO :return: a :class:`pyopencl.array.Array` of shape (ntarget_boxes,) where the ith entry represents the number of `geo_data.global_qbx_centers` in - target_boxes[i]. + target_boxes[i]. The type of this array is *particle_id_dtype*. 
""" traversal = geo_data.traversal() tree = geo_data.tree() @@ -284,8 +289,60 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): ndirect_sources_per_target_box): pass + @memoize_method + def process_m2qbxl_knl(self, box_id_dtype, particle_id_dtype): + return ElementwiseKernel( + self.queue.context, + Template(r""" + ${box_id_t} *idx_to_itgt_box, + ${particle_id_t} *nqbx_centers_itgt_box, + ${box_id_t} *ssn_starts, + double *nm2qbxl, + double m2qbxl_cost + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype) + ), + Template(r""" + // get the index of current box in target_boxes + ${box_id_t} itgt_box = idx_to_itgt_box[i]; + // get the number of expansion centers in current box + ${particle_id_t} nqbx_centers = nqbx_centers_itgt_box[itgt_box]; + // get the number of list 3 boxes of the current box in a particular + // level + ${box_id_t} nlist3_boxes = ssn_starts[i + 1] - ssn_starts[i]; + // calculate the cost + nm2qbxl[itgt_box] += (nqbx_centers * nlist3_boxes * m2qbxl_cost); + """).render( + box_id_t=dtype_to_ctype(box_id_dtype), + particle_id_t=dtype_to_ctype(particle_id_dtype) + ), + name="process_m2qbxl" + ) + def process_m2qbxl(self, geo_data, m2qbxl_cost): - pass + tree = geo_data.tree() + traversal = geo_data.traversal() + ntarget_boxes = len(traversal.target_boxes) + nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(geo_data) + + process_m2qbxl_knl = self.process_m2qbxl_knl( + tree.box_id_dtype, tree.particle_id_dtype + ) + + nm2qbxl = cl.array.zeros(self.queue, ntarget_boxes, dtype=np.float64) + + for isrc_level, ssn in enumerate(traversal.from_sep_smaller_by_level): + process_m2qbxl_knl( + ssn.nonempty_indices, + nqbx_centers_itgt_box, + ssn.starts, + nm2qbxl, + m2qbxl_cost[isrc_level].get().reshape(-1)[0], + queue=self.queue + ) + + return nm2qbxl class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): @@ -323,8 +380,6 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): def process_m2qbxl(self, geo_data, m2qbxl_cost): traversal = geo_data.traversal() global_qbx_centers = geo_data.global_qbx_centers() - qbx_center_to_target_box_source_level = \ - geo_data.qbx_center_to_target_box_source_level() qbx_center_to_target_box = geo_data.qbx_center_to_target_box() ntarget_boxes = len(traversal.target_boxes) @@ -332,9 +387,14 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): for isrc_level, sep_smaller_list in enumerate( traversal.from_sep_smaller_by_level): + + qbx_center_to_target_box_source_level = \ + geo_data.qbx_center_to_target_box_source_level(isrc_level) + for tgt_icenter in global_qbx_centers: icontaining_tgt_box = qbx_center_to_target_box_source_level[ - isrc_level][tgt_icenter] + tgt_icenter + ] if icontaining_tgt_box == -1: continue @@ -342,7 +402,7 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): start = sep_smaller_list.starts[icontaining_tgt_box] stop = sep_smaller_list.starts[icontaining_tgt_box + 1] - containing_tgt_box = qbx_center_to_target_box(tgt_icenter) + containing_tgt_box = qbx_center_to_target_box[tgt_icenter] nm2qbxl[containing_tgt_box] += ( (stop - start) * m2qbxl_cost[isrc_level]) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index f3229639..2305498e 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -33,7 +33,10 @@ import numpy as np import pyopencl as cl from pytential.qbx import QBXLayerPotentialSource from pytential.target import PointsTarget -from pytential.qbx.cost import 
CLQBXCostModel, PythonQBXCostModel +from pytential.qbx.cost import ( + CLQBXCostModel, PythonQBXCostModel, pde_aware_translation_cost_model +) +from pymbolic import evaluate import time import logging @@ -53,6 +56,8 @@ def test_compare_cl_and_py_cost_model(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + # {{{ Construct geometry + from meshmode.mesh.generation import make_curve_mesh, starfish mesh = make_curve_mesh(starfish, np.linspace(0, 1, nelements), target_order) @@ -82,9 +87,37 @@ def test_compare_cl_and_py_cost_model(ctx_factory): from pytential.qbx.utils import ToHostTransferredGeoDataWrapper geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data_dev) + # }}} + + # {{{ Construct cost models + cl_cost_model = CLQBXCostModel(queue) python_cost_model = PythonQBXCostModel() + tree = geo_data.tree() + xlat_cost = pde_aware_translation_cost_model(tree.targets.shape[0], tree.nlevels) + + constant_one_params = dict( + c_l2l=1, + c_l2p=1, + c_m2l=1, + c_m2m=1, + c_m2p=1, + c_p2l=1, + c_p2m=1, + c_p2p=1, + c_p2qbxl=1, + c_p2p_tsqbx=1, + c_qbxl2p=1, + c_m2qbxl=1, + c_l2qbxl=1, + p_qbx=5 + ) + for ilevel in range(tree.nlevels): + constant_one_params["p_fmm_lev%d" % ilevel] = 10 + + # }}} + # {{{ Test process_form_qbxl cl_ndirect_sources_per_target_box = \ @@ -119,6 +152,39 @@ def test_compare_cl_and_py_cost_model(ctx_factory): # }}} + # {{{ Test process_m2qbxl + + nlevels = geo_data.tree().nlevels + m2qbxl_cost = np.zeros(nlevels, dtype=np.float64) + for ilevel in range(nlevels): + m2qbxl_cost[ilevel] = evaluate( + xlat_cost.m2qbxl(ilevel), + context=constant_one_params + ) + m2qbxl_cost_dev = cl.array.to_device(queue, m2qbxl_cost) + + queue.finish() + start_time = time.time() + + cl_m2qbxl = cl_cost_model.process_m2qbxl(geo_data_dev, m2qbxl_cost_dev) + + queue.finish() + logger.info("OpenCL time for process_m2qbxl: {0}".format( + str(time.time() - start_time) + )) + + start_time = time.time() + + python_m2qbxl = python_cost_model.process_m2qbxl(geo_data, m2qbxl_cost) + + logger.info("Python time for process_m2qbxl: {0}".format( + str(time.time() - start_time) + )) + + assert np.array_equal(cl_m2qbxl.get(), python_m2qbxl) + + # }}} + if __name__ == "__main__": ctx_factory = cl.create_some_context -- GitLab From df1984e6dd9cd0b6387c95428228791f82489fb1 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 10 Feb 2019 23:43:08 -0600 Subject: [PATCH 07/55] Add process_l2qbxl --- pytential/qbx/cost.py | 48 +++++++++++++++++++++++++++++++++++++---- test/test_cost_model.py | 41 +++++++++++++++++++++++++++-------- 2 files changed, 76 insertions(+), 13 deletions(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 339158ca..b412185e 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -134,10 +134,6 @@ class AbstractQBXCostModel(AbstractFMMCostModel): """ - @abstractmethod - def process_l2qbxl(self): - pass - @abstractmethod def process_eval_qbxl(self): pass @@ -169,6 +165,20 @@ class AbstractQBXCostModel(AbstractFMMCostModel): """ pass + @abstractmethod + def process_l2qbxl(self, geo_data, l2qbxl_cost): + """ + :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or + similar object in the host memory. + :arg l2qbxl_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` + of shape (nlevels,) where the ith entry represents the translation cost + from a box local expansion to a QBX local expansion. 
+ :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (ntarget_boxes,), with the ith entry representing the cost of translating + box local expansions to all QBX local expansions. + """ + pass + class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): def __init__(self, queue, @@ -344,6 +354,20 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): return nm2qbxl + def process_l2qbxl(self, geo_data, l2qbxl_cost): + tree = geo_data.tree() + traversal = geo_data.traversal() + nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(geo_data) + + # l2qbxl_cost_itgt_box = l2qbxl_cost[tree.box_levels[traversal.target_boxes]] + l2qbxl_cost_itgt_box = take( + l2qbxl_cost, + take(tree.box_levels, traversal.target_boxes, queue=self.queue), + queue=self.queue + ) + + return nqbx_centers_itgt_box * l2qbxl_cost_itgt_box + class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): def process_form_qbxl(self, p2qbxl_cost, geo_data, @@ -409,6 +433,22 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): return nm2qbxl + def process_l2qbxl(self, geo_data, l2qbxl_cost): + tree = geo_data.tree() + traversal = geo_data.traversal() + global_qbx_centers = geo_data.global_qbx_centers() + qbx_center_to_target_box = geo_data.qbx_center_to_target_box() + + ntarget_boxes = len(traversal.target_boxes) + nl2qbxl = np.zeros(ntarget_boxes, dtype=np.float64) + + for tgt_icenter in global_qbx_centers: + itgt_box = qbx_center_to_target_box[tgt_icenter] + tgt_ibox = traversal.target_boxes[itgt_box] + nl2qbxl[itgt_box] += l2qbxl_cost[tree.box_levels[tgt_ibox]] + + return nl2qbxl + # }}} # vim: foldmethod=marker diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 2305498e..4b29cca7 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -32,7 +32,6 @@ from pyopencl.tools import ( # noqa import numpy as np import pyopencl as cl from pytential.qbx import QBXLayerPotentialSource -from pytential.target import PointsTarget from pytential.qbx.cost import ( CLQBXCostModel, PythonQBXCostModel, pde_aware_translation_cost_model ) @@ -48,7 +47,7 @@ logger.setLevel(logging.INFO) @pytest.mark.opencl def test_compare_cl_and_py_cost_model(ctx_factory): - nelements = 1280 + nelements = 3600 target_order = 16 fmm_order = 5 qbx_order = fmm_order @@ -75,13 +74,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): fmm_order=fmm_order ).with_refinement() - coords = np.linspace(-1.5, 1.5, num=50) - x_coords, y_coords = np.meshgrid(coords, coords) - target_discr = PointsTarget(np.vstack( - (x_coords.reshape(-1), y_coords.reshape(-1)) - )) - target_discrs_and_qbx_sides = tuple([(target_discr, 0)]) - + target_discrs_and_qbx_sides = tuple([(qbx.density_discr, 0)]) geo_data_dev = qbx.qbx_fmm_geometry_data(target_discrs_and_qbx_sides) from pytential.qbx.utils import ToHostTransferredGeoDataWrapper @@ -185,6 +178,36 @@ def test_compare_cl_and_py_cost_model(ctx_factory): # }}} + # {{{ Test process_l2qbxl + + l2qbxl_cost = np.zeros(nlevels, dtype=np.float64) + for ilevel in range(nlevels): + l2qbxl_cost[ilevel] = evaluate( + xlat_cost.l2qbxl(ilevel), + context=constant_one_params + ) + l2qbxl_cost_dev = cl.array.to_device(queue, l2qbxl_cost) + + queue.finish() + start_time = time.time() + + cl_l2qbxl = cl_cost_model.process_l2qbxl(geo_data_dev, l2qbxl_cost_dev) + + queue.finish() + logger.info("OpenCL time for process_l2qbxl: {0}".format( + str(time.time() - start_time) + )) + + start_time = time.time() + + python_l2qbxl = 
python_cost_model.process_l2qbxl(geo_data, l2qbxl_cost) + + logger.info("Python time for process_l2qbxl: {0}".format( + str(time.time() - start_time) + )) + + assert np.array_equal(cl_l2qbxl.get(), python_l2qbxl) + if __name__ == "__main__": ctx_factory = cl.create_some_context -- GitLab From 5684d6c89345d51b22459d53888fc1b8cf02283a Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 11 Feb 2019 20:48:38 -0600 Subject: [PATCH 08/55] Add process_eval_qbxl --- pytential/qbx/cost.py | 86 ++++++++++++++++++++++++++++++----------- test/test_cost_model.py | 26 +++++++++++++ 2 files changed, 89 insertions(+), 23 deletions(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index b412185e..df120f0d 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -132,14 +132,6 @@ class AbstractQBXCostModel(AbstractFMMCostModel): self, translation_cost_model_factory ) - """ - - @abstractmethod - def process_eval_qbxl(self): - pass - - """ - @abstractmethod def process_form_qbxl(self, p2qbxl_cost, geo_data, ndirect_sources_per_target_box): @@ -179,6 +171,20 @@ class AbstractQBXCostModel(AbstractFMMCostModel): """ pass + @abstractmethod + def process_eval_qbxl(self, geo_data, qbxl2p_cost): + """ + :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or + similar object in the host memory. + :arg qbxl2p_cost: a :class:`numpy.float64` constant, representing the + evaluation cost of a target from its QBX local expansion. + :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (ntarget_boxes,), with the ith entry representing the cost of evaluating + all targets associated with QBX centers in target_boxes[i] from QBX local + expansions. + """ + pass + class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): def __init__(self, queue, @@ -216,7 +222,7 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): self.queue.context, Template(r""" ${particle_id_t} *nqbx_centers_itgt_box, - char *global_qbx_center_mask, + ${particle_id_t} *global_qbx_center_weight, ${box_id_t} *target_boxes, ${particle_id_t} *box_target_starts, ${particle_id_t} *box_target_counts_nonchild @@ -233,8 +239,7 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): ${particle_id_t} nqbx_centers = 0; for(${particle_id_t} iparticle = start; iparticle < end; iparticle++) - if(global_qbx_center_mask[iparticle]) - nqbx_centers++; + nqbx_centers += global_qbx_center_weight[iparticle]; nqbx_centers_itgt_box[i] = nqbx_centers; """).render( @@ -244,30 +249,38 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): name="count_global_qbx_centers" ) - @memoize_method - def get_nqbx_centers_per_tgt_box(self, geo_data): + def get_nqbx_centers_per_tgt_box(self, geo_data, weights=None): """ - :arg geo_data: TODO - :return: a :class:`pyopencl.array.Array` of shape (ntarget_boxes,) where the - ith entry represents the number of `geo_data.global_qbx_centers` in - target_boxes[i]. The type of this array is *particle_id_dtype*. + :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. + :arg weights: a :class:`pyopencl.array.Array` of shape (ncenters,) with + particle_id_dtype, the weight of each center in user order. + :return: a :class:`pyopencl.array.Array` of shape (ntarget_boxes,) with type + *particle_id_dtype* where the ith entry represents the number of + `geo_data.global_qbx_centers` in target_boxes[i], optionally weighted by + *weights*. 
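+            For instance, :meth:`process_eval_qbxl` below passes the number of
+            tree targets associated with each center as *weights*, so that the
+            result counts center-target pairs per target box rather than
+            centers.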
""" traversal = geo_data.traversal() tree = geo_data.tree() global_qbx_centers = geo_data.global_qbx_centers() + ncenters = geo_data.ncenters - # Build a mask of whether a target is a global qbx center + # Build a mask (weight) of whether a target is a global qbx center global_qbx_centers_tree_order = take( tree.sorted_target_ids, global_qbx_centers, queue=self.queue ) - global_qbx_center_mask = cl.array.zeros( - self.queue, tree.ntargets, dtype=np.int8 + global_qbx_center_weight = cl.array.zeros( + self.queue, tree.ntargets, dtype=tree.particle_id_dtype ) + self._fill_array_with_index( - global_qbx_center_mask, global_qbx_centers_tree_order, 1 + global_qbx_center_weight, global_qbx_centers_tree_order, 1 ) - # Each target box enumerate its target list and count the number of global + if weights is not None: + assert weights.dtype == tree.particle_id_dtype + global_qbx_center_weight[tree.sorted_target_ids[:ncenters]] *= weights + + # Each target box enumerate its target list and add the weight of global # qbx centers ntarget_boxes = len(traversal.target_boxes) nqbx_centers_itgt_box = cl.array.empty( @@ -279,7 +292,7 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): ) count_global_qbx_centers_knl( nqbx_centers_itgt_box, - global_qbx_center_mask, + global_qbx_center_weight, traversal.target_boxes, tree.box_target_starts, tree.box_target_counts_nonchild @@ -368,6 +381,17 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): return nqbx_centers_itgt_box * l2qbxl_cost_itgt_box + def process_eval_qbxl(self, geo_data, qbxl2p_cost): + center_to_targets_starts = geo_data.center_to_tree_targets().starts + center_to_targets_starts = center_to_targets_starts.with_queue(self.queue) + weights = center_to_targets_starts[1:] - center_to_targets_starts[:-1] + + nqbx_targets_itgt_box = self.get_nqbx_centers_per_tgt_box( + geo_data, weights=weights + ) + + return nqbx_targets_itgt_box * qbxl2p_cost + class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): def process_form_qbxl(self, p2qbxl_cost, geo_data, @@ -449,6 +473,22 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): return nl2qbxl + def process_eval_qbxl(self, geo_data, qbxl2p_cost): + traversal = geo_data.traversal() + global_qbx_centers = geo_data.global_qbx_centers() + center_to_targets_starts = geo_data.center_to_tree_targets().starts + qbx_center_to_target_box = geo_data.qbx_center_to_target_box() + + ntarget_boxes = len(traversal.target_boxes) + neval_qbxl = np.zeros(ntarget_boxes, dtype=np.float64) + + for src_icenter in global_qbx_centers: + start, end = center_to_targets_starts[src_icenter:src_icenter+2] + icontaining_tgt_box = qbx_center_to_target_box[src_icenter] + neval_qbxl[icontaining_tgt_box] += (end - start) + + return neval_qbxl * qbxl2p_cost + # }}} # vim: foldmethod=marker diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 4b29cca7..ef666ed3 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -208,6 +208,32 @@ def test_compare_cl_and_py_cost_model(ctx_factory): assert np.array_equal(cl_l2qbxl.get(), python_l2qbxl) + # }}} + + # {{{ Test process_eval_qbxl + + queue.finish() + start_time = time.time() + + cl_eval_qbxl = cl_cost_model.process_eval_qbxl(geo_data_dev, 5.0) + + queue.finish() + logger.info("OpenCL time for process_eval_qbxl: {0}".format( + str(time.time() - start_time) + )) + + start_time = time.time() + + python_eval_qbxl = python_cost_model.process_eval_qbxl(geo_data, 5.0) + + logger.info("Python time for process_eval_qbxl: 
{0}".format( + str(time.time() - start_time) + )) + + assert np.array_equal(cl_eval_qbxl.get(), python_eval_qbxl) + + # }}} + if __name__ == "__main__": ctx_factory = cl.create_some_context -- GitLab From 5493086da6d13528a52a7bf50f7d6d0f7546e0dd Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 12 Feb 2019 21:08:16 -0600 Subject: [PATCH 09/55] Add eval_target_specific_qbxl --- pytential/qbx/cost.py | 65 ++++++++++++++++++++++++++++++++--------- test/test_cost_model.py | 35 ++++++++++++++++++++-- 2 files changed, 85 insertions(+), 15 deletions(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index df120f0d..be3341b4 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -133,13 +133,22 @@ class AbstractQBXCostModel(AbstractFMMCostModel): ) @abstractmethod - def process_form_qbxl(self, p2qbxl_cost, geo_data, + def process_form_qbxl(self, geo_data, p2qbxl_cost, ndirect_sources_per_target_box): - pass - - @abstractmethod - def process_eval_target_specific_qbxl(self, p2p_tsqbx_cost, geo_data, - ndirect_sources_per_target_box): + """ + :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or + similar object in the host memory. + :arg p2qbxl_cost: a :class:`numpy.float64` constant representing the cost of + adding a source to a QBX local expansion. + :arg ndirect_sources_per_target_box: a :class:`numpy.ndarray` or + :class:`pyopencl.array.Array` of shape (ntarget_boxes,), with the ith + entry representing the number of direct evaluation sources (list 1, + list 3 close and list 4 close) for target_boxes[i]. + :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (ntarget_boxes,), with the ith entry representing the cost of adding all + direct evaluation sources to QBX local expansions of centers in + target_boxes[i]. + """ pass @abstractmethod @@ -185,6 +194,26 @@ class AbstractQBXCostModel(AbstractFMMCostModel): """ pass + @abstractmethod + def process_eval_target_specific_qbxl(self, geo_data, p2p_tsqbx_cost, + ndirect_sources_per_target_box): + """ + :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or + similar object in the host memory. + :arg p2p_tsqbx_cost: a :class:`numpy.float64` constant representing the + evaluation cost of a target from a direct evaluation source of the target + box containing the expansion center. + :arg ndirect_sources_per_target_box: a :class:`numpy.ndarray` or + :class:`pyopencl.array.Array` of shape (ntarget_boxes,), with the ith + entry representing the number of direct evaluation sources (list 1, + list 3 close and list 4 close) for target_boxes[i]. + :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape + (ntarget_boxes,), with the ith entry representing the evaluation cost of + all targets associated with centers in target_boxes[i] from the direct + evaluation sources of target_boxes[i]. 
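+            Roughly speaking, the implementations below model this as
+            (number of targets attached to the box's QBX centers)
+            * (number of direct-evaluation sources of that box)
+            * *p2p_tsqbx_cost*, accumulated per target box.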
+ """ + pass + class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): def __init__(self, queue, @@ -300,7 +329,7 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): return nqbx_centers_itgt_box - def process_form_qbxl(self, p2qbxl_cost, geo_data, + def process_form_qbxl(self, geo_data, p2qbxl_cost, ndirect_sources_per_target_box): nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(geo_data) @@ -308,10 +337,6 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): * ndirect_sources_per_target_box * p2qbxl_cost) - def process_eval_target_specific_qbxl(self, p2p_tsqbx_cost, geo_data, - ndirect_sources_per_target_box): - pass - @memoize_method def process_m2qbxl_knl(self, box_id_dtype, particle_id_dtype): return ElementwiseKernel( @@ -392,9 +417,23 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): return nqbx_targets_itgt_box * qbxl2p_cost + def process_eval_target_specific_qbxl(self, geo_data, p2p_tsqbx_cost, + ndirect_sources_per_target_box): + center_to_targets_starts = geo_data.center_to_tree_targets().starts + center_to_targets_starts = center_to_targets_starts.with_queue(self.queue) + weights = center_to_targets_starts[1:] - center_to_targets_starts[:-1] + + nqbx_targets_itgt_box = self.get_nqbx_centers_per_tgt_box( + geo_data, weights=weights + ) + + return (nqbx_targets_itgt_box + * ndirect_sources_per_target_box + * p2p_tsqbx_cost) + class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): - def process_form_qbxl(self, p2qbxl_cost, geo_data, + def process_form_qbxl(self, geo_data, p2qbxl_cost, ndirect_sources_per_target_box): global_qbx_centers = geo_data.global_qbx_centers() qbx_center_to_target_box = geo_data.qbx_center_to_target_box() @@ -408,7 +447,7 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): return np2qbxl * p2qbxl_cost - def process_eval_target_specific_qbxl(self, p2p_tsqbx_cost, geo_data, + def process_eval_target_specific_qbxl(self, geo_data, p2p_tsqbx_cost, ndirect_sources_per_target_box): center_to_targets_starts = geo_data.center_to_tree_targets().starts global_qbx_centers = geo_data.global_qbx_centers() diff --git a/test/test_cost_model.py b/test/test_cost_model.py index ef666ed3..5b93b1a9 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -120,7 +120,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() cl_p2qbxl = cl_cost_model.process_form_qbxl( - 5.0, geo_data_dev, cl_ndirect_sources_per_target_box + geo_data_dev, 5.0, cl_ndirect_sources_per_target_box ) queue.finish() @@ -134,7 +134,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() python_p2qbxl = python_cost_model.process_form_qbxl( - 5.0, geo_data, python_ndirect_sources_per_target_box + geo_data, 5.0, python_ndirect_sources_per_target_box ) logger.info("Python time for process_form_qbxl: {0}".format( @@ -234,6 +234,37 @@ def test_compare_cl_and_py_cost_model(ctx_factory): # }}} + # {{{ Test eval_target_specific_qbxl + + queue.finish() + start_time = time.time() + + cl_eval_target_specific_qbxl = cl_cost_model.process_eval_target_specific_qbxl( + geo_data_dev, 5.0, cl_ndirect_sources_per_target_box + ) + + queue.finish() + logger.info("OpenCL time for eval_target_specific_qbxl: {0}".format( + str(time.time() - start_time) + )) + + start_time = time.time() + + python_eval_target_specific_qbxl = \ + python_cost_model.process_eval_target_specific_qbxl( + geo_data, 5.0, python_ndirect_sources_per_target_box + ) + + logger.info("Python time for 
eval_target_specific_qbxl: {0}".format( + str(time.time() - start_time) + )) + + assert np.array_equal( + cl_eval_target_specific_qbxl.get(), python_eval_target_specific_qbxl + ) + + # }}} + if __name__ == "__main__": ctx_factory = cl.create_some_context -- GitLab From 8d11872bc9a9d11ccab6f7f3da65303d443c28d4 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 14 Feb 2019 11:32:52 -0600 Subject: [PATCH 10/55] Add cost_factors_for_kernels_from_model --- pytential/qbx/cost.py | 43 +++++++++++++++++++++++++++- test/test_cost_model.py | 62 ++++++++++++++++++++++------------------- 2 files changed, 76 insertions(+), 29 deletions(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index be3341b4..4313fbdd 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -34,7 +34,7 @@ from pyopencl.array import take from pyopencl.elementwise import ElementwiseKernel from pyopencl.tools import dtype_to_ctype from mako.template import Template -from pymbolic import var +from pymbolic import var, evaluate from pytools import memoize_method from boxtree.cost import ( @@ -214,6 +214,40 @@ class AbstractQBXCostModel(AbstractFMMCostModel): """ pass + def cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): + """Evaluate translation cost factors from symbolic model. The result of this + function can be used for process_* methods in this class. + + This method overwrite the method in parent + :class:`boxtree.cost.AbstractFMMCostModel` to support operations specific to + QBX. + + :arg nlevels: the number of tree levels. + :arg xlat_cost: a :class:`QBXTranslationCostModel`. + :arg context: a :class:`dict` of parameters passed as context when + evaluating symbolic expressions in *xlat_cost*. + :return: a :class:`dict`, the translation cost of each step in FMM and QBX. 
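+            Besides the FMM factors computed by the parent
+            :class:`boxtree.cost.AbstractFMMCostModel`, the returned dict is
+            expected to contain the QBX entries "p2qbxl_cost", "qbxl2p_cost"
+            and "p2p_tsqbx_cost" (scalars) as well as "m2qbxl_cost" and
+            "l2qbxl_cost" (arrays of length *nlevels*), as assembled just
+            below.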
+ """ + cost_factors = AbstractFMMCostModel.cost_factors_for_kernels_from_model( + self, nlevels, xlat_cost, context + ) + + cost_factors.update({ + "p2qbxl_cost": evaluate(xlat_cost.p2qbxl(), context=context), + "m2qbxl_cost": np.array([ + evaluate(xlat_cost.m2qbxl(ilevel), context=context) + for ilevel in range(nlevels) + ]), + "l2qbxl_cost": np.array([ + evaluate(xlat_cost.l2qbxl(ilevel), context=context) + for ilevel in range(nlevels) + ]), + "qbxl2p_cost": evaluate(xlat_cost.qbxl2p(), context=context), + "p2p_tsqbx_cost": evaluate(xlat_cost.p2p_tsqbx(), context=context) + }) + + return cost_factors + class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): def __init__(self, queue, @@ -431,6 +465,13 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): * ndirect_sources_per_target_box * p2p_tsqbx_cost) + def cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): + translation_costs = AbstractQBXCostModel.cost_factors_for_kernels_from_model( + self, nlevels, xlat_cost, context + ) + + return self.translation_costs_to_dev(translation_costs) + class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): def process_form_qbxl(self, geo_data, p2qbxl_cost, diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 5b93b1a9..8f175b84 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -35,7 +35,6 @@ from pytential.qbx import QBXLayerPotentialSource from pytential.qbx.cost import ( CLQBXCostModel, PythonQBXCostModel, pde_aware_translation_cost_model ) -from pymbolic import evaluate import time import logging @@ -109,6 +108,14 @@ def test_compare_cl_and_py_cost_model(ctx_factory): for ilevel in range(tree.nlevels): constant_one_params["p_fmm_lev%d" % ilevel] = 10 + cl_cost_factors = cl_cost_model.cost_factors_for_kernels_from_model( + tree.nlevels, xlat_cost, constant_one_params + ) + + python_cost_factors = python_cost_model.cost_factors_for_kernels_from_model( + tree.nlevels, xlat_cost, constant_one_params + ) + # }}} # {{{ Test process_form_qbxl @@ -120,7 +127,8 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() cl_p2qbxl = cl_cost_model.process_form_qbxl( - geo_data_dev, 5.0, cl_ndirect_sources_per_target_box + geo_data_dev, cl_cost_factors["p2qbxl_cost"], + cl_ndirect_sources_per_target_box ) queue.finish() @@ -134,7 +142,8 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() python_p2qbxl = python_cost_model.process_form_qbxl( - geo_data, 5.0, python_ndirect_sources_per_target_box + geo_data, python_cost_factors["p2qbxl_cost"], + python_ndirect_sources_per_target_box ) logger.info("Python time for process_form_qbxl: {0}".format( @@ -147,19 +156,12 @@ def test_compare_cl_and_py_cost_model(ctx_factory): # {{{ Test process_m2qbxl - nlevels = geo_data.tree().nlevels - m2qbxl_cost = np.zeros(nlevels, dtype=np.float64) - for ilevel in range(nlevels): - m2qbxl_cost[ilevel] = evaluate( - xlat_cost.m2qbxl(ilevel), - context=constant_one_params - ) - m2qbxl_cost_dev = cl.array.to_device(queue, m2qbxl_cost) - queue.finish() start_time = time.time() - cl_m2qbxl = cl_cost_model.process_m2qbxl(geo_data_dev, m2qbxl_cost_dev) + cl_m2qbxl = cl_cost_model.process_m2qbxl( + geo_data_dev, cl_cost_factors["m2qbxl_cost"] + ) queue.finish() logger.info("OpenCL time for process_m2qbxl: {0}".format( @@ -168,7 +170,9 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() - python_m2qbxl = python_cost_model.process_m2qbxl(geo_data, m2qbxl_cost) + python_m2qbxl = 
python_cost_model.process_m2qbxl( + geo_data, python_cost_factors["m2qbxl_cost"] + ) logger.info("Python time for process_m2qbxl: {0}".format( str(time.time() - start_time) @@ -180,18 +184,12 @@ def test_compare_cl_and_py_cost_model(ctx_factory): # {{{ Test process_l2qbxl - l2qbxl_cost = np.zeros(nlevels, dtype=np.float64) - for ilevel in range(nlevels): - l2qbxl_cost[ilevel] = evaluate( - xlat_cost.l2qbxl(ilevel), - context=constant_one_params - ) - l2qbxl_cost_dev = cl.array.to_device(queue, l2qbxl_cost) - queue.finish() start_time = time.time() - cl_l2qbxl = cl_cost_model.process_l2qbxl(geo_data_dev, l2qbxl_cost_dev) + cl_l2qbxl = cl_cost_model.process_l2qbxl( + geo_data_dev, cl_cost_factors["l2qbxl_cost"] + ) queue.finish() logger.info("OpenCL time for process_l2qbxl: {0}".format( @@ -200,7 +198,9 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() - python_l2qbxl = python_cost_model.process_l2qbxl(geo_data, l2qbxl_cost) + python_l2qbxl = python_cost_model.process_l2qbxl( + geo_data, python_cost_factors["l2qbxl_cost"] + ) logger.info("Python time for process_l2qbxl: {0}".format( str(time.time() - start_time) @@ -215,7 +215,9 @@ def test_compare_cl_and_py_cost_model(ctx_factory): queue.finish() start_time = time.time() - cl_eval_qbxl = cl_cost_model.process_eval_qbxl(geo_data_dev, 5.0) + cl_eval_qbxl = cl_cost_model.process_eval_qbxl( + geo_data_dev, cl_cost_factors["qbxl2p_cost"] + ) queue.finish() logger.info("OpenCL time for process_eval_qbxl: {0}".format( @@ -224,7 +226,9 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() - python_eval_qbxl = python_cost_model.process_eval_qbxl(geo_data, 5.0) + python_eval_qbxl = python_cost_model.process_eval_qbxl( + geo_data, python_cost_factors["qbxl2p_cost"] + ) logger.info("Python time for process_eval_qbxl: {0}".format( str(time.time() - start_time) @@ -240,7 +244,8 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() cl_eval_target_specific_qbxl = cl_cost_model.process_eval_target_specific_qbxl( - geo_data_dev, 5.0, cl_ndirect_sources_per_target_box + geo_data_dev, cl_cost_factors["p2p_tsqbx_cost"], + cl_ndirect_sources_per_target_box ) queue.finish() @@ -252,7 +257,8 @@ def test_compare_cl_and_py_cost_model(ctx_factory): python_eval_target_specific_qbxl = \ python_cost_model.process_eval_target_specific_qbxl( - geo_data, 5.0, python_ndirect_sources_per_target_box + geo_data, python_cost_factors["p2p_tsqbx_cost"], + python_ndirect_sources_per_target_box ) logger.info("Python time for eval_target_specific_qbxl: {0}".format( -- GitLab From e4ccc1df0bb102ec9907fb0f59c20bd3a952bb96 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 17 Feb 2019 22:29:06 -0600 Subject: [PATCH 11/55] Add get_modeled_cost --- pytential/qbx/__init__.py | 4 +- pytential/qbx/cost.py | 87 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index c2aa9e95..bc669e51 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -755,8 +755,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): geo_data = self.qbx_fmm_geometry_data(target_discrs_and_qbx_sides) if self.cost_model is None: - from pytential.qbx.cost import CostModel - cost_model = CostModel() + from pytential.qbx.cost import CLQBXCostModel + cost_model = CLQBXCostModel(cl.CommandQueue(self.cl_context)) else: cost_model = self.cost_model diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 
4313fbdd..f269c0fd 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -248,6 +248,93 @@ class AbstractQBXCostModel(AbstractFMMCostModel): return cost_factors + def __call__(self, *args, **kwargs): + return self.get_modeled_cost(*args, **kwargs) + + def get_modeled_cost(self, geo_data, kernel, kernel_arguments): + # FIXME: This should support target filtering. + lpot_source = geo_data.lpot_source + use_tsqbx = lpot_source._use_target_specific_qbx + tree = geo_data.tree() + traversal = geo_data.traversal() + nqbtl = geo_data.non_qbx_box_target_lists() + box_target_counts_nonchild = nqbtl.box_target_counts_nonchild + + fmm_level_to_order = [ + lpot_source.fmm_level_to_order( + kernel.get_base_kernel(), kernel_arguments, tree, ilevel + ) for ilevel in range(tree.nlevels) + ] + + # {{{ Construct parameters + + params = dict(p_qbx=lpot_source.qbx_order) + + for ilevel in range(tree.nlevels): + params["p_fmm_lev%d" % ilevel] = fmm_level_to_order[ilevel] + + # TODO: cost model with parameters + params.update(dict( + c_l2l=1.0, + c_l2p=1.0, + c_m2l=1.0, + c_m2m=1.0, + c_m2p=1.0, + c_p2l=1.0, + c_p2m=1.0, + c_p2p=1.0, + c_p2qbxl=1.0, + c_p2p_tsqbx=1.0, + c_qbxl2p=1.0, + c_m2qbxl=1.0, + c_l2qbxl=1.0, + )) + + # }}} + + xlat_cost = self.translation_cost_model_factory( + tree.dimensions, tree.nlevels + ) + + translation_cost = self.cost_factors_for_kernels_from_model( + tree.nlevels, xlat_cost, params + ) + + ndirect_sources_per_target_box = \ + self.get_ndirect_sources_per_target_box(traversal) + + result = AbstractFMMCostModel.__call__( + self, traversal, fmm_level_to_order, params, + ndirect_sources_per_target_box, + box_target_counts_nonchild=box_target_counts_nonchild + ) + + if use_tsqbx: + result["eval_target_specific_qbx_locals"] = \ + self.process_eval_target_specific_qbxl( + geo_data, translation_cost["p2p_tsqbx_cost"], + ndirect_sources_per_target_box=ndirect_sources_per_target_box + ) + else: + result["form_global_qbx_locals"] = self.process_form_qbxl( + geo_data, translation_cost["p2qbxl_cost"], + ndirect_sources_per_target_box + ) + + result["translate_box_multipoles_to_qbx_local"] = self.process_m2qbxl( + geo_data, translation_cost["m2qbxl_cost"] + ) + + result["translate_box_local_to_qbx_local"] = self.process_l2qbxl( + geo_data, translation_cost["l2qbxl_cost"] + ) + + result["eval_qbx_expansions"] = self.process_eval_qbxl( + geo_data, translation_cost["qbxl2p_cost"] + ) + + return result + class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): def __init__(self, queue, -- GitLab From f4db46734f93c507904165ce430667705738f56e Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 19 Feb 2019 22:42:46 -0600 Subject: [PATCH 12/55] Add parameter estimation and binding calibration params to cost model --- examples/cost.py | 17 +++--- pytential/qbx/cost.py | 121 ++++++++++++++++++++++++++++------------ test/test_cost_model.py | 26 ++------- 3 files changed, 100 insertions(+), 64 deletions(-) diff --git a/examples/cost.py b/examples/cost.py index 9cd3578d..3494d555 100644 --- a/examples/cost.py +++ b/examples/cost.py @@ -92,9 +92,11 @@ def get_test_density(queue, lpot_source): def calibrate_cost_model(ctx): queue = cl.CommandQueue(ctx) - from pytential.qbx.cost import CostModel, estimate_calibration_params + from pytential.qbx.cost import CLQBXCostModel - perf_model = CostModel() + perf_model = CLQBXCostModel( + queue, CLQBXCostModel.get_constantone_calibration_params() + ) model_results = [] timing_results = [] @@ -116,8 +118,9 @@ def calibrate_cost_model(ctx): 
model_results.append(one(perf_S.values())) timing_results.append(one(timing_data.values())) - calibration_params = ( - estimate_calibration_params(model_results, timing_results)) + calibration_params = perf_model.estimate_calibration_params( + model_results, timing_results + ) return perf_model.with_calibration_params(calibration_params) @@ -131,9 +134,7 @@ def test_cost_model(ctx, perf_model): sigma = get_test_density(queue, lpot_source) perf_S = bound_op.get_modeled_cost(queue, sigma=sigma) - model_result = ( - one(perf_S.values()) - .get_predicted_times(merge_close_lists=True)) + model_result = one(perf_S.values()) # Warm-up run. bound_op.eval(queue, {"sigma": sigma}) @@ -154,7 +155,7 @@ def test_cost_model(ctx, perf_model): for stage in model_result: print("stage: ", stage) print("actual: ", timing_result[stage]) - print("predicted: ", model_result[stage]) + print("predicted: ", perf_model.aggregate(model_result[stage])) print("=" * 20) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index f269c0fd..8710d8a4 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -125,11 +125,27 @@ def taylor_translation_cost_model(dim, nlevels): # {{{ cost model class AbstractQBXCostModel(AbstractFMMCostModel): - def __init__( - self, - translation_cost_model_factory=pde_aware_translation_cost_model): + def __init__(self, + calibration_params, + translation_cost_model_factory=pde_aware_translation_cost_model): + """ + :arg calibration_params: the calibration parameters. For evaluation, use + parameters returned by :func:`estimate_calibration_params`. For training, + use :func:`get_constantone_calibration_params` to make all cost modifiers + 1. + :arg translation_cost_model_factory: a function, which takes tree dimension + and the number of tree levels as arguments, returns an object of + :class:`TranslationCostModel`. + """ AbstractFMMCostModel.__init__( - self, translation_cost_model_factory + self, calibration_params, translation_cost_model_factory + ) + + def with_calibration_params(self, calibration_params): + """Return a copy of *self* with a new set of calibration parameters.""" + return type(self)( + calibration_params, + translation_cost_model_factory=self.translation_cost_model_factory ) @abstractmethod @@ -214,7 +230,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): """ pass - def cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): + def qbx_cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): """Evaluate translation cost factors from symbolic model. The result of this function can be used for process_* methods in this class. @@ -228,8 +244,8 @@ class AbstractQBXCostModel(AbstractFMMCostModel): evaluating symbolic expressions in *xlat_cost*. :return: a :class:`dict`, the translation cost of each step in FMM and QBX. """ - cost_factors = AbstractFMMCostModel.cost_factors_for_kernels_from_model( - self, nlevels, xlat_cost, context + cost_factors = self.fmm_cost_factors_for_kernels_from_model( + nlevels, xlat_cost, context ) cost_factors.update({ @@ -248,10 +264,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): return cost_factors - def __call__(self, *args, **kwargs): - return self.get_modeled_cost(*args, **kwargs) - - def get_modeled_cost(self, geo_data, kernel, kernel_arguments): + def get_qbx_modeled_cost(self, geo_data, kernel, kernel_arguments): # FIXME: This should support target filtering. 
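         # Outline of the steps below: query the per-level FMM orders from the
         # layer potential source, merge them with the calibration parameters
         # and the QBX order into one parameter dict, evaluate the symbolic
         # translation cost factors for this tree, run the boxtree FMM cost
         # model, and finally add the QBX-specific stages
         # (form_global_qbx_locals or eval_target_specific_qbx_locals,
         # translate_box_multipoles_to_qbx_local,
         # translate_box_local_to_qbx_local, eval_qbx_expansions).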
lpot_source = geo_data.lpot_source use_tsqbx = lpot_source._use_target_specific_qbx @@ -268,43 +281,27 @@ class AbstractQBXCostModel(AbstractFMMCostModel): # {{{ Construct parameters - params = dict(p_qbx=lpot_source.qbx_order) + params = self.calibration_params.copy() + params.update(dict(p_qbx=lpot_source.qbx_order)) for ilevel in range(tree.nlevels): params["p_fmm_lev%d" % ilevel] = fmm_level_to_order[ilevel] - # TODO: cost model with parameters - params.update(dict( - c_l2l=1.0, - c_l2p=1.0, - c_m2l=1.0, - c_m2m=1.0, - c_m2p=1.0, - c_p2l=1.0, - c_p2m=1.0, - c_p2p=1.0, - c_p2qbxl=1.0, - c_p2p_tsqbx=1.0, - c_qbxl2p=1.0, - c_m2qbxl=1.0, - c_l2qbxl=1.0, - )) - # }}} xlat_cost = self.translation_cost_model_factory( tree.dimensions, tree.nlevels ) - translation_cost = self.cost_factors_for_kernels_from_model( + translation_cost = self.qbx_cost_factors_for_kernels_from_model( tree.nlevels, xlat_cost, params ) ndirect_sources_per_target_box = \ self.get_ndirect_sources_per_target_box(traversal) - result = AbstractFMMCostModel.__call__( - self, traversal, fmm_level_to_order, params, + result = self.get_fmm_modeled_cost( + traversal, fmm_level_to_order, ndirect_sources_per_target_box, box_target_counts_nonchild=box_target_counts_nonchild ) @@ -335,12 +332,62 @@ class AbstractQBXCostModel(AbstractFMMCostModel): return result + def __call__(self, *args, **kwargs): + return self.get_qbx_modeled_cost(*args, **kwargs) + + @staticmethod + def get_constantone_calibration_params(): + return dict( + c_l2l=1.0, + c_l2p=1.0, + c_m2l=1.0, + c_m2m=1.0, + c_m2p=1.0, + c_p2l=1.0, + c_p2m=1.0, + c_p2p=1.0, + c_p2qbxl=1.0, + c_p2p_tsqbx=1.0, + c_qbxl2p=1.0, + c_m2qbxl=1.0, + c_l2qbxl=1.0 + ) + + def estimate_calibration_params(self, model_results, timing_results, + wall_time=False, + additional_stage_to_param_names=()): + _QBX_STAGE_TO_CALIBRATION_PARAMETER = { + "form_global_qbx_locals": "c_p2qbxl", + "translate_box_multipoles_to_qbx_local": "c_m2qbxl", + "translate_box_local_to_qbx_local": "c_l2qbxl", + "eval_qbx_expansions": "c_qbxl2p", + "eval_target_specific_qbx_locals": "c_p2p_tsqbx" + } + + stage_to_param_names = _QBX_STAGE_TO_CALIBRATION_PARAMETER.copy() + stage_to_param_names.update(additional_stage_to_param_names) + + return AbstractFMMCostModel.estimate_calibration_params( + self, model_results, timing_results, wall_time=wall_time, + additional_stage_to_param_names=stage_to_param_names + ) + class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): def __init__(self, queue, + calibration_params, translation_cost_model_factory=pde_aware_translation_cost_model): self.queue = queue - AbstractQBXCostModel.__init__(self, translation_cost_model_factory) + AbstractQBXCostModel.__init__( + self, calibration_params, translation_cost_model_factory + ) + + def with_calibration_params(self, calibration_params): + """Return a copy of *self* with a new set of calibration parameters.""" + return type(self)( + self.queue, calibration_params, + translation_cost_model_factory=self.translation_cost_model_factory + ) @memoize_method def _fill_array_with_index_knl(self, idx_dtype, array_dtype): @@ -552,9 +599,11 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): * ndirect_sources_per_target_box * p2p_tsqbx_cost) - def cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): - translation_costs = AbstractQBXCostModel.cost_factors_for_kernels_from_model( - self, nlevels, xlat_cost, context + def qbx_cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): + translation_costs = ( + 
AbstractQBXCostModel.qbx_cost_factors_for_kernels_from_model( + self, nlevels, xlat_cost, context + ) ) return self.translation_costs_to_dev(translation_costs) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 8f175b84..c42990f1 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -83,36 +83,22 @@ def test_compare_cl_and_py_cost_model(ctx_factory): # {{{ Construct cost models - cl_cost_model = CLQBXCostModel(queue) - python_cost_model = PythonQBXCostModel() + cl_cost_model = CLQBXCostModel(queue, None) + python_cost_model = PythonQBXCostModel(None) tree = geo_data.tree() xlat_cost = pde_aware_translation_cost_model(tree.targets.shape[0], tree.nlevels) - constant_one_params = dict( - c_l2l=1, - c_l2p=1, - c_m2l=1, - c_m2m=1, - c_m2p=1, - c_p2l=1, - c_p2m=1, - c_p2p=1, - c_p2qbxl=1, - c_p2p_tsqbx=1, - c_qbxl2p=1, - c_m2qbxl=1, - c_l2qbxl=1, - p_qbx=5 - ) + constant_one_params = CLQBXCostModel.get_constantone_calibration_params() + constant_one_params["p_qbx"] = 5 for ilevel in range(tree.nlevels): constant_one_params["p_fmm_lev%d" % ilevel] = 10 - cl_cost_factors = cl_cost_model.cost_factors_for_kernels_from_model( + cl_cost_factors = cl_cost_model.qbx_cost_factors_for_kernels_from_model( tree.nlevels, xlat_cost, constant_one_params ) - python_cost_factors = python_cost_model.cost_factors_for_kernels_from_model( + python_cost_factors = python_cost_model.qbx_cost_factors_for_kernels_from_model( tree.nlevels, xlat_cost, constant_one_params ) -- GitLab From 1300d47d28170db61c632f9eac5d7d09d03e73c3 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 19 Feb 2019 23:12:47 -0600 Subject: [PATCH 13/55] Restore CI by merging the config file from master --- .gitlab-ci.yml | 72 ++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6e9d488a..1b302bbb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,15 +1,8 @@ -# Environment variables -# -# * PYTEST_ADDOPTS is used to filter test runs. The default value is "-k-slowtest", -# which skips the slow running tests. -# * SKIP_EXAMPLES, if non-empty, can be used to skip the examples job. - Python 2.7 POCL: script: - export PY_EXE=python2.7 - export PYOPENCL_TEST=portable - - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} - - export EXTRA_INSTALL="Cython pybind11 numpy scipy mako" + - export EXTRA_INSTALL="pybind11 scipy numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -18,46 +11,50 @@ Python 2.7 POCL: - large-node except: - tags - - cl-cost-model + artifacts: + reports: + junit: test/pytest.xml -Python 3.6 POCL: +Python 3 POCL: script: - - export PY_EXE=python3.6 + - export PY_EXE=python3 - export PYOPENCL_TEST=portable - - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} - - export EXTRA_INSTALL="Cython pybind11 numpy scipy mako" + - export EXTRA_INSTALL="pybind11 numpy scipy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". 
./build-and-test-py-project.sh" tags: - - python3.6 + - python3 - pocl - large-node except: - tags - - cl-cost-model + artifacts: + reports: + junit: test/pytest.xml -Python 3.6 POCL Examples: +Python 3 POCL Examples: script: - test -n "$SKIP_EXAMPLES" && exit - - export PY_EXE=python3.6 + - export PY_EXE=python3 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="Cython pybind11 numpy mako pyvisfile matplotlib" + - export EXTRA_INSTALL="pybind11 numpy mako pyvisfile matplotlib" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-py-project-and-run-examples.sh - ". ./build-py-project-and-run-examples.sh" tags: - - python3.6 + - python3 - pocl - large-node except: - tags - - cl-cost-model + artifacts: + reports: + junit: test/pytest.xml -Python 3.6 Conda: +Python 3 Conda: script: - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine - - export CONDA_ENVIRONMENT=.test-conda-env-py3.yml - - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} - - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt + - CONDA_ENVIRONMENT=.test-conda-env-py3.yml + - REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" tags: @@ -65,17 +62,18 @@ Python 3.6 Conda: - large-node except: - tags - - cl-cost-model -Python 3.6 Conda Apple: + artifacts: + reports: + junit: test/pytest.xml + +Python 3 Conda Apple: script: - export LC_ALL=en_US.UTF-8 - export LANG=en_US.UTF-8 - - export CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml - - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} - - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - - export CC=gcc - - set -o xtrace + - export PYTEST_ADDOPTS=-k-slowtest + - CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml + - REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" @@ -86,16 +84,22 @@ Python 3.6 Conda Apple: - apple except: - tags - - cl-cost-model retry: 2 + # https://gitlab.tiker.net/inducer/pytential/issues/112 + allow_failure: true + + artifacts: + reports: + junit: test/pytest.xml + Documentation: script: - - EXTRA_INSTALL="Cython pybind11 numpy mako" + - EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-docs.sh - ". ./build-docs.sh" tags: - - python3.5 + - python3 only: - master -- GitLab From 6282f3d5891295e277602c194b96bcb2acec7af5 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 19 Feb 2019 23:31:00 -0600 Subject: [PATCH 14/55] Merge CI file from TS branch --- .gitlab-ci.yml | 66 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1b302bbb..58de5308 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,8 +1,15 @@ +# Environment variables +# +# * PYTEST_ADDOPTS is used to filter test runs. The default value is "-k-slowtest", +# which skips the slow running tests. +# * SKIP_EXAMPLES, if non-empty, can be used to skip the examples job. 
+ Python 2.7 POCL: script: - export PY_EXE=python2.7 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 scipy numpy mako" + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export EXTRA_INSTALL="Cython pybind11 numpy scipy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -15,15 +22,33 @@ Python 2.7 POCL: reports: junit: test/pytest.xml -Python 3 POCL: +Python 3.6 POCL: + script: + - export PY_EXE=python3.6 + - export PYOPENCL_TEST=portable + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export EXTRA_INSTALL="Cython pybind11 numpy scipy mako" + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh + - ". ./build-and-test-py-project.sh" + tags: + - python3.6 + - pocl + - large-node + except: + - tags + artifacts: + reports: + junit: test/pytest.xml + +Python 3.7 POCL: script: - - export PY_EXE=python3 + - export PY_EXE=python3.7 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 numpy scipy mako" + - export EXTRA_INSTALL="Cython pybind11 numpy scipy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: - - python3 + - python3.7 - pocl - large-node except: @@ -32,16 +57,16 @@ Python 3 POCL: reports: junit: test/pytest.xml -Python 3 POCL Examples: +Python 3.6 POCL Examples: script: - test -n "$SKIP_EXAMPLES" && exit - - export PY_EXE=python3 + - export PY_EXE=python3.6 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 numpy mako pyvisfile matplotlib" + - export EXTRA_INSTALL="Cython pybind11 numpy mako pyvisfile matplotlib" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-py-project-and-run-examples.sh - ". ./build-py-project-and-run-examples.sh" tags: - - python3 + - python3.6 - pocl - large-node except: @@ -50,11 +75,12 @@ Python 3 POCL Examples: reports: junit: test/pytest.xml -Python 3 Conda: +Python 3.6 Conda: script: - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine - - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - - REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt + - export CONDA_ENVIRONMENT=.test-conda-env-py3.yml + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" tags: @@ -67,13 +93,15 @@ Python 3 Conda: reports: junit: test/pytest.xml -Python 3 Conda Apple: +Python 3.6 Conda Apple: script: - export LC_ALL=en_US.UTF-8 - export LANG=en_US.UTF-8 - - export PYTEST_ADDOPTS=-k-slowtest - - CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml - - REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt + - export CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt + - export CC=gcc + - set -o xtrace - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" @@ -95,11 +123,11 @@ Python 3 Conda Apple: Documentation: script: - - EXTRA_INSTALL="pybind11 numpy mako" + - EXTRA_INSTALL="Cython pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-docs.sh - ". 
./build-docs.sh" tags: - - python3 + - python3.5 only: - master @@ -108,6 +136,6 @@ Flake8: - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh - ". ./prepare-and-run-flake8.sh pytential test examples" tags: - - python3 + - python3.5 except: - tags -- GitLab From 431dd123b636a59fc98b8af890eb3414aea3b36d Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 19 Feb 2019 23:45:11 -0600 Subject: [PATCH 15/55] Fix CI config file --- .gitlab-ci.yml | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 58de5308..ee8f171c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -22,16 +22,16 @@ Python 2.7 POCL: reports: junit: test/pytest.xml -Python 3.6 POCL: +Python 3 POCL: script: - - export PY_EXE=python3.6 + - export PY_EXE=python3 - export PYOPENCL_TEST=portable - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} - export EXTRA_INSTALL="Cython pybind11 numpy scipy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: - - python3.6 + - python3 - pocl - large-node except: @@ -40,33 +40,16 @@ Python 3.6 POCL: reports: junit: test/pytest.xml -Python 3.7 POCL: - script: - - export PY_EXE=python3.7 - - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="Cython pybind11 numpy scipy mako" - - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - - ". ./build-and-test-py-project.sh" - tags: - - python3.7 - - pocl - - large-node - except: - - tags - artifacts: - reports: - junit: test/pytest.xml - -Python 3.6 POCL Examples: +Python 3 POCL Examples: script: - test -n "$SKIP_EXAMPLES" && exit - - export PY_EXE=python3.6 + - export PY_EXE=python3 - export PYOPENCL_TEST=portable - export EXTRA_INSTALL="Cython pybind11 numpy mako pyvisfile matplotlib" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-py-project-and-run-examples.sh - ". ./build-py-project-and-run-examples.sh" tags: - - python3.6 + - python3 - pocl - large-node except: @@ -75,7 +58,7 @@ Python 3.6 POCL Examples: reports: junit: test/pytest.xml -Python 3.6 Conda: +Python 3 Conda: script: - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine - export CONDA_ENVIRONMENT=.test-conda-env-py3.yml @@ -93,7 +76,7 @@ Python 3.6 Conda: reports: junit: test/pytest.xml -Python 3.6 Conda Apple: +Python 3 Conda Apple: script: - export LC_ALL=en_US.UTF-8 - export LANG=en_US.UTF-8 @@ -127,7 +110,7 @@ Documentation: - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-docs.sh - ". ./build-docs.sh" tags: - - python3.5 + - python3 only: - master @@ -136,6 +119,6 @@ Flake8: - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh - ". 
./prepare-and-run-flake8.sh pytential test examples" tags: - - python3.5 + - python3 except: - tags -- GitLab From f4a1a72393b73dc66900f3d9012664c3fcc86e98 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 20 Feb 2019 09:46:46 -0600 Subject: [PATCH 16/55] Use opencl cost model in boxtree --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index dd15a69e..bbbf6dc1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ git+https://github.com/inducer/modepy git+https://github.com/inducer/pyopencl git+https://github.com/inducer/islpy git+https://github.com/inducer/loopy -git+https://gitlab.tiker.net/inducer/boxtree +git+https://gitlab.tiker.net/inducer/boxtree@opencl-counter git+https://github.com/inducer/meshmode git+https://gitlab.tiker.net/inducer/sumpy git+https://gitlab.tiker.net/inducer/pyfmmlib -- GitLab From f61247cd43e022e7a8d3050d6cf8b8eeb2b38a0c Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 20 Feb 2019 23:36:11 -0600 Subject: [PATCH 17/55] Fix for python2 --- .test-conda-env-py3-requirements.txt | 2 +- pytential/qbx/cost.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.test-conda-env-py3-requirements.txt b/.test-conda-env-py3-requirements.txt index fa6c0426..eae3cf00 100644 --- a/.test-conda-env-py3-requirements.txt +++ b/.test-conda-env-py3-requirements.txt @@ -1,4 +1,4 @@ -git+https://gitlab.tiker.net/inducer/boxtree +git+https://gitlab.tiker.net/inducer/boxtree@opencl-counter git+https://github.com/inducer/pymbolic git+https://github.com/inducer/loopy git+https://gitlab.tiker.net/inducer/sumpy diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 8710d8a4..409a257b 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -36,12 +36,19 @@ from pyopencl.tools import dtype_to_ctype from mako.template import Template from pymbolic import var, evaluate from pytools import memoize_method +from functools import partial +import sys from boxtree.cost import ( FMMTranslationCostModel, AbstractFMMCostModel, PythonFMMCostModel, CLFMMCostModel ) from abc import abstractmethod +if sys.version_info >= (3, 0): + Template = partial(Template, strict_undefined=True) +else: + Template = partial(Template, strict_undefined=True, disable_unicode=True) + import logging logger = logging.getLogger(__name__) -- GitLab From fc159bfe28b52642981624197e626d793a670932 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 21 Feb 2019 22:43:56 -0600 Subject: [PATCH 18/55] Temporaily disable cost model without param --- pytential/qbx/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index bc669e51..49509939 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -755,8 +755,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): geo_data = self.qbx_fmm_geometry_data(target_discrs_and_qbx_sides) if self.cost_model is None: - from pytential.qbx.cost import CLQBXCostModel - cost_model = CLQBXCostModel(cl.CommandQueue(self.cl_context)) + # should supply default parameters + raise NotImplementedError("Please supply cost model explicitly") else: cost_model = self.cost_model -- GitLab From 597ba5c928cf8a7c0d314fe8ddabee93f1c7ec23 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 4 Mar 2019 17:42:04 -0600 Subject: [PATCH 19/55] Remove __init__ from AbstractQBXCostModel --- pytential/qbx/cost.py | 47 ++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 18 
deletions(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 409a257b..7ad2da88 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -132,22 +132,6 @@ def taylor_translation_cost_model(dim, nlevels): # {{{ cost model class AbstractQBXCostModel(AbstractFMMCostModel): - def __init__(self, - calibration_params, - translation_cost_model_factory=pde_aware_translation_cost_model): - """ - :arg calibration_params: the calibration parameters. For evaluation, use - parameters returned by :func:`estimate_calibration_params`. For training, - use :func:`get_constantone_calibration_params` to make all cost modifiers - 1. - :arg translation_cost_model_factory: a function, which takes tree dimension - and the number of tree levels as arguments, returns an object of - :class:`TranslationCostModel`. - """ - AbstractFMMCostModel.__init__( - self, calibration_params, translation_cost_model_factory - ) - def with_calibration_params(self, calibration_params): """Return a copy of *self* with a new set of calibration parameters.""" return type(self)( @@ -384,9 +368,20 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): def __init__(self, queue, calibration_params, translation_cost_model_factory=pde_aware_translation_cost_model): + """ + :arg queue: a :class:`pyopencl.CommandQueue` object on which the execution + of this object runs. + :arg calibration_params: the calibration parameters. For evaluation, use + parameters returned by :func:`estimate_calibration_params`. For training, + use :func:`get_constantone_calibration_params` to make all cost modifiers + 1. + :arg translation_cost_model_factory: a function, which takes tree dimension + and the number of tree levels as arguments, returns an object of + :class:`TranslationCostModel`. + """ self.queue = queue - AbstractQBXCostModel.__init__( - self, calibration_params, translation_cost_model_factory + CLFMMCostModel.__init__( + self, queue, calibration_params, translation_cost_model_factory ) def with_calibration_params(self, calibration_params): @@ -617,6 +612,22 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): + def __init__(self, + calibration_params, + translation_cost_model_factory=pde_aware_translation_cost_model): + """ + :arg calibration_params: the calibration parameters. For evaluation, use + parameters returned by :func:`estimate_calibration_params`. For training, + use :func:`get_constantone_calibration_params` to make all cost modifiers + 1. + :arg translation_cost_model_factory: a function, which takes tree dimension + and the number of tree levels as arguments, returns an object of + :class:`TranslationCostModel`. + """ + PythonFMMCostModel.__init__( + self, calibration_params, translation_cost_model_factory + ) + def process_form_qbxl(self, geo_data, p2qbxl_cost, ndirect_sources_per_target_box): global_qbx_centers = geo_data.global_qbx_centers() -- GitLab From 9902fb15c05afe9ee6599d58aa9ceb7cc0e4637b Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 4 Mar 2019 17:46:32 -0600 Subject: [PATCH 20/55] Delete redundant code --- pytential/qbx/cost.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 7ad2da88..515850de 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -379,7 +379,6 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): and the number of tree levels as arguments, returns an object of :class:`TranslationCostModel`. 
""" - self.queue = queue CLFMMCostModel.__init__( self, queue, calibration_params, translation_cost_model_factory ) -- GitLab From a664b2178ad422dff0b7a34a98a56620609935c0 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 24 Mar 2019 10:50:52 -0500 Subject: [PATCH 21/55] Change wall_time from bool to str name in estimate_calibration_params --- pytential/qbx/cost.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 515850de..22e957db 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -345,7 +345,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): ) def estimate_calibration_params(self, model_results, timing_results, - wall_time=False, + time_field_name="wall_elapsed", additional_stage_to_param_names=()): _QBX_STAGE_TO_CALIBRATION_PARAMETER = { "form_global_qbx_locals": "c_p2qbxl", @@ -359,7 +359,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): stage_to_param_names.update(additional_stage_to_param_names) return AbstractFMMCostModel.estimate_calibration_params( - self, model_results, timing_results, wall_time=wall_time, + self, model_results, timing_results, time_field_name=time_field_name, additional_stage_to_param_names=stage_to_param_names ) -- GitLab From ab5b324c74d16e81ac2681a9e666faf81d7fd276 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 29 Jul 2019 00:12:38 -0500 Subject: [PATCH 22/55] Estimate kernel-specific parameters --- pytential/qbx/cost.py | 47 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 22e957db..9758a818 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -724,4 +724,51 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): # }}} + +def generate_parameters_output(queue, model_costs, real_costs): + """Get kernel-specific calibration parameters from samples of model costs and + real costs. + + :arg queue: a :class:`pyopencl.CommandQueue` object on which the cost model is + created. + :arg model_costs: a :class:`list` of modeled costs. Each model cost can be + obtained from `BoundExpression.get_modeled_cost`. + :arg real_costs: a :class:`list` of timing data. Each timing data can be obtained + from `BoundExpression.eval`. + :return: a :class:`dict` which maps kernels to calibration parameters. 
+ """ + cost_per_kernel = {} + params_per_kernel = {} + + assert len(model_costs) == len(real_costs) + + for icase in range(len(model_costs)): + model_cost = model_costs[icase] + real_cost = real_costs[icase] + + for insn in real_cost: + assert (insn in model_cost) + + knls = tuple(knl for knl in insn.kernels) + + if knls not in cost_per_kernel: + cost_per_kernel[knls] = { + "model_costs": [], + "real_costs": [] + } + + cost_per_kernel[knls]["model_costs"].append(model_cost[insn]) + cost_per_kernel[knls]["real_costs"].append(real_cost[insn]) + + cost_model = CLQBXCostModel( + queue, CLQBXCostModel.get_constantone_calibration_params() + ) + + for knls in cost_per_kernel: + params_per_kernel[knls] = cost_model.estimate_calibration_params( + cost_per_kernel[knls]["model_costs"], cost_per_kernel[knls]["real_costs"] + ) + + return params_per_kernel + # vim: foldmethod=marker -- GitLab From f6511cd38c0c5dbb61c21077af19913ae2dbeadf Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 27 Aug 2019 18:36:44 -0500 Subject: [PATCH 23/55] Not store calib params in the model and use kernel-specific cost model --- examples/cost.py | 29 ++++--- pytential/qbx/__init__.py | 14 ++-- pytential/qbx/cost.py | 129 +++++++++++++------------------- pytential/symbolic/execution.py | 31 ++++++-- test/test_cost_model.py | 4 +- 5 files changed, 99 insertions(+), 108 deletions(-) diff --git a/examples/cost.py b/examples/cost.py index 3494d555..b5bc1933 100644 --- a/examples/cost.py +++ b/examples/cost.py @@ -4,6 +4,7 @@ import pyopencl as cl import numpy as np from pytential import sym, bind +from pytential.qbx.cost import CLQBXCostModel from pytools import one @@ -91,12 +92,7 @@ def get_test_density(queue, lpot_source): def calibrate_cost_model(ctx): queue = cl.CommandQueue(ctx) - - from pytential.qbx.cost import CLQBXCostModel - - perf_model = CLQBXCostModel( - queue, CLQBXCostModel.get_constantone_calibration_params() - ) + perf_model = CLQBXCostModel(queue) model_results = [] timing_results = [] @@ -106,7 +102,7 @@ def calibrate_cost_model(ctx): bound_op = get_bound_op(lpot_source) sigma = get_test_density(queue, lpot_source) - perf_S = bound_op.get_modeled_cost(queue, sigma=sigma) + perf_S = bound_op.get_modeled_cost(queue, "constant_one", sigma=sigma) # Warm-up run. bound_op.eval(queue, {"sigma": sigma}) @@ -115,25 +111,26 @@ def calibrate_cost_model(ctx): timing_data = {} bound_op.eval(queue, {"sigma": sigma}, timing_data=timing_data) - model_results.append(one(perf_S.values())) - timing_results.append(one(timing_data.values())) + model_results.append(perf_S) + timing_results.append(timing_data) - calibration_params = perf_model.estimate_calibration_params( - model_results, timing_results + calibration_params = perf_model.estimate_knl_specific_calibration_params( + model_results, timing_results, time_field_name="process_elapsed" ) - return perf_model.with_calibration_params(calibration_params) + return calibration_params -def test_cost_model(ctx, perf_model): +def test_cost_model(ctx, calibration_params): queue = cl.CommandQueue(ctx) + perf_model = CLQBXCostModel(queue) for lpot_source in test_geometries(queue): lpot_source = lpot_source.copy(cost_model=perf_model) bound_op = get_bound_op(lpot_source) sigma = get_test_density(queue, lpot_source) - perf_S = bound_op.get_modeled_cost(queue, sigma=sigma) + perf_S = bound_op.get_modeled_cost(queue, calibration_params, sigma=sigma) model_result = one(perf_S.values()) # Warm-up run. 
@@ -160,8 +157,8 @@ def test_cost_model(ctx, perf_model): def predict_cost(ctx): - model = calibrate_cost_model(ctx) - test_cost_model(ctx, model) + params = calibrate_cost_model(ctx) + test_cost_model(ctx, params) if __name__ == "__main__": diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index 49509939..5ecaea3c 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -642,12 +642,15 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): return self._dispatch_compute_potential_insn( queue, insn, bound_expr, evaluate, func, extra_args) - def perf_model_compute_potential_insn(self, queue, insn, bound_expr, evaluate): + def perf_model_compute_potential_insn(self, queue, insn, bound_expr, evaluate, + calibration_params): if self.fmm_level_to_order is False: raise NotImplementedError("perf modeling direct evaluations") return self._dispatch_compute_potential_insn( queue, insn, bound_expr, evaluate, - self.perf_model_compute_potential_insn_fmm) + self.perf_model_compute_potential_insn_fmm, + extra_args={"calibration_params": calibration_params} + ) def _dispatch_compute_potential_insn(self, queue, insn, bound_expr, evaluate, func, extra_args=None): @@ -748,7 +751,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): # {{{ execute fmm cost model def perf_model_compute_potential_insn_fmm(self, queue, insn, bound_expr, - evaluate): + evaluate, calibration_params): target_name_and_side_to_number, target_discrs_and_qbx_sides = ( self.get_target_discrs_and_qbx_sides(insn, bound_expr)) @@ -764,8 +767,9 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): for arg_name, arg_expr in six.iteritems(insn.kernel_arguments): kernel_args[arg_name] = evaluate(arg_expr) - cost_model_result = ( - cost_model(geo_data, insn.base_kernel, kernel_args)) + cost_model_result = cost_model( + geo_data, insn.base_kernel, kernel_args, calibration_params + ) # {{{ construct dummy outputs diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 9758a818..3fdb1630 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -132,12 +132,6 @@ def taylor_translation_cost_model(dim, nlevels): # {{{ cost model class AbstractQBXCostModel(AbstractFMMCostModel): - def with_calibration_params(self, calibration_params): - """Return a copy of *self* with a new set of calibration parameters.""" - return type(self)( - calibration_params, - translation_cost_model_factory=self.translation_cost_model_factory - ) @abstractmethod def process_form_qbxl(self, geo_data, p2qbxl_cost, @@ -255,7 +249,8 @@ class AbstractQBXCostModel(AbstractFMMCostModel): return cost_factors - def get_qbx_modeled_cost(self, geo_data, kernel, kernel_arguments): + def get_qbx_modeled_cost(self, geo_data, kernel, kernel_arguments, + calibration_params): # FIXME: This should support target filtering. 
lpot_source = geo_data.lpot_source use_tsqbx = lpot_source._use_target_specific_qbx @@ -272,7 +267,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): # {{{ Construct parameters - params = self.calibration_params.copy() + params = calibration_params.copy() params.update(dict(p_qbx=lpot_source.qbx_order)) for ilevel in range(tree.nlevels): @@ -294,6 +289,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): result = self.get_fmm_modeled_cost( traversal, fmm_level_to_order, ndirect_sources_per_target_box, + calibration_params, box_target_counts_nonchild=box_target_counts_nonchild ) @@ -363,32 +359,64 @@ class AbstractQBXCostModel(AbstractFMMCostModel): additional_stage_to_param_names=stage_to_param_names ) + def estimate_knl_specific_calibration_params(self, model_results, timing_results, + time_field_name="wall_elapsed"): + """Get kernel-specific calibration parameters from samples of model costs and + real costs. + + :arg model_results: a :class:`list` of modeled costs. Each model cost can be + obtained from `BoundExpression.get_modeled_cost` with "constant_one" for + argument `calibration_params`. + :arg timing_results: a :class:`list` of timing data. Each timing data can be + obtained from `BoundExpression.eval`. + :arg time_field_name: a :class:`str`, the field name from the timing result. + Usually this can be "wall_elapsed" or "process_elapsed". + :return: a :class:`dict` which maps kernels to calibration parameters. + """ + cost_per_kernel = {} + params_per_kernel = {} + + assert len(model_results) == len(timing_results) + + for icase in range(len(model_results)): + model_cost = model_results[icase] + real_cost = timing_results[icase] + + for insn in real_cost: + assert (insn in model_cost) + + knls = tuple(knl for knl in insn.kernels) + + if knls not in cost_per_kernel: + cost_per_kernel[knls] = { + "model_costs": [], + "real_costs": [] + } + + cost_per_kernel[knls]["model_costs"].append(model_cost[insn]) + cost_per_kernel[knls]["real_costs"].append(real_cost[insn]) + + for knls in cost_per_kernel: + params_per_kernel[knls] = self.estimate_calibration_params( + cost_per_kernel[knls]["model_costs"], + cost_per_kernel[knls]["real_costs"], + time_field_name=time_field_name + ) + + return params_per_kernel + class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): def __init__(self, queue, - calibration_params, translation_cost_model_factory=pde_aware_translation_cost_model): """ :arg queue: a :class:`pyopencl.CommandQueue` object on which the execution of this object runs. - :arg calibration_params: the calibration parameters. For evaluation, use - parameters returned by :func:`estimate_calibration_params`. For training, - use :func:`get_constantone_calibration_params` to make all cost modifiers - 1. :arg translation_cost_model_factory: a function, which takes tree dimension and the number of tree levels as arguments, returns an object of :class:`TranslationCostModel`. 
""" - CLFMMCostModel.__init__( - self, queue, calibration_params, translation_cost_model_factory - ) - - def with_calibration_params(self, calibration_params): - """Return a copy of *self* with a new set of calibration parameters.""" - return type(self)( - self.queue, calibration_params, - translation_cost_model_factory=self.translation_cost_model_factory - ) + CLFMMCostModel.__init__(self, queue, translation_cost_model_factory) @memoize_method def _fill_array_with_index_knl(self, idx_dtype, array_dtype): @@ -612,20 +640,13 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): def __init__(self, - calibration_params, translation_cost_model_factory=pde_aware_translation_cost_model): """ - :arg calibration_params: the calibration parameters. For evaluation, use - parameters returned by :func:`estimate_calibration_params`. For training, - use :func:`get_constantone_calibration_params` to make all cost modifiers - 1. :arg translation_cost_model_factory: a function, which takes tree dimension and the number of tree levels as arguments, returns an object of :class:`TranslationCostModel`. """ - PythonFMMCostModel.__init__( - self, calibration_params, translation_cost_model_factory - ) + PythonFMMCostModel.__init__(self, translation_cost_model_factory) def process_form_qbxl(self, geo_data, p2qbxl_cost, ndirect_sources_per_target_box): @@ -725,50 +746,4 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): # }}} -def generate_parameters_output(queue, model_costs, real_costs): - """Get kernel-specific calibration parameters from samples of model costs and - real costs. - - :arg queue: a :class:`pyopencl.CommandQueue` object on which the cost model is - created. - :arg model_costs: a :class:`list` of modeled costs. Each model cost can be - obtained from `BoundExpression.get_modeled_cost`. - :arg real_costs: a :class:`list` of timing data. Each timing data can be obtained - from `BoundExpression.eval`. - :return: a :class:`dict` which maps kernels to calibration parameters. - """ - cost_per_kernel = {} - params_per_kernel = {} - - assert len(model_costs) == len(real_costs) - - for icase in range(len(model_costs)): - model_cost = model_costs[icase] - real_cost = real_costs[icase] - - for insn in real_cost: - assert (insn in model_cost) - - knls = tuple(knl for knl in insn.kernels) - - if knls not in cost_per_kernel: - cost_per_kernel[knls] = { - "model_costs": [], - "real_costs": [] - } - - cost_per_kernel[knls]["model_costs"].append(model_cost[insn]) - cost_per_kernel[knls]["real_costs"].append(real_cost[insn]) - - cost_model = CLQBXCostModel( - queue, CLQBXCostModel.get_constantone_calibration_params() - ) - - for knls in cost_per_kernel: - params_per_kernel[knls] = cost_model.estimate_calibration_params( - cost_per_kernel[knls]["model_costs"], cost_per_kernel[knls]["real_costs"] - ) - - return params_per_kernel - # vim: foldmethod=marker diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py index 06b497a9..46175d64 100644 --- a/pytential/symbolic/execution.py +++ b/pytential/symbolic/execution.py @@ -42,6 +42,7 @@ from pytools import memoize_in from pytential.symbolic.primitives import DEFAULT_SOURCE, DEFAULT_TARGET from pytential.symbolic.primitives import ( QBXSourceStage1, QBXSourceStage2, QBXSourceQuadStage2) +from pytential.qbx.cost import AbstractQBXCostModel # FIXME caches: fix up queues @@ -268,9 +269,11 @@ class CostModelMapper(EvaluationMapperBase): data is collected. 
""" - def __init__(self, bound_expr, queue, context=None, - target_geometry=None, - target_points=None, target_normals=None, target_tangents=None): + def __init__(self, bound_expr, queue, + knl_specific_calibration_params, + context=None, + target_geometry=None, + target_points=None, target_normals=None, target_tangents=None): if context is None: context = {} EvaluationMapperBase.__init__( @@ -279,13 +282,25 @@ class CostModelMapper(EvaluationMapperBase): target_points, target_normals, target_tangents) + + self.knl_specific_calibration_params = knl_specific_calibration_params self.modeled_cost = {} def exec_compute_potential_insn(self, queue, insn, bound_expr, evaluate): source = bound_expr.places[insn.source] - result, perf_model_result = ( - source.perf_model_compute_potential_insn( - queue, insn, bound_expr, evaluate)) + knls = tuple(knl for knl in insn.kernels) + + if (isinstance(self.knl_specific_calibration_params, str) + and self.knl_specific_calibration_params == "constant_one"): + calibration_params = \ + AbstractQBXCostModel.get_constantone_calibration_params() + else: + calibration_params = self.knl_specific_calibration_params[knls] + + result, perf_model_result = source.perf_model_compute_potential_insn( + queue, insn, bound_expr, evaluate, + calibration_params + ) self.modeled_cost[insn] = perf_model_result return result @@ -541,8 +556,8 @@ class BoundExpression(object): def get_discretization(self, where): return self.places.get_discretization(where) - def get_modeled_cost(self, queue, **args): - perf_model_mapper = CostModelMapper(self, queue, args) + def get_modeled_cost(self, queue, calibration_params, **args): + perf_model_mapper = CostModelMapper(self, queue, calibration_params, args) self.code.execute(perf_model_mapper) return perf_model_mapper.get_modeled_cost() diff --git a/test/test_cost_model.py b/test/test_cost_model.py index c42990f1..399a2014 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -83,8 +83,8 @@ def test_compare_cl_and_py_cost_model(ctx_factory): # {{{ Construct cost models - cl_cost_model = CLQBXCostModel(queue, None) - python_cost_model = PythonQBXCostModel(None) + cl_cost_model = CLQBXCostModel(queue) + python_cost_model = PythonQBXCostModel() tree = geo_data.tree() xlat_cost = pde_aware_translation_cost_model(tree.targets.shape[0], tree.nlevels) -- GitLab From 6f7d21cbfdcff7af01404183c8372dcf679482c0 Mon Sep 17 00:00:00 2001 From: xywei Date: Wed, 11 Sep 2019 12:08:32 -0500 Subject: [PATCH 24/55] Revert "Receive upstream updates" This reverts commit 5b30a64df6556b72235b80e5698ae7f80dda677f, reversing changes made to fd765082e9acc531a192cd7a42339262c31400cd. --- .gitlab-ci.yml | 3 +- .test-conda-env-py3-macos.yml | 9 +-- .test-conda-env-py3.yml | 11 +-- README.rst | 9 +-- azure-pipelines.yml | 126 ---------------------------------- pytential/qbx/__init__.py | 23 +++---- 6 files changed, 16 insertions(+), 165 deletions(-) delete mode 100644 azure-pipelines.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d95ed067..85257045 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -82,6 +82,7 @@ Python 3 Conda: - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine - export CONDA_ENVIRONMENT=.test-conda-env-py3.yml - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". 
./build-and-test-py-project-within-miniconda.sh" tags: @@ -99,6 +100,7 @@ Python 3 Conda Apple: - export LANG=en_US.UTF-8 - export CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - export CC=clang - set -o xtrace - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh @@ -125,7 +127,6 @@ Documentation: Pylint: script: # Needed to avoid name shadowing issues when running from source directory. - # Pylint won't find the Cython bits without this - PROJECT_INSTALL_FLAGS="--editable" - export PY_EXE=python3 # Pin to numpy 1.15 diff --git a/.test-conda-env-py3-macos.yml b/.test-conda-env-py3-macos.yml index cbf0efad..eea9ddd7 100644 --- a/.test-conda-env-py3-macos.yml +++ b/.test-conda-env-py3-macos.yml @@ -19,11 +19,4 @@ dependencies: - clangdev - openmp - cython - -- pip -- pip: - - git+https://gitlab.tiker.net/inducer/boxtree - - git+https://github.com/inducer/pymbolic - - git+https://github.com/inducer/loopy - - git+https://gitlab.tiker.net/inducer/sumpy - - git+https://github.com/inducer/meshmode +# things not in here: loopy boxtree pymbolic meshmode sumpy diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index 750b0072..8023391b 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -10,16 +10,9 @@ dependencies: - pocl - islpy - pyopencl -- python=3 +- python>=3.6 - symengine=0.3.0 - python-symengine=0.3.0 - pyfmmlib - cython - -- pip -- pip: - - git+https://gitlab.tiker.net/inducer/boxtree - - git+https://github.com/inducer/pymbolic - - git+https://github.com/inducer/loopy - - git+https://gitlab.tiker.net/inducer/sumpy - - git+https://github.com/inducer/meshmode +# things not in here: loopy boxtree pymbolic meshmode sumpy diff --git a/README.rst b/README.rst index 722b863f..ad3f7905 100644 --- a/README.rst +++ b/README.rst @@ -2,14 +2,9 @@ pytential: 2D/3D Layer Potential Evaluation =========================================== .. image:: https://gitlab.tiker.net/inducer/pytential/badges/master/pipeline.svg - :alt: Gitlab Build Status - :target: https://gitlab.tiker.net/inducer/pytential/commits/master -.. image:: https://dev.azure.com/ak-spam/inducer/_apis/build/status/inducer.pytential?branchName=master - :alt: Azure Build Status - :target: https://dev.azure.com/ak-spam/inducer/_build/latest?definitionId=16&branchName=master + :target: https://gitlab.tiker.net/inducer/pytential/commits/master .. image:: https://badge.fury.io/py/pytential.png - :alt: Python Package Index Release Page - :target: https://pypi.org/project/pytential/ + :target: http://pypi.python.org/pypi/pytential pytential helps you accurately evaluate layer potentials (and, sooner or later, volume potentials). 
diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index ddd78cf1..00000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,126 +0,0 @@ -jobs: -- - job: 'Python2' - pool: - vmImage: 'ubuntu-latest' - - # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#timeouts - # Maximum allowed as of Sep 5, 2019 - timeoutInMinutes: 360 - - steps: - - - script: | - set -e - sed 's/python=3/python=2.7/' .test-conda-env-py3.yml > .test-conda-env-py2-pre.yml - grep -v symengine .test-conda-env-py2-pre.yml > .test-conda-env-py2.yml - cat .test-conda-env-py2.yml - CONDA_ENVIRONMENT=.test-conda-env-py2.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . ./build-and-test-py-project-within-miniconda.sh - - displayName: 'Pytest Conda' - - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'JUnit' - testResultsFiles: 'test/pytest.xml' - -- - job: 'Python3' - pool: - vmImage: 'ubuntu-latest' - - # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#timeouts - # Maximum allowed as of Sep 5, 2019 - timeoutInMinutes: 360 - - steps: - - - script: | - set -e - grep -v symengine .test-conda-env-py3.yml > .test-conda-env.yml - CONDA_ENVIRONMENT=.test-conda-env.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . ./build-and-test-py-project-within-miniconda.sh - - displayName: 'Pytest Conda' - - - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'JUnit' - testResultsFiles: 'test/pytest.xml' - -- - job: 'Python3Symengine' - pool: - vmImage: 'ubuntu-latest' - - # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#timeouts - # Maximum allowed as of Sep 5, 2019 - timeoutInMinutes: 360 - - steps: - - - script: | - set -e - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . ./build-and-test-py-project-within-miniconda.sh - - displayName: 'Pytest Conda' - - - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'JUnit' - testResultsFiles: 'test/pytest.xml' - -- - job: 'Flake8' - pool: - vmImage: 'ubuntu-latest' - strategy: - matrix: - Python37: - python.version: '3.7' - - steps: - - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - - - - script: | - set -e - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh - . ./prepare-and-run-flake8.sh pytential test - - displayName: 'Flake8' - -- - job: 'Pylint' - pool: - vmImage: 'ubuntu-latest' - - steps: - - - script: | - set -e - CONDA_ENVIRONMENT=.test-conda-env-py3-pylint.yml - sed 's/numpy/numpy=1.15/' .test-conda-env-py3.yml > $CONDA_ENVIRONMENT - echo "- matplotlib" >> $CONDA_ENVIRONMENT - echo "-------------------------------------------" - cat $CONDA_ENVIRONMENT - echo "-------------------------------------------" - USE_CONDA_BUILD=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh - - # Pylint won't find the Cython bits without this - PROJECT_INSTALL_FLAGS="--editable" - - . 
./prepare-and-run-pylint.sh pytential test/test_*.py - - displayName: 'Pylint' diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index e98da240..cef8eed2 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -559,7 +559,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): return self._dispatch_compute_potential_insn( queue, insn, bound_expr, evaluate, func, extra_args) - def cost_model_compute_potential_insn(self, queue, insn, bound_expr, evaluate): + def cost_model_compute_potential_insn(self, queue, insn, bound_expr, evaluate, + calibration_params): """Using :attr:`cost_model`, evaluate the cost of executing *insn*. Cost model results are gathered in :attr:`pytential.symbolic.execution.BoundExpression.modeled_cost` @@ -567,28 +568,22 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): :returns: whatever :meth:`exec_compute_potential_insn_fmm` returns. """ - if self.fmm_level_to_order is False: raise NotImplementedError("perf modeling direct evaluations") def drive_cost_model( wrangler, strengths, geo_data, kernel, kernel_arguments): del strengths - cost_model_result = ( - self.cost_model(wrangler, geo_data, kernel, kernel_arguments)) - - from pytools.obj_array import with_object_array_or_scalar - output_placeholder = with_object_array_or_scalar( - wrangler.finalize_potentials, - wrangler.full_output_zeros() + cost_model_result = self.cost_model( + geo_data, kernel, kernel_arguments, calibration_params ) - - return output_placeholder, cost_model_result + return wrangler.full_output_zeros(), cost_model_result return self._dispatch_compute_potential_insn( - queue, insn, bound_expr, evaluate, - self.exec_compute_potential_insn_fmm, - extra_args={"fmm_driver": drive_cost_model}) + queue, insn, bound_expr, evaluate, + self.exec_compute_potential_insn_fmm, + extra_args={"fmm_driver": drive_cost_model} + ) def _dispatch_compute_potential_insn(self, queue, insn, bound_expr, evaluate, func, extra_args=None): -- GitLab From 5b92543af5f1fdf098d0f291daf0b022573632b5 Mon Sep 17 00:00:00 2001 From: xywei Date: Wed, 11 Sep 2019 12:08:51 -0500 Subject: [PATCH 25/55] Revert "Revert "Receive upstream updates"" This reverts commit 6f7d21cbfdcff7af01404183c8372dcf679482c0. --- .gitlab-ci.yml | 3 +- .test-conda-env-py3-macos.yml | 9 ++- .test-conda-env-py3.yml | 11 ++- README.rst | 9 ++- azure-pipelines.yml | 126 ++++++++++++++++++++++++++++++++++ pytential/qbx/__init__.py | 23 ++++--- 6 files changed, 165 insertions(+), 16 deletions(-) create mode 100644 azure-pipelines.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 85257045..d95ed067 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -82,7 +82,6 @@ Python 3 Conda: - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine - export CONDA_ENVIRONMENT=.test-conda-env-py3.yml - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} - - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". 
./build-and-test-py-project-within-miniconda.sh" tags: @@ -100,7 +99,6 @@ Python 3 Conda Apple: - export LANG=en_US.UTF-8 - export CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} - - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - export CC=clang - set -o xtrace - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh @@ -127,6 +125,7 @@ Documentation: Pylint: script: # Needed to avoid name shadowing issues when running from source directory. + # Pylint won't find the Cython bits without this - PROJECT_INSTALL_FLAGS="--editable" - export PY_EXE=python3 # Pin to numpy 1.15 diff --git a/.test-conda-env-py3-macos.yml b/.test-conda-env-py3-macos.yml index eea9ddd7..cbf0efad 100644 --- a/.test-conda-env-py3-macos.yml +++ b/.test-conda-env-py3-macos.yml @@ -19,4 +19,11 @@ dependencies: - clangdev - openmp - cython -# things not in here: loopy boxtree pymbolic meshmode sumpy + +- pip +- pip: + - git+https://gitlab.tiker.net/inducer/boxtree + - git+https://github.com/inducer/pymbolic + - git+https://github.com/inducer/loopy + - git+https://gitlab.tiker.net/inducer/sumpy + - git+https://github.com/inducer/meshmode diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index 8023391b..750b0072 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -10,9 +10,16 @@ dependencies: - pocl - islpy - pyopencl -- python>=3.6 +- python=3 - symengine=0.3.0 - python-symengine=0.3.0 - pyfmmlib - cython -# things not in here: loopy boxtree pymbolic meshmode sumpy + +- pip +- pip: + - git+https://gitlab.tiker.net/inducer/boxtree + - git+https://github.com/inducer/pymbolic + - git+https://github.com/inducer/loopy + - git+https://gitlab.tiker.net/inducer/sumpy + - git+https://github.com/inducer/meshmode diff --git a/README.rst b/README.rst index ad3f7905..722b863f 100644 --- a/README.rst +++ b/README.rst @@ -2,9 +2,14 @@ pytential: 2D/3D Layer Potential Evaluation =========================================== .. image:: https://gitlab.tiker.net/inducer/pytential/badges/master/pipeline.svg - :target: https://gitlab.tiker.net/inducer/pytential/commits/master + :alt: Gitlab Build Status + :target: https://gitlab.tiker.net/inducer/pytential/commits/master +.. image:: https://dev.azure.com/ak-spam/inducer/_apis/build/status/inducer.pytential?branchName=master + :alt: Azure Build Status + :target: https://dev.azure.com/ak-spam/inducer/_build/latest?definitionId=16&branchName=master .. image:: https://badge.fury.io/py/pytential.png - :target: http://pypi.python.org/pypi/pytential + :alt: Python Package Index Release Page + :target: https://pypi.org/project/pytential/ pytential helps you accurately evaluate layer potentials (and, sooner or later, volume potentials). 
diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 00000000..ddd78cf1 --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,126 @@ +jobs: +- + job: 'Python2' + pool: + vmImage: 'ubuntu-latest' + + # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#timeouts + # Maximum allowed as of Sep 5, 2019 + timeoutInMinutes: 360 + + steps: + - + script: | + set -e + sed 's/python=3/python=2.7/' .test-conda-env-py3.yml > .test-conda-env-py2-pre.yml + grep -v symengine .test-conda-env-py2-pre.yml > .test-conda-env-py2.yml + cat .test-conda-env-py2.yml + CONDA_ENVIRONMENT=.test-conda-env-py2.yml + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + . ./build-and-test-py-project-within-miniconda.sh + + displayName: 'Pytest Conda' + - + task: PublishTestResults@2 + inputs: + testResultsFormat: 'JUnit' + testResultsFiles: 'test/pytest.xml' + +- + job: 'Python3' + pool: + vmImage: 'ubuntu-latest' + + # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#timeouts + # Maximum allowed as of Sep 5, 2019 + timeoutInMinutes: 360 + + steps: + - + script: | + set -e + grep -v symengine .test-conda-env-py3.yml > .test-conda-env.yml + CONDA_ENVIRONMENT=.test-conda-env.yml + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + . ./build-and-test-py-project-within-miniconda.sh + + displayName: 'Pytest Conda' + + - + task: PublishTestResults@2 + inputs: + testResultsFormat: 'JUnit' + testResultsFiles: 'test/pytest.xml' + +- + job: 'Python3Symengine' + pool: + vmImage: 'ubuntu-latest' + + # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#timeouts + # Maximum allowed as of Sep 5, 2019 + timeoutInMinutes: 360 + + steps: + - + script: | + set -e + CONDA_ENVIRONMENT=.test-conda-env-py3.yml + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + . ./build-and-test-py-project-within-miniconda.sh + + displayName: 'Pytest Conda' + + - + task: PublishTestResults@2 + inputs: + testResultsFormat: 'JUnit' + testResultsFiles: 'test/pytest.xml' + +- + job: 'Flake8' + pool: + vmImage: 'ubuntu-latest' + strategy: + matrix: + Python37: + python.version: '3.7' + + steps: + - + task: UsePythonVersion@0 + inputs: + versionSpec: '$(python.version)' + + - + script: | + set -e + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh + . ./prepare-and-run-flake8.sh pytential test + + displayName: 'Flake8' + +- + job: 'Pylint' + pool: + vmImage: 'ubuntu-latest' + + steps: + - + script: | + set -e + CONDA_ENVIRONMENT=.test-conda-env-py3-pylint.yml + sed 's/numpy/numpy=1.15/' .test-conda-env-py3.yml > $CONDA_ENVIRONMENT + echo "- matplotlib" >> $CONDA_ENVIRONMENT + echo "-------------------------------------------" + cat $CONDA_ENVIRONMENT + echo "-------------------------------------------" + USE_CONDA_BUILD=1 + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh + + # Pylint won't find the Cython bits without this + PROJECT_INSTALL_FLAGS="--editable" + + . 
./prepare-and-run-pylint.sh pytential test/test_*.py + + displayName: 'Pylint' diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index cef8eed2..e98da240 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -559,8 +559,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): return self._dispatch_compute_potential_insn( queue, insn, bound_expr, evaluate, func, extra_args) - def cost_model_compute_potential_insn(self, queue, insn, bound_expr, evaluate, - calibration_params): + def cost_model_compute_potential_insn(self, queue, insn, bound_expr, evaluate): """Using :attr:`cost_model`, evaluate the cost of executing *insn*. Cost model results are gathered in :attr:`pytential.symbolic.execution.BoundExpression.modeled_cost` @@ -568,22 +567,28 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): :returns: whatever :meth:`exec_compute_potential_insn_fmm` returns. """ + if self.fmm_level_to_order is False: raise NotImplementedError("perf modeling direct evaluations") def drive_cost_model( wrangler, strengths, geo_data, kernel, kernel_arguments): del strengths - cost_model_result = self.cost_model( - geo_data, kernel, kernel_arguments, calibration_params + cost_model_result = ( + self.cost_model(wrangler, geo_data, kernel, kernel_arguments)) + + from pytools.obj_array import with_object_array_or_scalar + output_placeholder = with_object_array_or_scalar( + wrangler.finalize_potentials, + wrangler.full_output_zeros() ) - return wrangler.full_output_zeros(), cost_model_result + + return output_placeholder, cost_model_result return self._dispatch_compute_potential_insn( - queue, insn, bound_expr, evaluate, - self.exec_compute_potential_insn_fmm, - extra_args={"fmm_driver": drive_cost_model} - ) + queue, insn, bound_expr, evaluate, + self.exec_compute_potential_insn_fmm, + extra_args={"fmm_driver": drive_cost_model}) def _dispatch_compute_potential_insn(self, queue, insn, bound_expr, evaluate, func, extra_args=None): -- GitLab From ddad1e6c7a64231f24fae00c9e7c0ef05cc111f1 Mon Sep 17 00:00:00 2001 From: xywei Date: Wed, 11 Sep 2019 12:11:01 -0500 Subject: [PATCH 26/55] Revert "Revert "Revert "Receive upstream updates""" This reverts commit 5b92543af5f1fdf098d0f291daf0b022573632b5. --- .gitlab-ci.yml | 3 +- .test-conda-env-py3-macos.yml | 9 +-- .test-conda-env-py3.yml | 11 +-- README.rst | 9 +-- azure-pipelines.yml | 126 ---------------------------------- pytential/qbx/__init__.py | 23 +++---- 6 files changed, 16 insertions(+), 165 deletions(-) delete mode 100644 azure-pipelines.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d95ed067..85257045 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -82,6 +82,7 @@ Python 3 Conda: - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine - export CONDA_ENVIRONMENT=.test-conda-env-py3.yml - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". 
./build-and-test-py-project-within-miniconda.sh" tags: @@ -99,6 +100,7 @@ Python 3 Conda Apple: - export LANG=en_US.UTF-8 - export CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - export CC=clang - set -o xtrace - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh @@ -125,7 +127,6 @@ Documentation: Pylint: script: # Needed to avoid name shadowing issues when running from source directory. - # Pylint won't find the Cython bits without this - PROJECT_INSTALL_FLAGS="--editable" - export PY_EXE=python3 # Pin to numpy 1.15 diff --git a/.test-conda-env-py3-macos.yml b/.test-conda-env-py3-macos.yml index cbf0efad..eea9ddd7 100644 --- a/.test-conda-env-py3-macos.yml +++ b/.test-conda-env-py3-macos.yml @@ -19,11 +19,4 @@ dependencies: - clangdev - openmp - cython - -- pip -- pip: - - git+https://gitlab.tiker.net/inducer/boxtree - - git+https://github.com/inducer/pymbolic - - git+https://github.com/inducer/loopy - - git+https://gitlab.tiker.net/inducer/sumpy - - git+https://github.com/inducer/meshmode +# things not in here: loopy boxtree pymbolic meshmode sumpy diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index 750b0072..8023391b 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -10,16 +10,9 @@ dependencies: - pocl - islpy - pyopencl -- python=3 +- python>=3.6 - symengine=0.3.0 - python-symengine=0.3.0 - pyfmmlib - cython - -- pip -- pip: - - git+https://gitlab.tiker.net/inducer/boxtree - - git+https://github.com/inducer/pymbolic - - git+https://github.com/inducer/loopy - - git+https://gitlab.tiker.net/inducer/sumpy - - git+https://github.com/inducer/meshmode +# things not in here: loopy boxtree pymbolic meshmode sumpy diff --git a/README.rst b/README.rst index 722b863f..ad3f7905 100644 --- a/README.rst +++ b/README.rst @@ -2,14 +2,9 @@ pytential: 2D/3D Layer Potential Evaluation =========================================== .. image:: https://gitlab.tiker.net/inducer/pytential/badges/master/pipeline.svg - :alt: Gitlab Build Status - :target: https://gitlab.tiker.net/inducer/pytential/commits/master -.. image:: https://dev.azure.com/ak-spam/inducer/_apis/build/status/inducer.pytential?branchName=master - :alt: Azure Build Status - :target: https://dev.azure.com/ak-spam/inducer/_build/latest?definitionId=16&branchName=master + :target: https://gitlab.tiker.net/inducer/pytential/commits/master .. image:: https://badge.fury.io/py/pytential.png - :alt: Python Package Index Release Page - :target: https://pypi.org/project/pytential/ + :target: http://pypi.python.org/pypi/pytential pytential helps you accurately evaluate layer potentials (and, sooner or later, volume potentials). 
diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index ddd78cf1..00000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,126 +0,0 @@ -jobs: -- - job: 'Python2' - pool: - vmImage: 'ubuntu-latest' - - # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#timeouts - # Maximum allowed as of Sep 5, 2019 - timeoutInMinutes: 360 - - steps: - - - script: | - set -e - sed 's/python=3/python=2.7/' .test-conda-env-py3.yml > .test-conda-env-py2-pre.yml - grep -v symengine .test-conda-env-py2-pre.yml > .test-conda-env-py2.yml - cat .test-conda-env-py2.yml - CONDA_ENVIRONMENT=.test-conda-env-py2.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . ./build-and-test-py-project-within-miniconda.sh - - displayName: 'Pytest Conda' - - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'JUnit' - testResultsFiles: 'test/pytest.xml' - -- - job: 'Python3' - pool: - vmImage: 'ubuntu-latest' - - # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#timeouts - # Maximum allowed as of Sep 5, 2019 - timeoutInMinutes: 360 - - steps: - - - script: | - set -e - grep -v symengine .test-conda-env-py3.yml > .test-conda-env.yml - CONDA_ENVIRONMENT=.test-conda-env.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . ./build-and-test-py-project-within-miniconda.sh - - displayName: 'Pytest Conda' - - - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'JUnit' - testResultsFiles: 'test/pytest.xml' - -- - job: 'Python3Symengine' - pool: - vmImage: 'ubuntu-latest' - - # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#timeouts - # Maximum allowed as of Sep 5, 2019 - timeoutInMinutes: 360 - - steps: - - - script: | - set -e - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . ./build-and-test-py-project-within-miniconda.sh - - displayName: 'Pytest Conda' - - - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'JUnit' - testResultsFiles: 'test/pytest.xml' - -- - job: 'Flake8' - pool: - vmImage: 'ubuntu-latest' - strategy: - matrix: - Python37: - python.version: '3.7' - - steps: - - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - - - - script: | - set -e - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh - . ./prepare-and-run-flake8.sh pytential test - - displayName: 'Flake8' - -- - job: 'Pylint' - pool: - vmImage: 'ubuntu-latest' - - steps: - - - script: | - set -e - CONDA_ENVIRONMENT=.test-conda-env-py3-pylint.yml - sed 's/numpy/numpy=1.15/' .test-conda-env-py3.yml > $CONDA_ENVIRONMENT - echo "- matplotlib" >> $CONDA_ENVIRONMENT - echo "-------------------------------------------" - cat $CONDA_ENVIRONMENT - echo "-------------------------------------------" - USE_CONDA_BUILD=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh - - # Pylint won't find the Cython bits without this - PROJECT_INSTALL_FLAGS="--editable" - - . 
./prepare-and-run-pylint.sh pytential test/test_*.py - - displayName: 'Pylint' diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index e98da240..cef8eed2 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -559,7 +559,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): return self._dispatch_compute_potential_insn( queue, insn, bound_expr, evaluate, func, extra_args) - def cost_model_compute_potential_insn(self, queue, insn, bound_expr, evaluate): + def cost_model_compute_potential_insn(self, queue, insn, bound_expr, evaluate, + calibration_params): """Using :attr:`cost_model`, evaluate the cost of executing *insn*. Cost model results are gathered in :attr:`pytential.symbolic.execution.BoundExpression.modeled_cost` @@ -567,28 +568,22 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): :returns: whatever :meth:`exec_compute_potential_insn_fmm` returns. """ - if self.fmm_level_to_order is False: raise NotImplementedError("perf modeling direct evaluations") def drive_cost_model( wrangler, strengths, geo_data, kernel, kernel_arguments): del strengths - cost_model_result = ( - self.cost_model(wrangler, geo_data, kernel, kernel_arguments)) - - from pytools.obj_array import with_object_array_or_scalar - output_placeholder = with_object_array_or_scalar( - wrangler.finalize_potentials, - wrangler.full_output_zeros() + cost_model_result = self.cost_model( + geo_data, kernel, kernel_arguments, calibration_params ) - - return output_placeholder, cost_model_result + return wrangler.full_output_zeros(), cost_model_result return self._dispatch_compute_potential_insn( - queue, insn, bound_expr, evaluate, - self.exec_compute_potential_insn_fmm, - extra_args={"fmm_driver": drive_cost_model}) + queue, insn, bound_expr, evaluate, + self.exec_compute_potential_insn_fmm, + extra_args={"fmm_driver": drive_cost_model} + ) def _dispatch_compute_potential_insn(self, queue, insn, bound_expr, evaluate, func, extra_args=None): -- GitLab From a9a044b543096840d5d0634a313bfe7d9c91fb79 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 9 Oct 2019 17:07:25 -0500 Subject: [PATCH 27/55] Add more cost model test cases --- test/test_cost_model.py | 147 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 144 insertions(+), 3 deletions(-) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 399a2014..7d0f7718 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -31,10 +31,15 @@ from pyopencl.tools import ( # noqa import numpy as np import pyopencl as cl + from pytential.qbx import QBXLayerPotentialSource +from sumpy.kernel import LaplaceKernel +from pytential import bind, sym, norm # noqa + from pytential.qbx.cost import ( CLQBXCostModel, PythonQBXCostModel, pde_aware_translation_cost_model ) + import time import logging @@ -44,7 +49,8 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -@pytest.mark.opencl +# {{{ Compare the time and result of OpenCL implementation and Python implementation + def test_compare_cl_and_py_cost_model(ctx_factory): nelements = 3600 target_order = 16 @@ -257,9 +263,144 @@ def test_compare_cl_and_py_cost_model(ctx_factory): # }}} +# }}} + + +# {{{ global params + +TARGET_ORDER = 8 +OVSMP_FACTOR = 5 +TCF = 0.9 +QBX_ORDER = 5 +FMM_ORDER = 10 + +DEFAULT_LPOT_KWARGS = { + "_box_extent_norm": "l2", + "_from_sep_smaller_crit": "static_l2", + } + + +def get_lpot_source(queue, dim): + from meshmode.discretization import Discretization + from meshmode.discretization.poly_element import ( + 
InterpolatoryQuadratureSimplexGroupFactory) + + target_order = TARGET_ORDER + + if dim == 2: + from meshmode.mesh.generation import starfish, make_curve_mesh + mesh = make_curve_mesh(starfish, np.linspace(0, 1, 50), order=target_order) + elif dim == 3: + from meshmode.mesh.generation import generate_torus + mesh = generate_torus(2, 1, order=target_order) + else: + raise ValueError("unsupported dimension: %d" % dim) + + pre_density_discr = Discretization( + queue.context, mesh, + InterpolatoryQuadratureSimplexGroupFactory(target_order)) + + lpot_kwargs = DEFAULT_LPOT_KWARGS.copy() + lpot_kwargs.update( + _expansion_stick_out_factor=TCF, + fmm_order=FMM_ORDER, + qbx_order=QBX_ORDER, + fmm_backend="fmmlib", + ) + + from pytential.qbx import QBXLayerPotentialSource + lpot_source = QBXLayerPotentialSource( + pre_density_discr, OVSMP_FACTOR*target_order, + **lpot_kwargs) + + lpot_source, _ = lpot_source.with_refinement() + + return lpot_source + + +def get_density(queue, lpot_source): + density_discr = lpot_source.density_discr + nodes = density_discr.nodes().with_queue(queue) + return cl.clmath.sin(10 * nodes[0]) + +# }}} + + +# {{{ test that timing data gathering can execute succesfully + +def test_timing_data_gathering(ctx_getter): + """Test that timing data gathering can execute succesfully.""" + + pytest.importorskip("pyfmmlib") + + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx, + properties=cl.command_queue_properties.PROFILING_ENABLE) + + lpot_source = get_lpot_source(queue, 2) + sigma = get_density(queue, lpot_source) + + sigma_sym = sym.var("sigma") + k_sym = LaplaceKernel(lpot_source.ambient_dim) + sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) + + op_S = bind(lpot_source, sym_op_S) + + timing_data = {} + op_S.eval(queue, dict(sigma=sigma), timing_data=timing_data) + assert timing_data + print(timing_data) + +# }}} + + +# {{{ test cost model + +@pytest.mark.parametrize("dim, use_target_specific_qbx", ( + (2, False), + (3, False), + (3, True))) +def test_cost_model(ctx_getter, dim, use_target_specific_qbx): + """Test that cost model gathering can execute successfully.""" + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + + lpot_source = ( + get_lpot_source(queue, dim) + .copy( + _use_target_specific_qbx=use_target_specific_qbx, + cost_model=CLQBXCostModel(queue))) + + sigma = get_density(queue, lpot_source) + + sigma_sym = sym.var("sigma") + k_sym = LaplaceKernel(lpot_source.ambient_dim) + + sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) + op_S = bind(lpot_source, sym_op_S) + cost_S = op_S.get_modeled_cost(queue, "constant_one", sigma=sigma) + assert len(cost_S) == 1 + + sym_op_S_plus_D = ( + sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) + + sym.D(k_sym, sigma_sym)) + op_S_plus_D = bind(lpot_source, sym_op_S_plus_D) + cost_S_plus_D = op_S_plus_D.get_modeled_cost(queue, "constant_one", sigma=sigma) + assert len(cost_S_plus_D) == 2 + +# }}} + + +# You can test individual routines by typing +# $ python test_cost_model.py 'test_routine()' if __name__ == "__main__": - ctx_factory = cl.create_some_context - test_compare_cl_and_py_cost_model(ctx_factory) + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + # vim: foldmethod=marker -- GitLab From 3e33c0bbbef708447d8f104b3230fbaf7dd87597 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 9 Oct 2019 17:08:54 -0500 Subject: [PATCH 28/55] Automatically fetch geometry data to host when using PythonQBXCostModel --- pytential/qbx/cost.py | 15 +++++++++++++++ 1 
file changed, 15 insertions(+) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 3d59a317..4d3d5c6f 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -747,6 +747,21 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): return neval_qbxl * qbxl2p_cost + def get_qbx_modeled_cost(self, geo_data, kernel, kernel_arguments, + calibration_params): + from pytential.qbx.utils import ToHostTransferredGeoDataWrapper + from pytential.qbx.geometry import QBXFMMGeometryData + + if not isinstance(geo_data, ToHostTransferredGeoDataWrapper): + assert isinstance(geo_data, QBXFMMGeometryData) + + queue = cl.CommandQueue(geo_data.cl_context) + geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) + + AbstractQBXCostModel.get_qbx_modeled_cost( + self, geo_data, kernel, kernel_arguments, calibration_params + ) + # }}} # vim: foldmethod=marker -- GitLab From 15e45f4ec3cccf272e159242e400618947b4325e Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 10 Oct 2019 22:00:51 -0500 Subject: [PATCH 29/55] Add constantone test case --- test/test_cost_model.py | 294 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 7d0f7718..2dde059f 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -32,6 +32,7 @@ from pyopencl.tools import ( # noqa import numpy as np import pyopencl as cl +from boxtree.tools import ConstantOneExpansionWrangler from pytential.qbx import QBXLayerPotentialSource from sumpy.kernel import LaplaceKernel from pytential import bind, sym, norm # noqa @@ -391,6 +392,299 @@ def test_cost_model(ctx_getter, dim, use_target_specific_qbx): # }}} +# {{{ constant one wrangler + +class ConstantOneQBXExpansionWrangler(ConstantOneExpansionWrangler): + + def __init__(self, queue, geo_data, use_target_specific_qbx): + from pytential.qbx.utils import ToHostTransferredGeoDataWrapper + geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) + + self.geo_data = geo_data + self.trav = geo_data.traversal() + self.using_tsqbx = ( + use_target_specific_qbx + # None means use by default if possible + or use_target_specific_qbx is None) + + ConstantOneExpansionWrangler.__init__(self, geo_data.tree()) + + def _get_target_slice(self, ibox): + non_qbx_box_target_lists = self.geo_data.non_qbx_box_target_lists() + pstart = non_qbx_box_target_lists.box_target_starts[ibox] + return slice( + pstart, pstart + + non_qbx_box_target_lists.box_target_counts_nonchild[ibox]) + + def output_zeros(self): + non_qbx_box_target_lists = self.geo_data.non_qbx_box_target_lists() + return np.zeros(non_qbx_box_target_lists.nfiltered_targets) + + def full_output_zeros(self): + from pytools.obj_array import make_obj_array + return make_obj_array([np.zeros(self.tree.ntargets)]) + + def qbx_local_expansion_zeros(self): + return np.zeros(self.geo_data.ncenters) + + def reorder_potentials(self, potentials): + raise NotImplementedError("reorder_potentials should not " + "be called on a QBXExpansionWrangler") + + def form_global_qbx_locals(self, src_weights): + local_exps = self.qbx_local_expansion_zeros() + ops = 0 + + if self.using_tsqbx: + return local_exps, self.timing_future(ops) + + global_qbx_centers = self.geo_data.global_qbx_centers() + qbx_center_to_target_box = self.geo_data.qbx_center_to_target_box() + + for tgt_icenter in global_qbx_centers: + itgt_box = qbx_center_to_target_box[tgt_icenter] + + start, end = ( + self.trav.neighbor_source_boxes_starts[itgt_box:itgt_box + 2]) + + src_sum = 
0 + for src_ibox in self.trav.neighbor_source_boxes_lists[start:end]: + src_pslice = self._get_source_slice(src_ibox) + ops += src_pslice.stop - src_pslice.start + src_sum += np.sum(src_weights[src_pslice]) + + local_exps[tgt_icenter] = src_sum + + return local_exps, self.timing_future(ops) + + def translate_box_multipoles_to_qbx_local(self, multipole_exps): + local_exps = self.qbx_local_expansion_zeros() + ops = 0 + + global_qbx_centers = self.geo_data.global_qbx_centers() + + for isrc_level, ssn in enumerate(self.trav.from_sep_smaller_by_level): + for tgt_icenter in global_qbx_centers: + icontaining_tgt_box = ( + self.geo_data + .qbx_center_to_target_box_source_level(isrc_level) + [tgt_icenter]) + + if icontaining_tgt_box == -1: + continue + + start, stop = ( + ssn.starts[icontaining_tgt_box], + ssn.starts[icontaining_tgt_box+1]) + + for src_ibox in ssn.lists[start:stop]: + local_exps[tgt_icenter] += multipole_exps[src_ibox] + ops += 1 + + return local_exps, self.timing_future(ops) + + def translate_box_local_to_qbx_local(self, local_exps): + qbx_expansions = self.qbx_local_expansion_zeros() + ops = 0 + + global_qbx_centers = self.geo_data.global_qbx_centers() + qbx_center_to_target_box = self.geo_data.qbx_center_to_target_box() + + for tgt_icenter in global_qbx_centers: + isrc_box = qbx_center_to_target_box[tgt_icenter] + src_ibox = self.trav.target_boxes[isrc_box] + qbx_expansions[tgt_icenter] += local_exps[src_ibox] + ops += 1 + + return qbx_expansions, self.timing_future(ops) + + def eval_qbx_expansions(self, qbx_expansions): + output = self.full_output_zeros() + ops = 0 + + global_qbx_centers = self.geo_data.global_qbx_centers() + center_to_tree_targets = self.geo_data.center_to_tree_targets() + + for src_icenter in global_qbx_centers: + start, end = ( + center_to_tree_targets.starts[src_icenter:src_icenter+2]) + for icenter_tgt in range(start, end): + center_itgt = center_to_tree_targets.lists[icenter_tgt] + output[0][center_itgt] += qbx_expansions[src_icenter] + ops += 1 + + return output, self.timing_future(ops) + + def eval_target_specific_qbx_locals(self, src_weights): + pot = self.full_output_zeros() + ops = 0 + + if not self.using_tsqbx: + return pot, self.timing_future(ops) + + global_qbx_centers = self.geo_data.global_qbx_centers() + center_to_tree_targets = self.geo_data.center_to_tree_targets() + qbx_center_to_target_box = self.geo_data.qbx_center_to_target_box() + + target_box_to_src_sum = {} + target_box_to_nsrcs = {} + + for ictr in global_qbx_centers: + tgt_ibox = qbx_center_to_target_box[ictr] + + isrc_box_start, isrc_box_end = ( + self.trav.neighbor_source_boxes_starts[tgt_ibox:tgt_ibox+2]) + + if tgt_ibox not in target_box_to_src_sum: + nsrcs = 0 + src_sum = 0 + + for isrc_box in range(isrc_box_start, isrc_box_end): + src_ibox = self.trav.neighbor_source_boxes_lists[isrc_box] + + isrc_start = self.tree.box_source_starts[src_ibox] + isrc_end = (isrc_start + + self.tree.box_source_counts_nonchild[src_ibox]) + + src_sum += sum(src_weights[isrc_start:isrc_end]) + nsrcs += isrc_end - isrc_start + + target_box_to_src_sum[tgt_ibox] = src_sum + target_box_to_nsrcs[tgt_ibox] = nsrcs + + src_sum = target_box_to_src_sum[tgt_ibox] + nsrcs = target_box_to_nsrcs[tgt_ibox] + + ictr_tgt_start, ictr_tgt_end = center_to_tree_targets.starts[ictr:ictr+2] + + for ictr_tgt in range(ictr_tgt_start, ictr_tgt_end): + ctr_itgt = center_to_tree_targets.lists[ictr_tgt] + pot[0][ctr_itgt] = src_sum + + ops += (ictr_tgt_end - ictr_tgt_start) * nsrcs + + return pot, self.timing_future(ops) + +# 
}}} + + +# {{{ verify cost model + +class OpCountingTranslationCostModel(object): + """A translation cost model which assigns at cost of 1 to each operation.""" + + def __init__(self, dim, nlevels): + pass + + @staticmethod + def direct(): + return 1 + + p2qbxl = direct + p2p_tsqbx = direct + qbxl2p = direct + + @staticmethod + def p2l(level): + return 1 + + l2p = p2l + p2m = p2l + m2p = p2l + m2qbxl = p2l + l2qbxl = p2l + + @staticmethod + def m2m(src_level, tgt_level): + return 1 + + l2l = m2m + m2l = m2m + + +@pytest.mark.parametrize("dim, off_surface, use_target_specific_qbx", ( + (2, False, False), + (2, True, False), + (3, False, False), + (3, False, True), + (3, True, False), + (3, True, True))) +def test_cost_model_correctness(ctx_getter, dim, off_surface, + use_target_specific_qbx): + """Check that computed cost matches that of a constant-one FMM.""" + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + + cost_model = CLQBXCostModel( + queue, translation_cost_model_factory=OpCountingTranslationCostModel + ) + + lpot_source = get_lpot_source(queue, dim).copy( + cost_model=cost_model, + _use_target_specific_qbx=use_target_specific_qbx) + + # Construct targets. + if off_surface: + from pytential.target import PointsTarget + from boxtree.tools import make_uniform_particle_array + ntargets = 10 ** 3 + targets = PointsTarget( + make_uniform_particle_array(queue, ntargets, dim, np.float)) + target_discrs_and_qbx_sides = ((targets, 0),) + qbx_forced_limit = None + else: + targets = lpot_source.density_discr + target_discrs_and_qbx_sides = ((targets, 1),) + qbx_forced_limit = 1 + + # Construct bound op, run cost model. + sigma_sym = sym.var("sigma") + k_sym = LaplaceKernel(lpot_source.ambient_dim) + sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=qbx_forced_limit) + + op_S = bind((lpot_source, targets), sym_op_S) + sigma = get_density(queue, lpot_source) + + from pytools import one + modeled_time = one( + op_S.get_modeled_cost(queue, "constant_one", sigma=sigma).values() + ) + + # Run FMM with ConstantOneWrangler. This can't be done with pytential's + # high-level interface, so call the FMM driver directly. + from pytential.qbx.fmm import drive_fmm + geo_data = lpot_source.qbx_fmm_geometry_data( + target_discrs_and_qbx_sides=target_discrs_and_qbx_sides) + + wrangler = ConstantOneQBXExpansionWrangler( + queue, geo_data, use_target_specific_qbx) + nnodes = lpot_source.quad_stage2_density_discr.nnodes + src_weights = np.ones(nnodes) + + timing_data = {} + potential = drive_fmm(wrangler, src_weights, timing_data, + traversal=wrangler.trav)[0][geo_data.ncenters:] + + # Check constant one wrangler for correctness. + assert (potential == nnodes).all() + + # Check that the cost model matches the timing data returned by the + # constant one wrangler. 
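For context: the `assert (potential == nnodes).all()` check a few lines above works because the constant-one wrangler replaces every kernel evaluation by the constant 1, so with unit source weights each target must receive exactly one contribution per source, no matter how the work is split between near-field and far-field stages. A minimal, standalone illustration of that invariant (plain numpy, not pytential data structures); the per-stage mismatch check of this test continues directly below.

    import numpy as np

    # With a "kernel" that is identically 1 and unit source weights, the potential
    # at every target equals the number of sources, however the sum is partitioned.
    nsources, ntargets = 7, 5
    src_weights = np.ones(nsources)

    near = np.full(ntargets, src_weights[:3].sum())  # pretend 3 sources are "near"
    far = np.full(ntargets, src_weights[3:].sum())   # and the remaining 4 are "far"
    potential = near + far

    assert (potential == nsources).all()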
+ mismatches = [] + for stage in timing_data: + if stage not in modeled_time: + assert timing_data[stage]["ops_elapsed"] == 0 + else: + modeled_stage_time = cost_model.aggregate(modeled_time[stage]) + if timing_data[stage]["ops_elapsed"] != modeled_stage_time: + mismatches.append( + (stage, timing_data[stage]["ops_elapsed"], modeled_time[stage])) + + assert not mismatches, "\n".join(str(s) for s in mismatches) + +# }}} + + # You can test individual routines by typing # $ python test_cost_model.py 'test_routine()' -- GitLab From 1585a02f4707b108e2560dc0c999aa2aa0dc154a Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 14 Oct 2019 12:17:19 -0500 Subject: [PATCH 30/55] Add module-level doc for cost model --- pytential/qbx/cost.py | 47 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 4d3d5c6f..2a7e6e71 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -54,6 +54,53 @@ import logging logger = logging.getLogger(__name__) +__doc__ = """ + +This module helps predict the running time of each step of QBX, as an extension of +the similar module *boxtree.cost* in boxtree. + +:class:`QBXTranslationCostModel` describes the translation or evaluation cost of a +single operation. For example, *m2qbxl* describes the cost for translating a single +multipole expansion to a QBX local expansion. + +:class:`AbstractQBXCostModel` uses :class:`QBXTranslationCostModel` and +kernel-specific calibration parameter to compute the total cost of each step of QBX +in each box. There are two implementations of the interface +:class:`AbstractQBXCostModel`, namely :class:`CLQBXCostModel` using OpenCL and +:class:`PythonQBXCostModel` using pure Python. The kernel-specific calibration +parameter can be estimated using *estimate_knl_specific_calibration_params*. + +Translation Cost of a Single Operation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autoclass:: QBXTranslationCostModel + +.. autofunction:: pde_aware_translation_cost_model + +.. autofunction:: taylor_translation_cost_model + +Training (Generate Calibration Parameters) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automethod:: AbstractQBXCostModel.estimate_knl_specific_calibration_params + +Evaluating +^^^^^^^^^^ + +.. automethod:: AbstractQBXCostModel.get_qbx_modeled_cost + +Utilities +^^^^^^^^^ + +.. automethod:: boxtree.cost.AbstractFMMCostModel.aggregate + +.. automethod:: boxtree.cost.AbstractFMMCostModel.aggregate_stage_costs_per_box + +.. 
automethod:: AbstractQBXCostModel.get_constantone_calibration_params + +""" + + # {{{ translation cost model class QBXTranslationCostModel(FMMTranslationCostModel): -- GitLab From 71b90c5f32ace23c0822d12431c3647c9c607c5e Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 14 Oct 2019 12:30:39 -0500 Subject: [PATCH 31/55] Try to fix github azure CI --- .test-conda-env-py3-macos.yml | 9 ++- .test-conda-env-py3.yml | 9 ++- azure-pipelines.yml | 126 ++++++++++++++++++++++++++++++++++ requirements.txt | 2 +- 4 files changed, 143 insertions(+), 3 deletions(-) create mode 100644 azure-pipelines.yml diff --git a/.test-conda-env-py3-macos.yml b/.test-conda-env-py3-macos.yml index eea9ddd7..3c206095 100644 --- a/.test-conda-env-py3-macos.yml +++ b/.test-conda-env-py3-macos.yml @@ -19,4 +19,11 @@ dependencies: - clangdev - openmp - cython -# things not in here: loopy boxtree pymbolic meshmode sumpy + +- pip +- pip: + - git+https://github.com/gaohao95/boxtree@opencl-counter + - git+https://github.com/inducer/pymbolic + - git+https://github.com/inducer/loopy + - git+https://gitlab.tiker.net/inducer/sumpy + - git+https://github.com/inducer/meshmode diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index 8023391b..75e1c0eb 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -15,4 +15,11 @@ dependencies: - python-symengine=0.3.0 - pyfmmlib - cython -# things not in here: loopy boxtree pymbolic meshmode sumpy + +- pip +- pip: + - git+https://github.com/gaohao95/boxtree@opencl-counter + - git+https://github.com/inducer/pymbolic + - git+https://github.com/inducer/loopy + - git+https://gitlab.tiker.net/inducer/sumpy + - git+https://github.com/inducer/meshmode diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 00000000..ddd78cf1 --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,126 @@ +jobs: +- + job: 'Python2' + pool: + vmImage: 'ubuntu-latest' + + # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#timeouts + # Maximum allowed as of Sep 5, 2019 + timeoutInMinutes: 360 + + steps: + - + script: | + set -e + sed 's/python=3/python=2.7/' .test-conda-env-py3.yml > .test-conda-env-py2-pre.yml + grep -v symengine .test-conda-env-py2-pre.yml > .test-conda-env-py2.yml + cat .test-conda-env-py2.yml + CONDA_ENVIRONMENT=.test-conda-env-py2.yml + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + . ./build-and-test-py-project-within-miniconda.sh + + displayName: 'Pytest Conda' + - + task: PublishTestResults@2 + inputs: + testResultsFormat: 'JUnit' + testResultsFiles: 'test/pytest.xml' + +- + job: 'Python3' + pool: + vmImage: 'ubuntu-latest' + + # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#timeouts + # Maximum allowed as of Sep 5, 2019 + timeoutInMinutes: 360 + + steps: + - + script: | + set -e + grep -v symengine .test-conda-env-py3.yml > .test-conda-env.yml + CONDA_ENVIRONMENT=.test-conda-env.yml + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + . 
./build-and-test-py-project-within-miniconda.sh + + displayName: 'Pytest Conda' + + - + task: PublishTestResults@2 + inputs: + testResultsFormat: 'JUnit' + testResultsFiles: 'test/pytest.xml' + +- + job: 'Python3Symengine' + pool: + vmImage: 'ubuntu-latest' + + # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/phases?view=azure-devops&tabs=yaml#timeouts + # Maximum allowed as of Sep 5, 2019 + timeoutInMinutes: 360 + + steps: + - + script: | + set -e + CONDA_ENVIRONMENT=.test-conda-env-py3.yml + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + . ./build-and-test-py-project-within-miniconda.sh + + displayName: 'Pytest Conda' + + - + task: PublishTestResults@2 + inputs: + testResultsFormat: 'JUnit' + testResultsFiles: 'test/pytest.xml' + +- + job: 'Flake8' + pool: + vmImage: 'ubuntu-latest' + strategy: + matrix: + Python37: + python.version: '3.7' + + steps: + - + task: UsePythonVersion@0 + inputs: + versionSpec: '$(python.version)' + + - + script: | + set -e + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh + . ./prepare-and-run-flake8.sh pytential test + + displayName: 'Flake8' + +- + job: 'Pylint' + pool: + vmImage: 'ubuntu-latest' + + steps: + - + script: | + set -e + CONDA_ENVIRONMENT=.test-conda-env-py3-pylint.yml + sed 's/numpy/numpy=1.15/' .test-conda-env-py3.yml > $CONDA_ENVIRONMENT + echo "- matplotlib" >> $CONDA_ENVIRONMENT + echo "-------------------------------------------" + cat $CONDA_ENVIRONMENT + echo "-------------------------------------------" + USE_CONDA_BUILD=1 + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh + + # Pylint won't find the Cython bits without this + PROJECT_INSTALL_FLAGS="--editable" + + . ./prepare-and-run-pylint.sh pytential test/test_*.py + + displayName: 'Pylint' diff --git a/requirements.txt b/requirements.txt index bbbf6dc1..f13a5dc4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ git+https://github.com/inducer/modepy git+https://github.com/inducer/pyopencl git+https://github.com/inducer/islpy git+https://github.com/inducer/loopy -git+https://gitlab.tiker.net/inducer/boxtree@opencl-counter +git+https://github.com/gaohao95/boxtree@opencl-counter git+https://github.com/inducer/meshmode git+https://gitlab.tiker.net/inducer/sumpy git+https://gitlab.tiker.net/inducer/pyfmmlib -- GitLab From 1c4caa1d11f7a3cebc09cd7ce1f1c4a1c1942190 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 18 Oct 2019 01:02:02 -0500 Subject: [PATCH 32/55] Separate per-stage cost and per-box cost --- examples/cost.py | 12 ++-- pytential/qbx/__init__.py | 5 +- pytential/qbx/cost.py | 114 +++++++++++++++++++++++++++----- pytential/symbolic/execution.py | 11 +-- test/test_cost_model.py | 24 ++++--- 5 files changed, 129 insertions(+), 37 deletions(-) diff --git a/examples/cost.py b/examples/cost.py index 1ad2a742..8fa12b78 100644 --- a/examples/cost.py +++ b/examples/cost.py @@ -102,7 +102,9 @@ def calibrate_cost_model(ctx): bound_op = get_bound_op(lpot_source) sigma = get_test_density(queue, lpot_source) - cost_S = bound_op.get_modeled_cost(queue, "constant_one", sigma=sigma) + modeled_cost = bound_op.get_modeled_cost( + queue, "constant_one", False, sigma=sigma + ) # Warm-up run. 
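For context: with the "constant_one" parameters every calibration constant is 1, so the modeled cost collected here is a raw per-stage operation count; pairing those counts with the measured wall-clock timings gathered below is what lets the subsequent estimate_knl_specific_calibration_params call fit per-operation constants. A toy sketch of that fit for a single stage, using plain numpy rather than the library routine (all numbers invented); the warm-up evaluation announced by the comment above follows right after this note.

    import numpy as np

    # Least-squares fit of one calibration constant c for one stage, assuming the
    # measured time scales as t ~= c * (modeled operation count).
    op_counts = np.array([1.0e6, 4.0e6, 9.0e6])   # model output, one entry per run
    wall_times = np.array([0.11, 0.43, 0.95])     # measured seconds, one per run

    c_stage = op_counts.dot(wall_times) / op_counts.dot(op_counts)
    print("estimated seconds per modeled operation:", c_stage)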
bound_op.eval(queue, {"sigma": sigma}) @@ -111,7 +113,7 @@ def calibrate_cost_model(ctx): timing_data = {} bound_op.eval(queue, {"sigma": sigma}, timing_data=timing_data) - model_results.append(cost_S) + model_results.append(modeled_cost) timing_results.append(timing_data) calibration_params = cost_model.estimate_knl_specific_calibration_params( @@ -130,7 +132,9 @@ def test_cost_model(ctx, calibration_params): bound_op = get_bound_op(lpot_source) sigma = get_test_density(queue, lpot_source) - cost_S = bound_op.get_modeled_cost(queue, calibration_params, sigma=sigma) + cost_S = bound_op.get_modeled_cost( + queue, calibration_params, False, sigma=sigma + ) model_result = one(cost_S.values()) # Warm-up run. @@ -155,7 +159,7 @@ def test_cost_model(ctx, calibration_params): row = [ stage, "%.2f" % timing_result[stage], - "%.2f" % cost_model.aggregate(model_result[stage]) + "%.2f" % model_result[stage] ] table.add_row(row) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index cef8eed2..bd9dfd44 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -560,7 +560,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): queue, insn, bound_expr, evaluate, func, extra_args) def cost_model_compute_potential_insn(self, queue, insn, bound_expr, evaluate, - calibration_params): + calibration_params, per_box): """Using :attr:`cost_model`, evaluate the cost of executing *insn*. Cost model results are gathered in :attr:`pytential.symbolic.execution.BoundExpression.modeled_cost` @@ -575,7 +575,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): wrangler, strengths, geo_data, kernel, kernel_arguments): del strengths cost_model_result = self.cost_model( - geo_data, kernel, kernel_arguments, calibration_params + geo_data, kernel, kernel_arguments, calibration_params, + per_box=per_box ) return wrangler.full_output_zeros(), cost_model_result diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 2a7e6e71..b930197a 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -87,15 +87,15 @@ Training (Generate Calibration Parameters) Evaluating ^^^^^^^^^^ -.. automethod:: AbstractQBXCostModel.get_qbx_modeled_cost +.. automethod:: AbstractQBXCostModel.qbx_modeled_cost_per_stage + +.. automethod:: AbstractQBXCostModel.qbx_modeled_cost_per_box Utilities ^^^^^^^^^ .. automethod:: boxtree.cost.AbstractFMMCostModel.aggregate -.. automethod:: boxtree.cost.AbstractFMMCostModel.aggregate_stage_costs_per_box - .. automethod:: AbstractQBXCostModel.get_constantone_calibration_params """ @@ -297,8 +297,8 @@ class AbstractQBXCostModel(AbstractFMMCostModel): return cost_factors - def get_qbx_modeled_cost(self, geo_data, kernel, kernel_arguments, - calibration_params): + def qbx_modeled_cost_per_box(self, geo_data, kernel, kernel_arguments, + calibration_params): # FIXME: This should support target filtering. 
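The method body that resumes below first queries one FMM order per tree level and then builds the substitution dictionary that the per-stage cost expressions are evaluated with: the calibration constants plus `p_qbx` and one `p_fmm_lev%d` entry per level. A small sketch of what that dictionary ends up containing (orders and constants invented for illustration):

    # Invented values; in the real code they come from the layer-potential source
    # and from fmm_level_to_order.
    calibration_params = {"c_p2qbxl": 1.0, "c_m2qbxl": 1.0}   # (truncated)
    fmm_level_to_order = [10, 8, 6]

    params = dict(calibration_params)
    params["p_qbx"] = 5
    for ilevel, order in enumerate(fmm_level_to_order):
        params["p_fmm_lev%d" % ilevel] = order

    print(sorted(params.keys()))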
lpot_source = geo_data.lpot_source use_tsqbx = lpot_source._use_target_specific_qbx @@ -306,6 +306,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): traversal = geo_data.traversal() nqbtl = geo_data.non_qbx_box_target_lists() box_target_counts_nonchild = nqbtl.box_target_counts_nonchild + target_boxes = traversal.target_boxes # FIXME: We can avoid using *kernel* and *kernel_arguments* if we talk # to the wrangler to obtain the FMM order (see also @@ -337,7 +338,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): ndirect_sources_per_target_box = \ self.get_ndirect_sources_per_target_box(traversal) - result = self.get_fmm_modeled_cost( + result = self.fmm_modeled_cost_per_box( traversal, fmm_level_to_order, ndirect_sources_per_target_box, calibration_params, @@ -345,33 +346,112 @@ class AbstractQBXCostModel(AbstractFMMCostModel): ) if use_tsqbx: - result["eval_target_specific_qbx_locals"] = \ - self.process_eval_target_specific_qbxl( - geo_data, translation_cost["p2p_tsqbx_cost"], - ndirect_sources_per_target_box=ndirect_sources_per_target_box - ) + result[target_boxes] += self.process_eval_target_specific_qbxl( + geo_data, translation_cost["p2p_tsqbx_cost"], + ndirect_sources_per_target_box + ) else: - result["form_global_qbx_locals"] = self.process_form_qbxl( + result[target_boxes] += self.process_form_qbxl( geo_data, translation_cost["p2qbxl_cost"], ndirect_sources_per_target_box ) - result["translate_box_multipoles_to_qbx_local"] = self.process_m2qbxl( + result[target_boxes] += self.process_m2qbxl( geo_data, translation_cost["m2qbxl_cost"] ) - result["translate_box_local_to_qbx_local"] = self.process_l2qbxl( + result[target_boxes] += self.process_l2qbxl( geo_data, translation_cost["l2qbxl_cost"] ) - result["eval_qbx_expansions"] = self.process_eval_qbxl( + result[target_boxes] += self.process_eval_qbxl( geo_data, translation_cost["qbxl2p_cost"] ) return result + def qbx_modeled_cost_per_stage(self, geo_data, kernel, kernel_arguments, + calibration_params): + # FIXME: This should support target filtering. 
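The hunk above adds each QBX contribution into the per-box FMM cost at the target boxes, whereas the per-stage variant whose body starts below reduces every contribution to a single scalar per stage. A toy sketch of how the two views relate (sizes and costs invented):

    import numpy as np

    nboxes = 8
    target_boxes = np.array([2, 3, 5])

    per_box = np.zeros(nboxes)        # the FMM per-box cost would already live here
    per_box[target_boxes] += 40.0     # e.g. cost of forming QBX local expansions

    per_stage = {"form_global_qbx_locals": 3 * 40.0}

    assert per_box.sum() == per_stage["form_global_qbx_locals"]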
+ lpot_source = geo_data.lpot_source + use_tsqbx = lpot_source._use_target_specific_qbx + tree = geo_data.tree() + traversal = geo_data.traversal() + nqbtl = geo_data.non_qbx_box_target_lists() + box_target_counts_nonchild = nqbtl.box_target_counts_nonchild + + # FIXME: We can avoid using *kernel* and *kernel_arguments* if we talk + # to the wrangler to obtain the FMM order (see also + # https://gitlab.tiker.net/inducer/boxtree/issues/25) + fmm_level_to_order = [ + lpot_source.fmm_level_to_order( + kernel.get_base_kernel(), kernel_arguments, tree, ilevel + ) for ilevel in range(tree.nlevels) + ] + + # {{{ Construct parameters + + params = calibration_params.copy() + params.update(dict(p_qbx=lpot_source.qbx_order)) + + for ilevel in range(tree.nlevels): + params["p_fmm_lev%d" % ilevel] = fmm_level_to_order[ilevel] + + # }}} + + xlat_cost = self.translation_cost_model_factory( + tree.dimensions, tree.nlevels + ) + + translation_cost = self.qbx_cost_factors_for_kernels_from_model( + tree.nlevels, xlat_cost, params + ) + + ndirect_sources_per_target_box = \ + self.get_ndirect_sources_per_target_box(traversal) + + result = self.fmm_modeled_cost_per_stage( + traversal, fmm_level_to_order, + ndirect_sources_per_target_box, + calibration_params, + box_target_counts_nonchild=box_target_counts_nonchild + ) + + if use_tsqbx: + result["eval_target_specific_qbx_locals"] = self.aggregate( + self.process_eval_target_specific_qbxl( + geo_data, translation_cost["p2p_tsqbx_cost"], + ndirect_sources_per_target_box=ndirect_sources_per_target_box + ) + ) + else: + result["form_global_qbx_locals"] = self.aggregate( + self.process_form_qbxl( + geo_data, translation_cost["p2qbxl_cost"], + ndirect_sources_per_target_box + ) + ) + + result["translate_box_multipoles_to_qbx_local"] = self.aggregate( + self.process_m2qbxl(geo_data, translation_cost["m2qbxl_cost"]) + ) + + result["translate_box_local_to_qbx_local"] = self.aggregate( + self.process_l2qbxl(geo_data, translation_cost["l2qbxl_cost"]) + ) + + result["eval_qbx_expansions"] = self.aggregate( + self.process_eval_qbxl(geo_data, translation_cost["qbxl2p_cost"]) + ) + + return result + def __call__(self, *args, **kwargs): - return self.get_qbx_modeled_cost(*args, **kwargs) + per_box = kwargs.pop('per_box', True) + if per_box: + return self.qbx_modeled_cost_per_box(*args, **kwargs) + else: + return self.qbx_modeled_cost_per_stage(*args, **kwargs) @staticmethod def get_constantone_calibration_params(): @@ -417,7 +497,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): :arg model_results: a :class:`list` of modeled costs. Each model cost can be obtained from `BoundExpression.get_modeled_cost` with "constant_one" for - argument `calibration_params`. + argument `calibration_params`, and `per_box` set to *False*. :arg timing_results: a :class:`list` of timing data. Each timing data can be obtained from `BoundExpression.eval`. :arg time_field_name: a :class:`str`, the field name from the timing result. 
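For reference, a sketch of the data layout this estimation routine consumes, matching the shapes used by the example driver and the tests in this series (all values invented): each model result maps stage names to modeled counts, and each timing result maps stage names to a record from which *time_field_name* selects one field.

    model_results = [
        {"form_global_qbx_locals": 1.0e6, "eval_qbx_expansions": 2.0e5},
    ]
    timing_results = [
        {"form_global_qbx_locals": {"wall_elapsed": 0.12},
         "eval_qbx_expansions": {"wall_elapsed": 0.03}},
    ]
    time_field_name = "wall_elapsed"

    measured = [t["form_global_qbx_locals"][time_field_name]
                for t in timing_results]
    print(measured)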
diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py index db5d1e7e..74dbcda7 100644 --- a/pytential/symbolic/execution.py +++ b/pytential/symbolic/execution.py @@ -336,7 +336,7 @@ class CostModelMapper(EvaluationMapperBase): """ def __init__(self, bound_expr, queue, - knl_specific_calibration_params, + knl_specific_calibration_params, per_box, context=None, target_geometry=None, target_points=None, target_normals=None, target_tangents=None): @@ -351,6 +351,7 @@ class CostModelMapper(EvaluationMapperBase): self.knl_specific_calibration_params = knl_specific_calibration_params self.modeled_cost = {} + self.per_box = per_box def exec_compute_potential_insn(self, queue, insn, bound_expr, evaluate): source = bound_expr.places.get_geometry(insn.source) @@ -364,7 +365,7 @@ class CostModelMapper(EvaluationMapperBase): calibration_params = self.knl_specific_calibration_params[knls] result, cost_model_result = source.cost_model_compute_potential_insn( - queue, insn, bound_expr, evaluate, calibration_params + queue, insn, bound_expr, evaluate, calibration_params, self.per_box ) # The compiler ensures this. @@ -670,8 +671,10 @@ class BoundExpression(object): def get_discretization(self, where): return self.places.get_discretization(where) - def get_modeled_cost(self, queue, calibration_params, **args): - cost_model_mapper = CostModelMapper(self, queue, calibration_params, args) + def get_modeled_cost(self, queue, calibration_params, per_box, **args): + cost_model_mapper = CostModelMapper( + self, queue, calibration_params, per_box, args + ) self.code.execute(cost_model_mapper) return cost_model_mapper.get_modeled_cost() diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 2dde059f..93e6a2a1 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -357,11 +357,14 @@ def test_timing_data_gathering(ctx_getter): # {{{ test cost model -@pytest.mark.parametrize("dim, use_target_specific_qbx", ( - (2, False), - (3, False), - (3, True))) -def test_cost_model(ctx_getter, dim, use_target_specific_qbx): +@pytest.mark.parametrize("dim, use_target_specific_qbx, per_box", ( + (2, False, False), + (3, False, False), + (3, True, False), + (2, False, True), + (3, False, True), + (3, True, True))) +def test_cost_model(ctx_getter, dim, use_target_specific_qbx, per_box): """Test that cost model gathering can execute successfully.""" cl_ctx = ctx_getter() queue = cl.CommandQueue(cl_ctx) @@ -379,14 +382,16 @@ def test_cost_model(ctx_getter, dim, use_target_specific_qbx): sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) op_S = bind(lpot_source, sym_op_S) - cost_S = op_S.get_modeled_cost(queue, "constant_one", sigma=sigma) + cost_S = op_S.get_modeled_cost(queue, "constant_one", per_box, sigma=sigma) assert len(cost_S) == 1 sym_op_S_plus_D = ( sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) + sym.D(k_sym, sigma_sym)) op_S_plus_D = bind(lpot_source, sym_op_S_plus_D) - cost_S_plus_D = op_S_plus_D.get_modeled_cost(queue, "constant_one", sigma=sigma) + cost_S_plus_D = op_S_plus_D.get_modeled_cost( + queue, "constant_one", per_box, sigma=sigma + ) assert len(cost_S_plus_D) == 2 # }}} @@ -647,7 +652,7 @@ def test_cost_model_correctness(ctx_getter, dim, off_surface, from pytools import one modeled_time = one( - op_S.get_modeled_cost(queue, "constant_one", sigma=sigma).values() + op_S.get_modeled_cost(queue, "constant_one", False, sigma=sigma).values() ) # Run FMM with ConstantOneWrangler. 
This can't be done with pytential's @@ -675,8 +680,7 @@ def test_cost_model_correctness(ctx_getter, dim, off_surface, if stage not in modeled_time: assert timing_data[stage]["ops_elapsed"] == 0 else: - modeled_stage_time = cost_model.aggregate(modeled_time[stage]) - if timing_data[stage]["ops_elapsed"] != modeled_stage_time: + if timing_data[stage]["ops_elapsed"] != modeled_time[stage]: mismatches.append( (stage, timing_data[stage]["ops_elapsed"], modeled_time[stage])) -- GitLab From c61f16d29f5ea09728533f8ec28e7bf4dbad7e4a Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 18 Oct 2019 17:12:57 -0500 Subject: [PATCH 33/55] Add metadata gathering and bug fix --- examples/cost.py | 8 ++--- pytential/qbx/__init__.py | 4 +-- pytential/qbx/cost.py | 55 +++++++++++++++++++++++++---- pytential/symbolic/execution.py | 15 +++++--- test/test_cost_model.py | 62 +++++++++++++++++++++++++++++---- 5 files changed, 120 insertions(+), 24 deletions(-) diff --git a/examples/cost.py b/examples/cost.py index 8fa12b78..d45dc201 100644 --- a/examples/cost.py +++ b/examples/cost.py @@ -102,8 +102,8 @@ def calibrate_cost_model(ctx): bound_op = get_bound_op(lpot_source) sigma = get_test_density(queue, lpot_source) - modeled_cost = bound_op.get_modeled_cost( - queue, "constant_one", False, sigma=sigma + modeled_cost, _ = bound_op.get_modeled_cost( + queue, "constant_one", per_box=False, sigma=sigma ) # Warm-up run. @@ -132,8 +132,8 @@ def test_cost_model(ctx, calibration_params): bound_op = get_bound_op(lpot_source) sigma = get_test_density(queue, lpot_source) - cost_S = bound_op.get_modeled_cost( - queue, calibration_params, False, sigma=sigma + cost_S, _ = bound_op.get_modeled_cost( + queue, calibration_params, per_box=False, sigma=sigma ) model_result = one(cost_S.values()) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index bd9dfd44..9efcd9d5 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -574,11 +574,11 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): def drive_cost_model( wrangler, strengths, geo_data, kernel, kernel_arguments): del strengths - cost_model_result = self.cost_model( + cost_model_result, metadata = self.cost_model( geo_data, kernel, kernel_arguments, calibration_params, per_box=per_box ) - return wrangler.full_output_zeros(), cost_model_result + return wrangler.full_output_zeros(), (cost_model_result, metadata) return self._dispatch_compute_potential_insn( queue, insn, bound_expr, evaluate, diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index b930197a..5fb0691a 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -297,6 +297,24 @@ class AbstractQBXCostModel(AbstractFMMCostModel): return cost_factors + @staticmethod + def gather_metadata(geo_data, fmm_level_to_order): + lpot_source = geo_data.lpot_source + tree = geo_data.tree() + + metadata = { + "p_qbx": lpot_source.qbx_order, + "nlevels": tree.nlevels, + "nsources": tree.nsources, + "ntargets": tree.ntargets, + "ncenters": geo_data.ncenters + } + + for level in range(tree.nlevels): + metadata["p_fmm_lev%d" % level] = fmm_level_to_order[level] + + return metadata + def qbx_modeled_cost_per_box(self, geo_data, kernel, kernel_arguments, calibration_params): # FIXME: This should support target filtering. 
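The metadata gathered above is returned to the caller alongside the cost itself, which is why the call sites in this patch now unpack a tuple. A sketch of what a caller might do with such a dictionary (key names as in gather_metadata above, values invented):

    metadata = {
        "p_qbx": 5, "nlevels": 3, "nsources": 10000, "ntargets": 12000,
        "ncenters": 4000, "p_fmm_lev0": 10, "p_fmm_lev1": 8, "p_fmm_lev2": 6,
    }

    fmm_orders = [metadata["p_fmm_lev%d" % lev]
                  for lev in range(metadata["nlevels"])]
    print("FMM order by level:", fmm_orders)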
@@ -368,7 +386,9 @@ class AbstractQBXCostModel(AbstractFMMCostModel): geo_data, translation_cost["qbxl2p_cost"] ) - return result + metadata = self.gather_metadata(geo_data, fmm_level_to_order) + + return result, metadata def qbx_modeled_cost_per_stage(self, geo_data, kernel, kernel_arguments, calibration_params): @@ -444,7 +464,9 @@ class AbstractQBXCostModel(AbstractFMMCostModel): self.process_eval_qbxl(geo_data, translation_cost["qbxl2p_cost"]) ) - return result + metadata = self.gather_metadata(geo_data, fmm_level_to_order) + + return result, metadata def __call__(self, *args, **kwargs): per_box = kwargs.pop('per_box', True) @@ -874,8 +896,10 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): return neval_qbxl * qbxl2p_cost - def get_qbx_modeled_cost(self, geo_data, kernel, kernel_arguments, - calibration_params): + def qbx_modeled_cost_per_box(self, geo_data, kernel, kernel_arguments, + calibration_params): + """This function additionally transfers geo_data to host if necessary + """ from pytential.qbx.utils import ToHostTransferredGeoDataWrapper from pytential.qbx.geometry import QBXFMMGeometryData @@ -885,9 +909,26 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): queue = cl.CommandQueue(geo_data.cl_context) geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) - AbstractQBXCostModel.get_qbx_modeled_cost( - self, geo_data, kernel, kernel_arguments, calibration_params - ) + return AbstractQBXCostModel.qbx_modeled_cost_per_box( + self, geo_data, kernel, kernel_arguments, calibration_params + ) + + def qbx_modeled_cost_per_stage(self, geo_data, kernel, kernel_arguments, + calibration_params): + """This function additionally transfers geo_data to host if necessary + """ + from pytential.qbx.utils import ToHostTransferredGeoDataWrapper + from pytential.qbx.geometry import QBXFMMGeometryData + + if not isinstance(geo_data, ToHostTransferredGeoDataWrapper): + assert isinstance(geo_data, QBXFMMGeometryData) + + queue = cl.CommandQueue(geo_data.cl_context) + geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) + + return AbstractQBXCostModel.qbx_modeled_cost_per_stage( + self, geo_data, kernel, kernel_arguments, calibration_params + ) # }}} diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py index 74dbcda7..62959a00 100644 --- a/pytential/symbolic/execution.py +++ b/pytential/symbolic/execution.py @@ -351,6 +351,7 @@ class CostModelMapper(EvaluationMapperBase): self.knl_specific_calibration_params = knl_specific_calibration_params self.modeled_cost = {} + self.metadata = {} self.per_box = per_box def exec_compute_potential_insn(self, queue, insn, bound_expr, evaluate): @@ -364,19 +365,21 @@ class CostModelMapper(EvaluationMapperBase): else: calibration_params = self.knl_specific_calibration_params[knls] - result, cost_model_result = source.cost_model_compute_potential_insn( - queue, insn, bound_expr, evaluate, calibration_params, self.per_box - ) + result, (cost_model_result, metadata) = \ + source.cost_model_compute_potential_insn( + queue, insn, bound_expr, evaluate, calibration_params, self.per_box, + ) # The compiler ensures this. 
assert insn not in self.modeled_cost self.modeled_cost[insn] = cost_model_result + self.metadata[insn] = metadata return result def get_modeled_cost(self): - return self.modeled_cost + return self.modeled_cost, self.metadata # }}} @@ -671,7 +674,9 @@ class BoundExpression(object): def get_discretization(self, where): return self.places.get_discretization(where) - def get_modeled_cost(self, queue, calibration_params, per_box, **args): + def get_modeled_cost(self, queue, calibration_params, **args): + per_box = args.pop('per_box', True) + cost_model_mapper = CostModelMapper( self, queue, calibration_params, per_box, args ) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 93e6a2a1..a170964d 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -34,8 +34,9 @@ import pyopencl as cl from boxtree.tools import ConstantOneExpansionWrangler from pytential.qbx import QBXLayerPotentialSource -from sumpy.kernel import LaplaceKernel +from sumpy.kernel import LaplaceKernel, HelmholtzKernel from pytential import bind, sym, norm # noqa +from pytools import one from pytential.qbx.cost import ( CLQBXCostModel, PythonQBXCostModel, pde_aware_translation_cost_model @@ -382,21 +383,69 @@ def test_cost_model(ctx_getter, dim, use_target_specific_qbx, per_box): sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) op_S = bind(lpot_source, sym_op_S) - cost_S = op_S.get_modeled_cost(queue, "constant_one", per_box, sigma=sigma) + cost_S, _ = op_S.get_modeled_cost( + queue, "constant_one", per_box=per_box, sigma=sigma + ) assert len(cost_S) == 1 sym_op_S_plus_D = ( sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) + sym.D(k_sym, sigma_sym)) op_S_plus_D = bind(lpot_source, sym_op_S_plus_D) - cost_S_plus_D = op_S_plus_D.get_modeled_cost( - queue, "constant_one", per_box, sigma=sigma + cost_S_plus_D, _ = op_S_plus_D.get_modeled_cost( + queue, "constant_one", per_box=per_box, sigma=sigma ) assert len(cost_S_plus_D) == 2 # }}} +# {{{ test cost model metadata gathering + +def test_cost_model_metadata_gathering(ctx_getter): + """Test that the cost model correctly gathers metadata.""" + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + + from sumpy.expansion.level_to_order import SimpleExpansionOrderFinder + + fmm_level_to_order = SimpleExpansionOrderFinder(tol=1e-5) + + lpot_source = get_lpot_source(queue, 2).copy( + fmm_level_to_order=fmm_level_to_order) + + sigma = get_density(queue, lpot_source) + + sigma_sym = sym.var("sigma") + k_sym = HelmholtzKernel(2, "k") + k = 2 + + sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1, k=sym.var("k")) + op_S = bind(lpot_source, sym_op_S) + + _, metadata = op_S.get_modeled_cost( + queue, "constant_one", sigma=sigma, k=k, per_box=False, return_metadata=True) + metadata = one(metadata.values()) + + geo_data = lpot_source.qbx_fmm_geometry_data( + target_discrs_and_qbx_sides=((lpot_source.density_discr, 1),)) + + tree = geo_data.tree() + + assert metadata["p_qbx"] == QBX_ORDER + assert metadata["nlevels"] == tree.nlevels + assert metadata["nsources"] == tree.nsources + assert metadata["ntargets"] == tree.ntargets + assert metadata["ncenters"] == geo_data.ncenters + + for level in range(tree.nlevels): + assert ( + metadata["p_fmm_lev%d" % level] + == fmm_level_to_order(k_sym, {"k": 2}, tree, level)) + +# }}} + + # {{{ constant one wrangler class ConstantOneQBXExpansionWrangler(ConstantOneExpansionWrangler): @@ -651,9 +700,10 @@ def test_cost_model_correctness(ctx_getter, dim, off_surface, sigma = get_density(queue, lpot_source) from pytools 
import one - modeled_time = one( - op_S.get_modeled_cost(queue, "constant_one", False, sigma=sigma).values() + modeled_time, _ = op_S.get_modeled_cost( + queue, "constant_one", per_box=False, sigma=sigma ) + modeled_time = one(modeled_time.values()) # Run FMM with ConstantOneWrangler. This can't be done with pytential's # high-level interface, so call the FMM driver directly. -- GitLab From f005a7a2bcd1e6e71d9c1d90b858d5729529a9a7 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sat, 19 Oct 2019 12:20:00 -0500 Subject: [PATCH 34/55] Add varying order test case --- test/test_cost_model.py | 59 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index a170964d..6a4d724b 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -739,6 +739,65 @@ def test_cost_model_correctness(ctx_getter, dim, off_surface, # }}} +# {{{ test order varying by level + +def test_cost_model_order_varying_by_level(ctx_getter): + """For FMM order varying by level, this checks to ensure that the costs are + different. The varying-level case should have larger cost. + """ + + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + + # {{{ constant level to order + + def level_to_order_constant(kernel, kernel_args, tree, level): + return 1 + + lpot_source = get_lpot_source(queue, 2).copy( + cost_model=CLQBXCostModel(queue), + fmm_level_to_order=level_to_order_constant) + + sigma_sym = sym.var("sigma") + + k_sym = LaplaceKernel(2) + sym_op = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) + + sigma = get_density(queue, lpot_source) + + cost_constant, metadata = bind(lpot_source, sym_op).get_modeled_cost( + queue, "constant_one", per_box=False, sigma=sigma + ) + + cost_constant = one(cost_constant.values()) + metadata = one(metadata.values()) + + # }}} + + # {{{ varying level to order + + def level_to_order_varying(kernel, kernel_args, tree, level): + return metadata["nlevels"] - level + + lpot_source = get_lpot_source(queue, 2).copy( + cost_model=CLQBXCostModel(queue), + fmm_level_to_order=level_to_order_varying) + + sigma = get_density(queue, lpot_source) + + cost_varying, _ = bind(lpot_source, sym_op).get_modeled_cost( + queue, "constant_one", per_box=False, sigma=sigma + ) + + cost_varying = one(cost_varying.values()) + + # }}} + + assert sum(cost_varying.values()) > sum(cost_constant.values()) + +# }}} + + # You can test individual routines by typing # $ python test_cost_model.py 'test_routine()' -- GitLab From 9d11c8dc799750e13e56a5a4ebdf1ac19015a5e0 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 21 Oct 2019 00:35:55 -0500 Subject: [PATCH 35/55] Compare per-box cost to per-stage cost --- test/test_cost_model.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 6a4d724b..55fb8eee 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -736,6 +736,26 @@ def test_cost_model_correctness(ctx_getter, dim, off_surface, assert not mismatches, "\n".join(str(s) for s in mismatches) + # {{{ Test per-box cost + + total_cost = 0.0 + for stage in timing_data: + total_cost += timing_data[stage]["ops_elapsed"] + + per_box_cost, _ = op_S.get_modeled_cost( + queue, "constant_one", per_box=True, sigma=sigma + ) + per_box_cost = one(per_box_cost.values()) + + total_aggregate_cost = cost_model.aggregate(per_box_cost) + assert total_cost == ( + total_aggregate_cost + + modeled_time["coarsen_multipoles"] + + modeled_time["refine_locals"] + ) + + # }}} + 
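The assertion just above encodes the bookkeeping relation between the two granularities: the per-box view only attributes box-local work, so the two tree-level stages (multipole coarsening and local refinement) have to be added back to recover the per-stage total. A toy numeric sketch of that relation (stage costs invented):

    box_attributed = {"form_global_qbx_locals": 120.0, "eval_qbx_expansions": 80.0}
    tree_level = {"coarsen_multipoles": 8.0, "refine_locals": 6.0}

    aggregate_per_box = sum(box_attributed.values())   # what summing per-box costs yields
    total_cost = aggregate_per_box + sum(tree_level.values())

    assert total_cost == (aggregate_per_box
                          + tree_level["coarsen_multipoles"]
                          + tree_level["refine_locals"])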
# }}} -- GitLab From 9e8bc029b474f8783f6e2ff2ef3091ef3a85ff7a Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sat, 11 Jan 2020 15:08:15 -0800 Subject: [PATCH 36/55] Make cost model compatible with upstream changes --- examples/cost.py | 33 ++++++++- pytential/qbx/__init__.py | 5 +- pytential/qbx/cost.py | 115 +++++++++++++++++--------------- pytential/symbolic/execution.py | 2 +- test/test_cost_model.py | 22 +++--- 5 files changed, 106 insertions(+), 71 deletions(-) diff --git a/examples/cost.py b/examples/cost.py index d45dc201..088ce5b6 100644 --- a/examples/cost.py +++ b/examples/cost.py @@ -1,10 +1,37 @@ +from __future__ import division, print_function + +__copyright__ = """ + Copyright (C) 2018 Matt Wala + Copyright (C) 2019 Hao Gao +""" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + """Calibrates a cost model and reports on the accuracy.""" import pyopencl as cl import numpy as np from pytential import sym, bind -from pytential.qbx.cost import CLQBXCostModel +from pytential.qbx.cost import QBXCostModel from pytools import one @@ -92,7 +119,7 @@ def get_test_density(queue, lpot_source): def calibrate_cost_model(ctx): queue = cl.CommandQueue(ctx) - cost_model = CLQBXCostModel(queue) + cost_model = QBXCostModel(queue) model_results = [] timing_results = [] @@ -125,7 +152,7 @@ def calibrate_cost_model(ctx): def test_cost_model(ctx, calibration_params): queue = cl.CommandQueue(ctx) - cost_model = CLQBXCostModel(queue) + cost_model = QBXCostModel(queue) for lpot_source in test_geometries(queue): lpot_source = lpot_source.copy(cost_model=cost_model) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index 9efcd9d5..0ccb9fb2 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -225,8 +225,9 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): self.geometry_data_inspector = geometry_data_inspector if cost_model is None: - from pytential.qbx.cost import PythonQBXCostModel - cost_model = PythonQBXCostModel() + from pytential.qbx.cost import QBXCostModel + with cl.CommandQueue(self.cl_context) as queue: + cost_model = QBXCostModel(queue) self.cost_model = cost_model diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 5fb0691a..e47cde02 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -40,7 +40,7 @@ from functools import partial import sys from boxtree.cost import ( - FMMTranslationCostModel, AbstractFMMCostModel, PythonFMMCostModel, CLFMMCostModel + FMMTranslationCostModel, AbstractFMMCostModel, FMMCostModel, _PythonFMMCostModel ) from abc import abstractmethod @@ -65,38 +65,39 @@ multipole expansion to a QBX local expansion. :class:`AbstractQBXCostModel` uses :class:`QBXTranslationCostModel` and kernel-specific calibration parameter to compute the total cost of each step of QBX -in each box. There are two implementations of the interface -:class:`AbstractQBXCostModel`, namely :class:`CLQBXCostModel` using OpenCL and -:class:`PythonQBXCostModel` using pure Python. The kernel-specific calibration -parameter can be estimated using *estimate_knl_specific_calibration_params*. +in each box. :class:`QBXCostModel` is one implementation of +:class:`AbstractQBXCostModel` using OpenCL. + +:file:`examples/cost.py` demostrates how the calibration and evaluation are +performed. Translation Cost of a Single Operation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: QBXTranslationCostModel -.. autofunction:: pde_aware_translation_cost_model +.. autofunction:: make_pde_aware_translation_cost_model -.. autofunction:: taylor_translation_cost_model +.. autofunction:: make_taylor_translation_cost_model -Training (Generate Calibration Parameters) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Calibration (Generate Calibration Parameters) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. automethod:: AbstractQBXCostModel.estimate_knl_specific_calibration_params Evaluating ^^^^^^^^^^ -.. automethod:: AbstractQBXCostModel.qbx_modeled_cost_per_stage +.. automethod:: AbstractQBXCostModel.qbx_cost_per_stage -.. automethod:: AbstractQBXCostModel.qbx_modeled_cost_per_box +.. automethod:: AbstractQBXCostModel.qbx_cost_per_box Utilities ^^^^^^^^^ -.. automethod:: boxtree.cost.AbstractFMMCostModel.aggregate +.. automethod:: boxtree.cost.AbstractFMMCostModel.aggregate_over_boxes -.. 
automethod:: AbstractQBXCostModel.get_constantone_calibration_params +.. automethod:: AbstractQBXCostModel.get_unit_calibration_params """ @@ -138,7 +139,7 @@ class QBXTranslationCostModel(FMMTranslationCostModel): # {{{ translation cost model factories -def pde_aware_translation_cost_model(dim, nlevels): +def make_pde_aware_translation_cost_model(dim, nlevels): """Create a cost model for FMM translation operators that make use of the knowledge that the potential satisfies a PDE. """ @@ -159,7 +160,7 @@ def pde_aware_translation_cost_model(dim, nlevels): uses_point_and_shoot=uses_point_and_shoot) -def taylor_translation_cost_model(dim, nlevels): +def make_taylor_translation_cost_model(dim, nlevels): """Create a cost model for FMM translation based on Taylor expansions in Cartesian coordinates. """ @@ -315,8 +316,8 @@ class AbstractQBXCostModel(AbstractFMMCostModel): return metadata - def qbx_modeled_cost_per_box(self, geo_data, kernel, kernel_arguments, - calibration_params): + def qbx_cost_per_box(self, geo_data, kernel, kernel_arguments, + calibration_params): # FIXME: This should support target filtering. lpot_source = geo_data.lpot_source use_tsqbx = lpot_source._use_target_specific_qbx @@ -356,10 +357,11 @@ class AbstractQBXCostModel(AbstractFMMCostModel): ndirect_sources_per_target_box = \ self.get_ndirect_sources_per_target_box(traversal) - result = self.fmm_modeled_cost_per_box( + # get FMM cost per box from parent class + result = self.cost_per_box( traversal, fmm_level_to_order, - ndirect_sources_per_target_box, calibration_params, + ndirect_sources_per_target_box=ndirect_sources_per_target_box, box_target_counts_nonchild=box_target_counts_nonchild ) @@ -390,8 +392,8 @@ class AbstractQBXCostModel(AbstractFMMCostModel): return result, metadata - def qbx_modeled_cost_per_stage(self, geo_data, kernel, kernel_arguments, - calibration_params): + def qbx_cost_per_stage(self, geo_data, kernel, kernel_arguments, + calibration_params): # FIXME: This should support target filtering. 
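The cost model configured by the factories renamed above expresses each per-operation cost symbolically in calibration constants such as `c_p2qbxl`; those expressions only turn into concrete numbers once the constants and expansion orders are substituted in. A toy sketch of that substitution step, assuming pymbolic's `var`/`evaluate` helpers and a made-up coefficient count (the body of `qbx_cost_per_stage` resumes below):

    from pymbolic import var, evaluate

    p_qbx = 5
    ncoeffs_qbx = p_qbx + 1                      # toy count, not the library's formula
    p2qbxl_cost = var("c_p2qbxl") * ncoeffs_qbx  # symbolic cost of forming one QBX local

    print(evaluate(p2qbxl_cost, {"c_p2qbxl": 2.5e-9}))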
lpot_source = geo_data.lpot_source use_tsqbx = lpot_source._use_target_specific_qbx @@ -430,37 +432,38 @@ class AbstractQBXCostModel(AbstractFMMCostModel): ndirect_sources_per_target_box = \ self.get_ndirect_sources_per_target_box(traversal) - result = self.fmm_modeled_cost_per_stage( + # get FMM per-stage cost from parent class + result = self.cost_per_stage( traversal, fmm_level_to_order, - ndirect_sources_per_target_box, calibration_params, + ndirect_sources_per_target_box=ndirect_sources_per_target_box, box_target_counts_nonchild=box_target_counts_nonchild ) if use_tsqbx: - result["eval_target_specific_qbx_locals"] = self.aggregate( + result["eval_target_specific_qbx_locals"] = self.aggregate_over_boxes( self.process_eval_target_specific_qbxl( geo_data, translation_cost["p2p_tsqbx_cost"], ndirect_sources_per_target_box=ndirect_sources_per_target_box ) ) else: - result["form_global_qbx_locals"] = self.aggregate( + result["form_global_qbx_locals"] = self.aggregate_over_boxes( self.process_form_qbxl( geo_data, translation_cost["p2qbxl_cost"], ndirect_sources_per_target_box ) ) - result["translate_box_multipoles_to_qbx_local"] = self.aggregate( + result["translate_box_multipoles_to_qbx_local"] = self.aggregate_over_boxes( self.process_m2qbxl(geo_data, translation_cost["m2qbxl_cost"]) ) - result["translate_box_local_to_qbx_local"] = self.aggregate( + result["translate_box_local_to_qbx_local"] = self.aggregate_over_boxes( self.process_l2qbxl(geo_data, translation_cost["l2qbxl_cost"]) ) - result["eval_qbx_expansions"] = self.aggregate( + result["eval_qbx_expansions"] = self.aggregate_over_boxes( self.process_eval_qbxl(geo_data, translation_cost["qbxl2p_cost"]) ) @@ -471,12 +474,12 @@ class AbstractQBXCostModel(AbstractFMMCostModel): def __call__(self, *args, **kwargs): per_box = kwargs.pop('per_box', True) if per_box: - return self.qbx_modeled_cost_per_box(*args, **kwargs) + return self.qbx_cost_per_box(*args, **kwargs) else: - return self.qbx_modeled_cost_per_stage(*args, **kwargs) + return self.qbx_cost_per_stage(*args, **kwargs) @staticmethod - def get_constantone_calibration_params(): + def get_unit_calibration_params(): return dict( c_l2l=1.0, c_l2p=1.0, @@ -493,18 +496,18 @@ class AbstractQBXCostModel(AbstractFMMCostModel): c_l2qbxl=1.0 ) + _QBX_STAGE_TO_CALIBRATION_PARAMETER = { + "form_global_qbx_locals": "c_p2qbxl", + "translate_box_multipoles_to_qbx_local": "c_m2qbxl", + "translate_box_local_to_qbx_local": "c_l2qbxl", + "eval_qbx_expansions": "c_qbxl2p", + "eval_target_specific_qbx_locals": "c_p2p_tsqbx" + } + def estimate_calibration_params(self, model_results, timing_results, time_field_name="wall_elapsed", additional_stage_to_param_names=()): - _QBX_STAGE_TO_CALIBRATION_PARAMETER = { - "form_global_qbx_locals": "c_p2qbxl", - "translate_box_multipoles_to_qbx_local": "c_m2qbxl", - "translate_box_local_to_qbx_local": "c_l2qbxl", - "eval_qbx_expansions": "c_qbxl2p", - "eval_target_specific_qbx_locals": "c_p2p_tsqbx" - } - - stage_to_param_names = _QBX_STAGE_TO_CALIBRATION_PARAMETER.copy() + stage_to_param_names = self._QBX_STAGE_TO_CALIBRATION_PARAMETER.copy() stage_to_param_names.update(additional_stage_to_param_names) return AbstractFMMCostModel.estimate_calibration_params( @@ -559,9 +562,10 @@ class AbstractQBXCostModel(AbstractFMMCostModel): return params_per_kernel -class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): - def __init__(self, queue, - translation_cost_model_factory=pde_aware_translation_cost_model): +class QBXCostModel(AbstractQBXCostModel, FMMCostModel): + 
def __init__( + self, queue, + translation_cost_model_factory=make_pde_aware_translation_cost_model): """ :arg queue: a :class:`pyopencl.CommandQueue` object on which the execution of this object runs. @@ -569,7 +573,7 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): and the number of tree levels as arguments, returns an object of :class:`TranslationCostModel`. """ - CLFMMCostModel.__init__(self, queue, translation_cost_model_factory) + FMMCostModel.__init__(self, queue, translation_cost_model_factory) @memoize_method def _fill_array_with_index_knl(self, idx_dtype, array_dtype): @@ -791,15 +795,16 @@ class CLQBXCostModel(AbstractQBXCostModel, CLFMMCostModel): return self.translation_costs_to_dev(translation_costs) -class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): - def __init__(self, - translation_cost_model_factory=pde_aware_translation_cost_model): +class _PythonQBXCostModel(AbstractQBXCostModel, _PythonFMMCostModel): + def __init__( + self, + translation_cost_model_factory=make_pde_aware_translation_cost_model): """ :arg translation_cost_model_factory: a function, which takes tree dimension and the number of tree levels as arguments, returns an object of :class:`TranslationCostModel`. """ - PythonFMMCostModel.__init__(self, translation_cost_model_factory) + _PythonFMMCostModel.__init__(self, translation_cost_model_factory) def process_form_qbxl(self, geo_data, p2qbxl_cost, ndirect_sources_per_target_box): @@ -896,9 +901,9 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): return neval_qbxl * qbxl2p_cost - def qbx_modeled_cost_per_box(self, geo_data, kernel, kernel_arguments, - calibration_params): - """This function additionally transfers geo_data to host if necessary + def qbx_cost_per_box(self, geo_data, kernel, kernel_arguments, + calibration_params): + """This function transfers *geo_data* to host if necessary """ from pytential.qbx.utils import ToHostTransferredGeoDataWrapper from pytential.qbx.geometry import QBXFMMGeometryData @@ -909,12 +914,12 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): queue = cl.CommandQueue(geo_data.cl_context) geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) - return AbstractQBXCostModel.qbx_modeled_cost_per_box( + return AbstractQBXCostModel.qbx_cost_per_box( self, geo_data, kernel, kernel_arguments, calibration_params ) - def qbx_modeled_cost_per_stage(self, geo_data, kernel, kernel_arguments, - calibration_params): + def qbx_cost_per_stage(self, geo_data, kernel, kernel_arguments, + calibration_params): """This function additionally transfers geo_data to host if necessary """ from pytential.qbx.utils import ToHostTransferredGeoDataWrapper @@ -926,7 +931,7 @@ class PythonQBXCostModel(AbstractQBXCostModel, PythonFMMCostModel): queue = cl.CommandQueue(geo_data.cl_context) geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) - return AbstractQBXCostModel.qbx_modeled_cost_per_stage( + return AbstractQBXCostModel.qbx_cost_per_stage( self, geo_data, kernel, kernel_arguments, calibration_params ) diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py index 62959a00..abd52823 100644 --- a/pytential/symbolic/execution.py +++ b/pytential/symbolic/execution.py @@ -361,7 +361,7 @@ class CostModelMapper(EvaluationMapperBase): if (isinstance(self.knl_specific_calibration_params, str) and self.knl_specific_calibration_params == "constant_one"): calibration_params = \ - AbstractQBXCostModel.get_constantone_calibration_params() + 
AbstractQBXCostModel.get_unit_calibration_params() else: calibration_params = self.knl_specific_calibration_params[knls] diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 55fb8eee..b06a0eec 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -39,7 +39,7 @@ from pytential import bind, sym, norm # noqa from pytools import one from pytential.qbx.cost import ( - CLQBXCostModel, PythonQBXCostModel, pde_aware_translation_cost_model + QBXCostModel, _PythonQBXCostModel, make_pde_aware_translation_cost_model ) import time @@ -91,13 +91,15 @@ def test_compare_cl_and_py_cost_model(ctx_factory): # {{{ Construct cost models - cl_cost_model = CLQBXCostModel(queue) - python_cost_model = PythonQBXCostModel() + cl_cost_model = QBXCostModel(queue) + python_cost_model = _PythonQBXCostModel() tree = geo_data.tree() - xlat_cost = pde_aware_translation_cost_model(tree.targets.shape[0], tree.nlevels) + xlat_cost = make_pde_aware_translation_cost_model( + tree.targets.shape[0], tree.nlevels + ) - constant_one_params = CLQBXCostModel.get_constantone_calibration_params() + constant_one_params = QBXCostModel.get_unit_calibration_params() constant_one_params["p_qbx"] = 5 for ilevel in range(tree.nlevels): constant_one_params["p_fmm_lev%d" % ilevel] = 10 @@ -374,7 +376,7 @@ def test_cost_model(ctx_getter, dim, use_target_specific_qbx, per_box): get_lpot_source(queue, dim) .copy( _use_target_specific_qbx=use_target_specific_qbx, - cost_model=CLQBXCostModel(queue))) + cost_model=QBXCostModel(queue))) sigma = get_density(queue, lpot_source) @@ -669,7 +671,7 @@ def test_cost_model_correctness(ctx_getter, dim, off_surface, cl_ctx = ctx_getter() queue = cl.CommandQueue(cl_ctx) - cost_model = CLQBXCostModel( + cost_model = QBXCostModel( queue, translation_cost_model_factory=OpCountingTranslationCostModel ) @@ -747,7 +749,7 @@ def test_cost_model_correctness(ctx_getter, dim, off_surface, ) per_box_cost = one(per_box_cost.values()) - total_aggregate_cost = cost_model.aggregate(per_box_cost) + total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost) assert total_cost == ( total_aggregate_cost + modeled_time["coarsen_multipoles"] @@ -775,7 +777,7 @@ def test_cost_model_order_varying_by_level(ctx_getter): return 1 lpot_source = get_lpot_source(queue, 2).copy( - cost_model=CLQBXCostModel(queue), + cost_model=QBXCostModel(queue), fmm_level_to_order=level_to_order_constant) sigma_sym = sym.var("sigma") @@ -800,7 +802,7 @@ def test_cost_model_order_varying_by_level(ctx_getter): return metadata["nlevels"] - level lpot_source = get_lpot_source(queue, 2).copy( - cost_model=CLQBXCostModel(queue), + cost_model=QBXCostModel(queue), fmm_level_to_order=level_to_order_varying) sigma = get_density(queue, lpot_source) -- GitLab From dc01a690fba0ff436db9678c116529bcb845b902 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sat, 11 Jan 2020 15:34:49 -0800 Subject: [PATCH 37/55] Fix CI configuration file --- .gitlab-ci.yml | 6 ++++-- .test-conda-env-py3-requirements.txt | 5 ----- 2 files changed, 4 insertions(+), 7 deletions(-) delete mode 100644 .test-conda-env-py3-requirements.txt diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 73c8aa9e..ebf12132 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -82,7 +82,6 @@ Python 3 Conda: - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine - export CONDA_ENVIRONMENT=.test-conda-env-py3.yml - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} - - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - curl -L -O -k 
https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" tags: @@ -90,6 +89,7 @@ Python 3 Conda: - large-node except: - tags + artifacts: reports: junit: test/pytest.xml @@ -100,16 +100,17 @@ Python 3 Conda Apple: - export LANG=en_US.UTF-8 - export CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} - - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - export CC=clang - set -o xtrace - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" + tags: - apple except: - tags retry: 2 + artifacts: reports: junit: test/pytest.xml @@ -127,6 +128,7 @@ Documentation: Pylint: script: # Needed to avoid name shadowing issues when running from source directory. + # Pylint won't find the Cython bits without this - PROJECT_INSTALL_FLAGS="--editable" - export PY_EXE=python3 # Pin to numpy 1.15 diff --git a/.test-conda-env-py3-requirements.txt b/.test-conda-env-py3-requirements.txt deleted file mode 100644 index eae3cf00..00000000 --- a/.test-conda-env-py3-requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -git+https://gitlab.tiker.net/inducer/boxtree@opencl-counter -git+https://github.com/inducer/pymbolic -git+https://github.com/inducer/loopy -git+https://gitlab.tiker.net/inducer/sumpy -git+https://github.com/inducer/meshmode -- GitLab From 203533ae7123ba7d2b21e8d2b1da87481e4cc613 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 21 Feb 2020 11:27:06 -0600 Subject: [PATCH 38/55] Split get_modeled_cost into per-box and per-stage costs --- README.rst | 9 +++++-- examples/cost.py | 8 ++---- pytential/qbx/cost.py | 4 +-- pytential/symbolic/execution.py | 33 ++++++++++++++++++++++--- test/test_cost_model.py | 44 +++++++++++++++++++-------------- 5 files changed, 66 insertions(+), 32 deletions(-) diff --git a/README.rst b/README.rst index ad3f7905..722b863f 100644 --- a/README.rst +++ b/README.rst @@ -2,9 +2,14 @@ pytential: 2D/3D Layer Potential Evaluation =========================================== .. image:: https://gitlab.tiker.net/inducer/pytential/badges/master/pipeline.svg - :target: https://gitlab.tiker.net/inducer/pytential/commits/master + :alt: Gitlab Build Status + :target: https://gitlab.tiker.net/inducer/pytential/commits/master +.. image:: https://dev.azure.com/ak-spam/inducer/_apis/build/status/inducer.pytential?branchName=master + :alt: Azure Build Status + :target: https://dev.azure.com/ak-spam/inducer/_build/latest?definitionId=16&branchName=master .. image:: https://badge.fury.io/py/pytential.png - :target: http://pypi.python.org/pypi/pytential + :alt: Python Package Index Release Page + :target: https://pypi.org/project/pytential/ pytential helps you accurately evaluate layer potentials (and, sooner or later, volume potentials). diff --git a/examples/cost.py b/examples/cost.py index 088ce5b6..03d7dc6f 100644 --- a/examples/cost.py +++ b/examples/cost.py @@ -129,9 +129,7 @@ def calibrate_cost_model(ctx): bound_op = get_bound_op(lpot_source) sigma = get_test_density(queue, lpot_source) - modeled_cost, _ = bound_op.get_modeled_cost( - queue, "constant_one", per_box=False, sigma=sigma - ) + modeled_cost, _ = bound_op.cost_per_stage(queue, "constant_one", sigma=sigma) # Warm-up run. 
bound_op.eval(queue, {"sigma": sigma}) @@ -159,9 +157,7 @@ def test_cost_model(ctx, calibration_params): bound_op = get_bound_op(lpot_source) sigma = get_test_density(queue, lpot_source) - cost_S, _ = bound_op.get_modeled_cost( - queue, calibration_params, per_box=False, sigma=sigma - ) + cost_S, _ = bound_op.cost_per_stage(queue, calibration_params, sigma=sigma) model_result = one(cost_S.values()) # Warm-up run. diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index e47cde02..ea933d17 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -521,8 +521,8 @@ class AbstractQBXCostModel(AbstractFMMCostModel): real costs. :arg model_results: a :class:`list` of modeled costs. Each model cost can be - obtained from `BoundExpression.get_modeled_cost` with "constant_one" for - argument `calibration_params`, and `per_box` set to *False*. + obtained from `BoundExpression.cost_per_stage` with "constant_one" for + argument `calibration_params`. :arg timing_results: a :class:`list` of timing data. Each timing data can be obtained from `BoundExpression.eval`. :arg time_field_name: a :class:`str`, the field name from the timing result. diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py index 964044b2..97663240 100644 --- a/pytential/symbolic/execution.py +++ b/pytential/symbolic/execution.py @@ -655,7 +655,8 @@ class BoundExpression(object): """An expression readied for evaluation by binding it to a :class:`GeometryCollection`. - .. automethod :: get_modeled_cost + .. automethod :: cost_per_stage + .. automethod :: cost_per_box .. automethod :: scipy_op .. automethod :: eval .. automethod :: __call__ @@ -674,11 +675,35 @@ class BoundExpression(object): def get_discretization(self, where): return self.places.get_discretization(where) - def get_modeled_cost(self, queue, calibration_params, **args): - per_box = args.pop('per_box', True) + def cost_per_stage(self, queue, calibration_params, **args): + """ + :arg queue: a :class:`pyopencl.CommandQueue` object. + :arg calibration_params: either a :class:`dict` returned by + `estimate_knl_specific_calibration_params`, or a :class:`str` + "constant_one". + :return: a :class:`dict` mapping from instruction to per-stage cost. Each + per-stage cost is represented by a :class:`dict` mapping from the stage + name to the predicted time. + """ + cost_model_mapper = CostModelMapper( + self, queue, calibration_params, False, args + ) + self.code.execute(cost_model_mapper) + return cost_model_mapper.get_modeled_cost() + def cost_per_box(self, queue, calibration_params, **args): + """ + :arg queue: a :class:`pyopencl.CommandQueue` object. + :arg calibration_params: either a :class:`dict` returned by + `estimate_knl_specific_calibration_params`, or a :class:`str` + "constant_one". + :return: a :class:`dict` mapping from instruction to per-box cost. Each + per-box cost is represented by a :class:`numpy.ndarray` or + :class:`pyopencl.array.Array` of shape (nboxes,), where the ith entry + represents the cost of all stages for box i. 
+ """ cost_model_mapper = CostModelMapper( - self, queue, calibration_params, per_box, args + self, queue, calibration_params, True, args ) self.code.execute(cost_model_mapper) return cost_model_mapper.get_modeled_cost() diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 3ca975af..9cefb5b9 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -385,18 +385,28 @@ def test_cost_model(ctx_factory, dim, use_target_specific_qbx, per_box): sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) op_S = bind(lpot_source, sym_op_S) - cost_S, _ = op_S.get_modeled_cost( - queue, "constant_one", per_box=per_box, sigma=sigma - ) + + if per_box: + cost_S, _ = op_S.cost_per_box(queue, "constant_one", sigma=sigma) + else: + cost_S, _ = op_S.cost_per_stage(queue, "constant_one", sigma=sigma) + assert len(cost_S) == 1 sym_op_S_plus_D = ( sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) + sym.D(k_sym, sigma_sym, qbx_forced_limit="avg")) op_S_plus_D = bind(lpot_source, sym_op_S_plus_D) - cost_S_plus_D, _ = op_S_plus_D.get_modeled_cost( - queue, "constant_one", per_box=per_box, sigma=sigma - ) + + if per_box: + cost_S_plus_D, _ = op_S_plus_D.cost_per_box( + queue, "constant_one", sigma=sigma + ) + else: + cost_S_plus_D, _ = op_S_plus_D.cost_per_stage( + queue, "constant_one", sigma=sigma + ) + assert len(cost_S_plus_D) == 2 # }}} @@ -425,8 +435,9 @@ def test_cost_model_metadata_gathering(ctx_factory): sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1, k=sym.var("k")) op_S = bind(lpot_source, sym_op_S) - _, metadata = op_S.get_modeled_cost( - queue, "constant_one", sigma=sigma, k=k, per_box=False, return_metadata=True) + _, metadata = op_S.cost_per_stage( + queue, "constant_one", sigma=sigma, k=k, return_metadata=True + ) metadata = one(metadata.values()) geo_data = lpot_source.qbx_fmm_geometry_data( @@ -702,9 +713,7 @@ def test_cost_model_correctness(ctx_factory, dim, off_surface, sigma = get_density(queue, lpot_source) from pytools import one - modeled_time, _ = op_S.get_modeled_cost( - queue, "constant_one", per_box=False, sigma=sigma - ) + modeled_time, _ = op_S.cost_per_stage(queue, "constant_one", sigma=sigma) modeled_time = one(modeled_time.values()) # Run FMM with ConstantOneWrangler. 
This can't be done with pytential's @@ -744,9 +753,8 @@ def test_cost_model_correctness(ctx_factory, dim, off_surface, for stage in timing_data: total_cost += timing_data[stage]["ops_elapsed"] - per_box_cost, _ = op_S.get_modeled_cost( - queue, "constant_one", per_box=True, sigma=sigma - ) + per_box_cost, _ = op_S.cost_per_box(queue, "constant_one", sigma=sigma) + print(per_box_cost) per_box_cost = one(per_box_cost.values()) total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost) @@ -787,8 +795,8 @@ def test_cost_model_order_varying_by_level(ctx_factory): sigma = get_density(queue, lpot_source) - cost_constant, metadata = bind(lpot_source, sym_op).get_modeled_cost( - queue, "constant_one", per_box=False, sigma=sigma + cost_constant, metadata = bind(lpot_source, sym_op).cost_per_stage( + queue, "constant_one", sigma=sigma ) cost_constant = one(cost_constant.values()) @@ -807,8 +815,8 @@ def test_cost_model_order_varying_by_level(ctx_factory): sigma = get_density(queue, lpot_source) - cost_varying, _ = bind(lpot_source, sym_op).get_modeled_cost( - queue, "constant_one", per_box=False, sigma=sigma + cost_varying, _ = bind(lpot_source, sym_op).cost_per_stage( + queue, "constant_one", sigma=sigma ) cost_varying = one(cost_varying.values()) -- GitLab From bed95f62af2638018a514eeacb4b0198be73b90a Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 23 Feb 2020 17:09:02 -0600 Subject: [PATCH 39/55] Interface tweak and better doc --- pytential/qbx/__init__.py | 18 ++++++++++++------ pytential/qbx/cost.py | 37 ++++++++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index 0ccb9fb2..90711b90 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -226,8 +226,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): if cost_model is None: from pytential.qbx.cost import QBXCostModel - with cl.CommandQueue(self.cl_context) as queue: - cost_model = QBXCostModel(queue) + queue = cl.CommandQueue(self.cl_context) + cost_model = QBXCostModel(queue) self.cost_model = cost_model @@ -575,10 +575,16 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): def drive_cost_model( wrangler, strengths, geo_data, kernel, kernel_arguments): del strengths - cost_model_result, metadata = self.cost_model( - geo_data, kernel, kernel_arguments, calibration_params, - per_box=per_box - ) + + if per_box: + cost_model_result, metadata = self.cost_model.qbx_cost_per_box( + geo_data, kernel, kernel_arguments, calibration_params + ) + else: + cost_model_result, metadata = self.cost_model.qbx_cost_per_stage( + geo_data, kernel, kernel_arguments, calibration_params + ) + return wrangler.full_output_zeros(), (cost_model_result, metadata) return self._dispatch_compute_potential_insn( diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index ea933d17..c6d4e3f7 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -80,6 +80,13 @@ Translation Cost of a Single Operation .. autofunction:: make_taylor_translation_cost_model +Cost Model Classes +^^^^^^^^^^^^^^^^^^ + +.. autoclass:: AbstractQBXCostModel + +.. autoclass:: QBXCostModel + Calibration (Generate Calibration Parameters) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -92,13 +99,16 @@ Evaluating .. 
automethod:: AbstractQBXCostModel.qbx_cost_per_box +To get the cost from `BoundExpression`, refer to +:meth:`pytential.symbolic.execution.BoundExpression.cost_per_stage` and +:meth:`pytential.symbolic.execution.BoundExpression.cost_per_box`. + Utilities ^^^^^^^^^ .. automethod:: boxtree.cost.AbstractFMMCostModel.aggregate_over_boxes .. automethod:: AbstractQBXCostModel.get_unit_calibration_params - """ @@ -181,6 +191,19 @@ def make_taylor_translation_cost_model(dim, nlevels): # {{{ cost model class AbstractQBXCostModel(AbstractFMMCostModel): + """An interface to obtain both QBX operation counts and calibrated (e.g. in + seconds) cost estimates. + + * To obtain operation counts only, use :meth:`get_unit_calibration_params` + with :meth:`qbx_cost_per_stage` or :meth:`qbx_cost_per_box`. + + * To calibrate the model, pass operation counts per stage together with timing + data to :meth:`estimate_knl_specific_calibration_params`. + + * To evaluate the calibrated models, pass the kernel-specific calibration + parameters from :meth:`estimate_knl_specific_calibration_params` to + :meth:`qbx_cost_per_stage` or :meth:`qbx_cost_per_box`. + """ @abstractmethod def process_form_qbxl(self, geo_data, p2qbxl_cost, @@ -471,13 +494,6 @@ class AbstractQBXCostModel(AbstractFMMCostModel): return result, metadata - def __call__(self, *args, **kwargs): - per_box = kwargs.pop('per_box', True) - if per_box: - return self.qbx_cost_per_box(*args, **kwargs) - else: - return self.qbx_cost_per_stage(*args, **kwargs) - @staticmethod def get_unit_calibration_params(): return dict( @@ -563,6 +579,9 @@ class AbstractQBXCostModel(AbstractFMMCostModel): class QBXCostModel(AbstractQBXCostModel, FMMCostModel): + """This class is an implementation of interface :class:`AbstractQBXCostModel` + using PyOpenCL + """ def __init__( self, queue, translation_cost_model_factory=make_pde_aware_translation_cost_model): @@ -571,7 +590,7 @@ class QBXCostModel(AbstractQBXCostModel, FMMCostModel): of this object runs. :arg translation_cost_model_factory: a function, which takes tree dimension and the number of tree levels as arguments, returns an object of - :class:`TranslationCostModel`. + :class:`QBXTranslationCostModel`. 
""" FMMCostModel.__init__(self, queue, translation_cost_model_factory) -- GitLab From 9eb057a066c3e3a91af3e6a30aa49c3587d68fdf Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 23 Feb 2020 17:13:33 -0600 Subject: [PATCH 40/55] Switch boxtree requirement back to master branch --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6c77ae23..625deb28 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ git+https://github.com/inducer/modepy git+https://github.com/inducer/pyopencl git+https://github.com/inducer/islpy git+https://github.com/inducer/loopy -git+https://github.com/gaohao95/boxtree@opencl-counter +git+https://gitlab.tiker.net/inducer/boxtree git+https://github.com/inducer/meshmode git+https://gitlab.tiker.net/inducer/sumpy git+https://gitlab.tiker.net/inducer/pyfmmlib -- GitLab From cd77f318331c2e5ed7f7f80f8c1fd6920f3dc93a Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 25 Feb 2020 10:15:44 -0600 Subject: [PATCH 41/55] Use frozenset instead of tuple --- pytential/qbx/cost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index c6d4e3f7..629be98e 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -557,7 +557,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): for insn in real_cost: assert (insn in model_cost) - knls = tuple(knl for knl in insn.kernels) + knls = frozenset(knl for knl in insn.kernels) if knls not in cost_per_kernel: cost_per_kernel[knls] = { -- GitLab From 7ed1b6fddc7e15e1d85a0e3c3719e78461bcf145 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 14 Apr 2020 23:49:49 -0500 Subject: [PATCH 42/55] Bug fix --- pytential/symbolic/execution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py index 97663240..643c3237 100644 --- a/pytential/symbolic/execution.py +++ b/pytential/symbolic/execution.py @@ -356,7 +356,7 @@ class CostModelMapper(EvaluationMapperBase): def exec_compute_potential_insn(self, queue, insn, bound_expr, evaluate): source = bound_expr.places.get_geometry(insn.source) - knls = tuple(knl for knl in insn.kernels) + knls = frozenset(knl for knl in insn.kernels) if (isinstance(self.knl_specific_calibration_params, str) and self.knl_specific_calibration_params == "constant_one"): -- GitLab From 97f90cb2acb6d08f4a7f7f3c79f591c2a516d847 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 16 Jul 2020 22:37:42 -0500 Subject: [PATCH 43/55] More array context fixes in pytential.qbx.cost --- pytential/qbx/cost.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 629be98e..1280c0b3 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -39,6 +39,8 @@ from pytools import memoize_method from functools import partial import sys +from meshmode.array_context import PyOpenCLArrayContext + from boxtree.cost import ( FMMTranslationCostModel, AbstractFMMCostModel, FMMCostModel, _PythonFMMCostModel ) @@ -580,10 +582,10 @@ class AbstractQBXCostModel(AbstractFMMCostModel): class QBXCostModel(AbstractQBXCostModel, FMMCostModel): """This class is an implementation of interface :class:`AbstractQBXCostModel` - using PyOpenCL + using :mod:`pyopencl`. 
""" def __init__( - self, queue, + self, actx, translation_cost_model_factory=make_pde_aware_translation_cost_model): """ :arg queue: a :class:`pyopencl.CommandQueue` object on which the execution @@ -592,7 +594,12 @@ class QBXCostModel(AbstractQBXCostModel, FMMCostModel): and the number of tree levels as arguments, returns an object of :class:`QBXTranslationCostModel`. """ - FMMCostModel.__init__(self, queue, translation_cost_model_factory) + if not isinstance(actx, PyOpenCLArrayContext): + raise TypeError("actx must be a PyOpenCLArrayContext") + + # FIXME: Should the cost model own a queue? + self.array_context = actx + FMMCostModel.__init__(self, actx.queue, translation_cost_model_factory) @memoize_method def _fill_array_with_index_knl(self, idx_dtype, array_dtype): -- GitLab From 789c909eed4d8f77dbb03cfe64ac6749dee5fa22 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 16 Jul 2020 18:37:00 -0500 Subject: [PATCH 44/55] Add logger config example to examples/cost.py --- examples/cost.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/cost.py b/examples/cost.py index 2e953906..da2f34f4 100644 --- a/examples/cost.py +++ b/examples/cost.py @@ -202,6 +202,9 @@ def test_cost_model(ctx, calibration_params): def predict_cost(ctx): + import logging + logging.basicConfig(level=logging.WARNING) # INFO for more progress info + params = calibrate_cost_model(ctx) test_cost_model(ctx, params) -- GitLab From d2d8595c7d2e6b6e5c5dd5dacf7c002948a8136f Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 16 Jul 2020 18:37:27 -0500 Subject: [PATCH 45/55] Fix a class name typo in FMMLib integration --- pytential/qbx/fmmlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py index 98df707e..531b2561 100644 --- a/pytential/qbx/fmmlib.py +++ b/pytential/qbx/fmmlib.py @@ -234,7 +234,7 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): def reorder_potentials(self, potentials): raise NotImplementedError("reorder_potentials should not " - "be called on a QBXFMMLibHelmholtzExpansionWrangler") + "be called on a QBXFMMLibExpansionWrangler") # Because this is a multi-stage, more complicated process that combines # potentials from non-QBX targets and QBX targets. 
-- GitLab From 62be7ac0e99af37ee9a1f60c7c161bcd85f5d953 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 16 Jul 2020 18:38:08 -0500 Subject: [PATCH 46/55] Porting CL cost model to array context: examples/cost.py and test_cost_model.py work --- pytential/qbx/__init__.py | 7 ++++++- pytential/symbolic/execution.py | 2 +- test/test_cost_model.py | 25 ++++++++++++------------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index b260c710..b838764a 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -488,7 +488,12 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): geo_data, kernel, kernel_arguments, calibration_params ) - return wrangler.full_output_zeros(), (cost_model_result, metadata) + from pytools.obj_array import obj_array_vectorize + return ( + obj_array_vectorize( + wrangler.finalize_potentials, + wrangler.full_output_zeros()), + (cost_model_result, metadata)) return self._dispatch_compute_potential_insn( actx, insn, bound_expr, evaluate, diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py index 8164e547..f2899eec 100644 --- a/pytential/symbolic/execution.py +++ b/pytential/symbolic/execution.py @@ -926,7 +926,7 @@ class BoundExpression(object): self.code.execute(cost_model_mapper) return cost_model_mapper.get_modeled_cost() - def cost_per_box(self, queue, calibration_params, **kwargs): + def cost_per_box(self, calibration_params, **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` object. :arg calibration_params: either a :class:`dict` returned by diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 3803e947..1aa5e157 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -64,6 +64,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + actx = PyOpenCLArrayContext(queue) # {{{ Construct geometry @@ -74,7 +75,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory pre_density_discr = Discretization( - ctx, mesh, + actx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order) ) @@ -86,7 +87,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): places = GeometryCollection(qbx) from pytential.qbx.refinement import refine_geometry_collection - places = refine_geometry_collection(queue, places) + places = refine_geometry_collection(places) target_discrs_and_qbx_sides = tuple([(qbx.density_discr, 0)]) geo_data_dev = qbx.qbx_fmm_geometry_data( @@ -100,7 +101,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): # {{{ Construct cost models - cl_cost_model = QBXCostModel(queue) + cl_cost_model = QBXCostModel(actx) python_cost_model = _PythonQBXCostModel() tree = geo_data.tree() @@ -387,7 +388,7 @@ def test_cost_model(ctx_factory, dim, use_target_specific_qbx, per_box): lpot_source = get_lpot_source(actx, dim).copy( _use_target_specific_qbx=use_target_specific_qbx, - cost_model=QBXCostModel(queue)) + cost_model=QBXCostModel(actx)) places = GeometryCollection(lpot_source) density_discr = places.get_discretization(places.auto_source.geometry) @@ -702,8 +703,7 @@ def test_cost_model_correctness(ctx_factory, dim, off_surface, actx = PyOpenCLArrayContext(queue) cost_model = QBXCostModel( - queue, translation_cost_model_factory=OpCountingTranslationCostModel - ) + actx, translation_cost_model_factory=OpCountingTranslationCostModel) lpot_source = get_lpot_source(actx, 
dim).copy( cost_model=cost_model, @@ -780,7 +780,7 @@ def test_cost_model_correctness(ctx_factory, dim, off_surface, for stage in timing_data: total_cost += timing_data[stage]["ops_elapsed"] - per_box_cost, _ = op_S.cost_per_box(queue, "constant_one", sigma=sigma) + per_box_cost, _ = op_S.cost_per_box("constant_one", sigma=sigma) print(per_box_cost) per_box_cost = one(per_box_cost.values()) @@ -812,7 +812,7 @@ def test_cost_model_order_varying_by_level(ctx_factory): def level_to_order_constant(kernel, kernel_args, tree, level): return 1 - lpot_source = get_lpot_source(queue, 2).copy( + lpot_source = get_lpot_source(actx, 2).copy( cost_model=QBXCostModel(actx), fmm_level_to_order=level_to_order_constant) places = GeometryCollection(lpot_source) @@ -838,18 +838,17 @@ def test_cost_model_order_varying_by_level(ctx_factory): def level_to_order_varying(kernel, kernel_args, tree, level): return metadata["nlevels"] - level - lpot_source = get_lpot_source(queue, 2).copy( - cost_model=QBXCostModel(queue), + lpot_source = get_lpot_source(actx, 2).copy( + cost_model=QBXCostModel(actx), fmm_level_to_order=level_to_order_varying) places = GeometryCollection(lpot_source) density_discr = places.get_discretization(places.auto_source.geometry) - sigma = get_density(queue, density_discr) + sigma = get_density(actx, density_discr) cost_varying, _ = bind(lpot_source, sym_op).cost_per_stage( - queue, "constant_one", sigma=sigma - ) + "constant_one", sigma=sigma) cost_varying = one(cost_varying.values()) -- GitLab From e8b2c2acae4453372a0b85f685707e79ca076d16 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 26 Jul 2020 21:54:20 -0700 Subject: [PATCH 47/55] Address code review --- examples/cost.py | 2 +- pytential/qbx/__init__.py | 14 +++++++++++--- pytential/qbx/cost.py | 14 +++++++++----- pytential/symbolic/execution.py | 27 ++++++++++++++++++--------- 4 files changed, 39 insertions(+), 18 deletions(-) diff --git a/examples/cost.py b/examples/cost.py index da2f34f4..6cef13b2 100644 --- a/examples/cost.py +++ b/examples/cost.py @@ -146,7 +146,7 @@ def calibrate_cost_model(ctx): model_results.append(modeled_cost) timing_results.append(timing_data) - calibration_params = cost_model.estimate_knl_specific_calibration_params( + calibration_params = cost_model.estimate_kernel_specific_calibration_params( model_results, timing_results, time_field_name="process_elapsed" ) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index b838764a..46661ce0 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -108,9 +108,9 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): :arg _use_target_specific_qbx: Whether to use target-specific acceleration by default if possible. *None* means "use if possible". - :arg cost_model: Either *None* or instance of - :class:`~pytential.qbx.cost.AbstractQBXCostModel`, used for gathering - modeled costs (experimental) + :arg cost_model: Either *None* or an object implementing the + :class:`~pytential.qbx.cost.AbstractQBXCostModel` interface, used for + gathering modeled costs (experimental) """ # {{{ argument processing @@ -470,6 +470,14 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): :attr:`pytential.symbolic.execution.BoundExpression.modeled_cost` along the way. + :arg calibration_params: a :class:`dict` of calibration parameters, mapping + from parameter names to calibration values. 
+ :arg per_box: if *true*, cost model result will be a :class:`numpy.ndarray` + or :class:`pyopencl.array.Array` with shape of the number of boxes, where + the ith entry is the sum of the cost of all stages for box i. If *false*, + cost model result will be a :class:`dict`, mapping from the stage name to + predicted cost of the stage for all boxes. + :returns: whatever :meth:`exec_compute_potential_insn_fmm` returns. """ if self.fmm_level_to_order is False: diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 1280c0b3..bcdf56c2 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -57,6 +57,10 @@ logger = logging.getLogger(__name__) __doc__ = """ +.. note:: + + This module is experimental. Its interface is subject to change until this + notice is removed. This module helps predict the running time of each step of QBX, as an extension of the similar module *boxtree.cost* in boxtree. @@ -92,7 +96,7 @@ Cost Model Classes Calibration (Generate Calibration Parameters) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. automethod:: AbstractQBXCostModel.estimate_knl_specific_calibration_params +.. automethod:: AbstractQBXCostModel.estimate_kernel_specific_calibration_params Evaluating ^^^^^^^^^^ @@ -200,10 +204,10 @@ class AbstractQBXCostModel(AbstractFMMCostModel): with :meth:`qbx_cost_per_stage` or :meth:`qbx_cost_per_box`. * To calibrate the model, pass operation counts per stage together with timing - data to :meth:`estimate_knl_specific_calibration_params`. + data to :meth:`estimate_kernel_specific_calibration_params`. * To evaluate the calibrated models, pass the kernel-specific calibration - parameters from :meth:`estimate_knl_specific_calibration_params` to + parameters from :meth:`estimate_kernel_specific_calibration_params` to :meth:`qbx_cost_per_stage` or :meth:`qbx_cost_per_box`. """ @@ -533,8 +537,8 @@ class AbstractQBXCostModel(AbstractFMMCostModel): additional_stage_to_param_names=stage_to_param_names ) - def estimate_knl_specific_calibration_params(self, model_results, timing_results, - time_field_name="wall_elapsed"): + def estimate_kernel_specific_calibration_params( + self, model_results, timing_results, time_field_name="wall_elapsed"): """Get kernel-specific calibration parameters from samples of model costs and real costs. diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py index f2899eec..058cbd53 100644 --- a/pytential/symbolic/execution.py +++ b/pytential/symbolic/execution.py @@ -350,10 +350,18 @@ class CostModelMapper(EvaluationMapperBase): This executes everything *except* the layer potential operator. Instead of executing the operator, the cost model gets run and the cost data is collected. + + .. attribute:: kernel_to_calibration_params + + Can either be a :class:`str` "constant_one", which uses the constant 1.0 as + calibration parameters for all stages of all kernels, or be a :class:`dict`, + which maps from kernels to the calibration parameters, returned from + `estimate_kernel_specific_calibration_params`. 
+ """ def __init__(self, bound_expr, actx, - knl_specific_calibration_params, per_box, + kernel_to_calibration_params, per_box, context=None, target_geometry=None, target_points=None, target_normals=None, target_tangents=None): @@ -366,7 +374,7 @@ class CostModelMapper(EvaluationMapperBase): target_normals, target_tangents) - self.knl_specific_calibration_params = knl_specific_calibration_params + self.kernel_to_calibration_params = kernel_to_calibration_params self.modeled_cost = {} self.metadata = {} self.per_box = per_box @@ -376,12 +384,12 @@ class CostModelMapper(EvaluationMapperBase): source = bound_expr.places.get_geometry(insn.source.geometry) knls = frozenset(knl for knl in insn.kernels) - if (isinstance(self.knl_specific_calibration_params, str) - and self.knl_specific_calibration_params == "constant_one"): + if (isinstance(self.kernel_to_calibration_params, str) + and self.kernel_to_calibration_params == "constant_one"): calibration_params = \ AbstractQBXCostModel.get_unit_calibration_params() else: - calibration_params = self.knl_specific_calibration_params[knls] + calibration_params = self.kernel_to_calibration_params[knls] result, (cost_model_result, metadata) = \ source.cost_model_compute_potential_insn( @@ -909,7 +917,7 @@ class BoundExpression(object): """ :arg queue: a :class:`pyopencl.CommandQueue` object. :arg calibration_params: either a :class:`dict` returned by - `estimate_knl_specific_calibration_params`, or a :class:`str` + `estimate_kernel_specific_calibration_params`, or a :class:`str` "constant_one". :return: a :class:`dict` mapping from instruction to per-stage cost. Each per-stage cost is represented by a :class:`dict` mapping from the stage @@ -921,7 +929,7 @@ class BoundExpression(object): raise ValueError("unable to figure array context from arguments") cost_model_mapper = CostModelMapper( - self, array_context, calibration_params, False, kwargs + self, array_context, calibration_params, per_box=False, context=kwargs ) self.code.execute(cost_model_mapper) return cost_model_mapper.get_modeled_cost() @@ -930,7 +938,7 @@ class BoundExpression(object): """ :arg queue: a :class:`pyopencl.CommandQueue` object. :arg calibration_params: either a :class:`dict` returned by - `estimate_knl_specific_calibration_params`, or a :class:`str` + `estimate_kernel_specific_calibration_params`, or a :class:`str` "constant_one". :return: a :class:`dict` mapping from instruction to per-box cost. 
Each per-box cost is represented by a :class:`numpy.ndarray` or @@ -940,7 +948,8 @@ class BoundExpression(object): array_context = _find_array_context_from_args_in_context(kwargs) cost_model_mapper = CostModelMapper( - self, array_context, calibration_params, True, kwargs) + self, array_context, calibration_params, per_box=True, context=kwargs + ) self.code.execute(cost_model_mapper) return cost_model_mapper.get_modeled_cost() -- GitLab From 6176ca9083606f05446fcefb28e0abf6e9754ec5 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 12 Aug 2020 09:02:17 -0700 Subject: [PATCH 48/55] Remove command queue from cost model --- examples/cost.py | 4 +- pytential/qbx/__init__.py | 9 +- pytential/qbx/cost.py | 185 +++++++++++++++++++------------------- requirements.txt | 1 + test/test_cost_model.py | 48 +++++----- 5 files changed, 130 insertions(+), 117 deletions(-) diff --git a/examples/cost.py b/examples/cost.py index 6cef13b2..791cd7f9 100644 --- a/examples/cost.py +++ b/examples/cost.py @@ -118,7 +118,7 @@ def get_test_density(actx, density_discr): def calibrate_cost_model(ctx): queue = cl.CommandQueue(ctx) actx = PyOpenCLArrayContext(queue) - cost_model = QBXCostModel(actx) + cost_model = QBXCostModel() model_results = [] timing_results = [] @@ -156,7 +156,7 @@ def calibrate_cost_model(ctx): def test_cost_model(ctx, calibration_params): queue = cl.CommandQueue(ctx) actx = PyOpenCLArrayContext(queue) - cost_model = QBXCostModel(actx) + cost_model = QBXCostModel() for lpot_source in test_geometries(actx): lpot_source = lpot_source.copy(cost_model=cost_model) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index 46661ce0..2d64e141 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -214,8 +214,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): if cost_model is None: from pytential.qbx.cost import QBXCostModel - # FIXME: Yuck - cost_model = QBXCostModel(self._setup_actx) + cost_model = QBXCostModel() self.cost_model = cost_model @@ -489,11 +488,13 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): if per_box: cost_model_result, metadata = self.cost_model.qbx_cost_per_box( - geo_data, kernel, kernel_arguments, calibration_params + actx.queue, geo_data, kernel, kernel_arguments, + calibration_params ) else: cost_model_result, metadata = self.cost_model.qbx_cost_per_stage( - geo_data, kernel, kernel_arguments, calibration_params + actx.queue, geo_data, kernel, kernel_arguments, + calibration_params ) from pytools.obj_array import obj_array_vectorize diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index bcdf56c2..31b67b8d 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -39,8 +39,6 @@ from pytools import memoize_method from functools import partial import sys -from meshmode.array_context import PyOpenCLArrayContext - from boxtree.cost import ( FMMTranslationCostModel, AbstractFMMCostModel, FMMCostModel, _PythonFMMCostModel ) @@ -212,9 +210,10 @@ class AbstractQBXCostModel(AbstractFMMCostModel): """ @abstractmethod - def process_form_qbxl(self, geo_data, p2qbxl_cost, + def process_form_qbxl(self, queue, geo_data, p2qbxl_cost, ndirect_sources_per_target_box): """ + :arg queue: a :class:`pyopencl.CommandQueue` object. :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or similar object in the host memory. 
:arg p2qbxl_cost: a :class:`numpy.float64` constant representing the cost of @@ -231,7 +230,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): pass @abstractmethod - def process_m2qbxl(self, geo_data, m2qbxl_cost): + def process_m2qbxl(self, queue, geo_data, m2qbxl_cost): """ :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or similar object in the host memory. @@ -246,7 +245,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): pass @abstractmethod - def process_l2qbxl(self, geo_data, l2qbxl_cost): + def process_l2qbxl(self, queue, geo_data, l2qbxl_cost): """ :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or similar object in the host memory. @@ -260,7 +259,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): pass @abstractmethod - def process_eval_qbxl(self, geo_data, qbxl2p_cost): + def process_eval_qbxl(self, queue, geo_data, qbxl2p_cost): """ :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or similar object in the host memory. @@ -274,7 +273,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): pass @abstractmethod - def process_eval_target_specific_qbxl(self, geo_data, p2p_tsqbx_cost, + def process_eval_target_specific_qbxl(self, queue, geo_data, p2p_tsqbx_cost, ndirect_sources_per_target_box): """ :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or @@ -293,7 +292,8 @@ class AbstractQBXCostModel(AbstractFMMCostModel): """ pass - def qbx_cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): + def qbx_cost_factors_for_kernels_from_model( + self, queue, nlevels, xlat_cost, context): """Evaluate translation cost factors from symbolic model. The result of this function can be used for process_* methods in this class. @@ -301,6 +301,8 @@ class AbstractQBXCostModel(AbstractFMMCostModel): :class:`boxtree.cost.AbstractFMMCostModel` to support operations specific to QBX. + :arg queue: If not None, the cost factor arrays will be transferred to device + using this queue. :arg nlevels: the number of tree levels. :arg xlat_cost: a :class:`QBXTranslationCostModel`. :arg context: a :class:`dict` of parameters passed as context when @@ -308,7 +310,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): :return: a :class:`dict`, the translation cost of each step in FMM and QBX. """ cost_factors = self.fmm_cost_factors_for_kernels_from_model( - nlevels, xlat_cost, context + queue, nlevels, xlat_cost, context ) cost_factors.update({ @@ -325,6 +327,9 @@ class AbstractQBXCostModel(AbstractFMMCostModel): "p2p_tsqbx_cost": evaluate(xlat_cost.p2p_tsqbx(), context=context) }) + if queue: + self.cost_factors_to_dev(cost_factors, queue) + return cost_factors @staticmethod @@ -345,7 +350,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): return metadata - def qbx_cost_per_box(self, geo_data, kernel, kernel_arguments, + def qbx_cost_per_box(self, queue, geo_data, kernel, kernel_arguments, calibration_params): # FIXME: This should support target filtering. 
lpot_source = geo_data.lpot_source @@ -380,15 +385,15 @@ class AbstractQBXCostModel(AbstractFMMCostModel): ) translation_cost = self.qbx_cost_factors_for_kernels_from_model( - tree.nlevels, xlat_cost, params + queue, tree.nlevels, xlat_cost, params ) ndirect_sources_per_target_box = \ - self.get_ndirect_sources_per_target_box(traversal) + self.get_ndirect_sources_per_target_box(queue, traversal) # get FMM cost per box from parent class result = self.cost_per_box( - traversal, fmm_level_to_order, + queue, traversal, fmm_level_to_order, calibration_params, ndirect_sources_per_target_box=ndirect_sources_per_target_box, box_target_counts_nonchild=box_target_counts_nonchild @@ -396,32 +401,32 @@ class AbstractQBXCostModel(AbstractFMMCostModel): if use_tsqbx: result[target_boxes] += self.process_eval_target_specific_qbxl( - geo_data, translation_cost["p2p_tsqbx_cost"], + queue, geo_data, translation_cost["p2p_tsqbx_cost"], ndirect_sources_per_target_box ) else: result[target_boxes] += self.process_form_qbxl( - geo_data, translation_cost["p2qbxl_cost"], + queue, geo_data, translation_cost["p2qbxl_cost"], ndirect_sources_per_target_box ) result[target_boxes] += self.process_m2qbxl( - geo_data, translation_cost["m2qbxl_cost"] + queue, geo_data, translation_cost["m2qbxl_cost"] ) result[target_boxes] += self.process_l2qbxl( - geo_data, translation_cost["l2qbxl_cost"] + queue, geo_data, translation_cost["l2qbxl_cost"] ) result[target_boxes] += self.process_eval_qbxl( - geo_data, translation_cost["qbxl2p_cost"] + queue, geo_data, translation_cost["qbxl2p_cost"] ) metadata = self.gather_metadata(geo_data, fmm_level_to_order) return result, metadata - def qbx_cost_per_stage(self, geo_data, kernel, kernel_arguments, + def qbx_cost_per_stage(self, queue, geo_data, kernel, kernel_arguments, calibration_params): # FIXME: This should support target filtering. 
lpot_source = geo_data.lpot_source @@ -455,15 +460,15 @@ class AbstractQBXCostModel(AbstractFMMCostModel): ) translation_cost = self.qbx_cost_factors_for_kernels_from_model( - tree.nlevels, xlat_cost, params + queue, tree.nlevels, xlat_cost, params ) ndirect_sources_per_target_box = \ - self.get_ndirect_sources_per_target_box(traversal) + self.get_ndirect_sources_per_target_box(queue, traversal) # get FMM per-stage cost from parent class result = self.cost_per_stage( - traversal, fmm_level_to_order, + queue, traversal, fmm_level_to_order, calibration_params, ndirect_sources_per_target_box=ndirect_sources_per_target_box, box_target_counts_nonchild=box_target_counts_nonchild @@ -472,28 +477,28 @@ class AbstractQBXCostModel(AbstractFMMCostModel): if use_tsqbx: result["eval_target_specific_qbx_locals"] = self.aggregate_over_boxes( self.process_eval_target_specific_qbxl( - geo_data, translation_cost["p2p_tsqbx_cost"], + queue, geo_data, translation_cost["p2p_tsqbx_cost"], ndirect_sources_per_target_box=ndirect_sources_per_target_box ) ) else: result["form_global_qbx_locals"] = self.aggregate_over_boxes( self.process_form_qbxl( - geo_data, translation_cost["p2qbxl_cost"], + queue, geo_data, translation_cost["p2qbxl_cost"], ndirect_sources_per_target_box ) ) result["translate_box_multipoles_to_qbx_local"] = self.aggregate_over_boxes( - self.process_m2qbxl(geo_data, translation_cost["m2qbxl_cost"]) + self.process_m2qbxl(queue, geo_data, translation_cost["m2qbxl_cost"]) ) result["translate_box_local_to_qbx_local"] = self.aggregate_over_boxes( - self.process_l2qbxl(geo_data, translation_cost["l2qbxl_cost"]) + self.process_l2qbxl(queue, geo_data, translation_cost["l2qbxl_cost"]) ) result["eval_qbx_expansions"] = self.aggregate_over_boxes( - self.process_eval_qbxl(geo_data, translation_cost["qbxl2p_cost"]) + self.process_eval_qbxl(queue, geo_data, translation_cost["qbxl2p_cost"]) ) metadata = self.gather_metadata(geo_data, fmm_level_to_order) @@ -589,26 +594,19 @@ class QBXCostModel(AbstractQBXCostModel, FMMCostModel): using :mod:`pyopencl`. """ def __init__( - self, actx, + self, translation_cost_model_factory=make_pde_aware_translation_cost_model): """ - :arg queue: a :class:`pyopencl.CommandQueue` object on which the execution - of this object runs. :arg translation_cost_model_factory: a function, which takes tree dimension and the number of tree levels as arguments, returns an object of :class:`QBXTranslationCostModel`. """ - if not isinstance(actx, PyOpenCLArrayContext): - raise TypeError("actx must be a PyOpenCLArrayContext") - - # FIXME: Should the cost model own a queue? 
- self.array_context = actx - FMMCostModel.__init__(self, actx.queue, translation_cost_model_factory) + FMMCostModel.__init__(self, translation_cost_model_factory) @memoize_method - def _fill_array_with_index_knl(self, idx_dtype, array_dtype): + def _fill_array_with_index_knl(self, context, idx_dtype, array_dtype): return ElementwiseKernel( - self.queue.context, + context, Template(r""" ${idx_t} *index, ${array_t} *array, @@ -623,16 +621,16 @@ class QBXCostModel(AbstractQBXCostModel, FMMCostModel): name="fill_array_with_index" ) - def _fill_array_with_index(self, array, index, value): + def _fill_array_with_index(self, queue, array, index, value): idx_dtype = index.dtype array_dtype = array.dtype - knl = self._fill_array_with_index_knl(idx_dtype, array_dtype) - knl(index, array, value, queue=self.queue) + knl = self._fill_array_with_index_knl(queue.context, idx_dtype, array_dtype) + knl(index, array, value, queue=queue) @memoize_method - def count_global_qbx_centers_knl(self, box_id_dtype, particle_id_dtype): + def count_global_qbx_centers_knl(self, context, box_id_dtype, particle_id_dtype): return ElementwiseKernel( - self.queue.context, + context, Template(r""" ${particle_id_t} *nqbx_centers_itgt_box, ${particle_id_t} *global_qbx_center_weight, @@ -662,8 +660,9 @@ class QBXCostModel(AbstractQBXCostModel, FMMCostModel): name="count_global_qbx_centers" ) - def get_nqbx_centers_per_tgt_box(self, geo_data, weights=None): + def get_nqbx_centers_per_tgt_box(self, queue, geo_data, weights=None): """ + :arg queue: a :class:`pyopencl.CommandQueue` object. :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. :arg weights: a :class:`pyopencl.array.Array` of shape (ncenters,) with particle_id_dtype, the weight of each center in user order. 
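
For orientation, a rough host-side equivalent of the center-counting scheme
that the fill/count kernels above and the get_nqbx_centers_per_tgt_box body
below implement. This is a sketch under assumptions, not the implementation:
it presumes numpy versions of the tree and traversal (for instance obtained
through ToHostTransferredGeoDataWrapper), covers only the unweighted case, and
the helper name is made up for illustration.

    import numpy as np

    def nqbx_centers_per_target_box_host(tree, traversal, global_qbx_centers):
        # Scatter a weight of 1 onto each global QBX center, in tree target
        # order. (The device version can scatter per-center weights instead.)
        center_weight = np.zeros(tree.ntargets, dtype=tree.particle_id_dtype)
        center_weight[tree.sorted_target_ids[global_qbx_centers]] = 1

        # For each target box, sum the weights of the targets it contains.
        result = np.zeros(
                len(traversal.target_boxes), dtype=tree.particle_id_dtype)
        for itgt_box, ibox in enumerate(traversal.target_boxes):
            start = tree.box_target_starts[ibox]
            stop = start + tree.box_target_counts_nonchild[ibox]
            result[itgt_box] = center_weight[start:stop].sum()

        return result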
@@ -679,14 +678,14 @@ class QBXCostModel(AbstractQBXCostModel, FMMCostModel): # Build a mask (weight) of whether a target is a global qbx center global_qbx_centers_tree_order = take( - tree.sorted_target_ids, global_qbx_centers, queue=self.queue + tree.sorted_target_ids, global_qbx_centers, queue=queue ) global_qbx_center_weight = cl.array.zeros( - self.queue, tree.ntargets, dtype=tree.particle_id_dtype + queue, tree.ntargets, dtype=tree.particle_id_dtype ) self._fill_array_with_index( - global_qbx_center_weight, global_qbx_centers_tree_order, 1 + queue, global_qbx_center_weight, global_qbx_centers_tree_order, 1 ) if weights is not None: @@ -697,34 +696,35 @@ class QBXCostModel(AbstractQBXCostModel, FMMCostModel): # qbx centers ntarget_boxes = len(traversal.target_boxes) nqbx_centers_itgt_box = cl.array.empty( - self.queue, ntarget_boxes, dtype=tree.particle_id_dtype + queue, ntarget_boxes, dtype=tree.particle_id_dtype ) count_global_qbx_centers_knl = self.count_global_qbx_centers_knl( - tree.box_id_dtype, tree.particle_id_dtype + queue.context, tree.box_id_dtype, tree.particle_id_dtype ) count_global_qbx_centers_knl( nqbx_centers_itgt_box, global_qbx_center_weight, traversal.target_boxes, tree.box_target_starts, - tree.box_target_counts_nonchild + tree.box_target_counts_nonchild, + queue=queue ) return nqbx_centers_itgt_box - def process_form_qbxl(self, geo_data, p2qbxl_cost, + def process_form_qbxl(self, queue, geo_data, p2qbxl_cost, ndirect_sources_per_target_box): - nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(geo_data) + nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(queue, geo_data) return (nqbx_centers_itgt_box * ndirect_sources_per_target_box * p2qbxl_cost) @memoize_method - def process_m2qbxl_knl(self, box_id_dtype, particle_id_dtype): + def process_m2qbxl_knl(self, context, box_id_dtype, particle_id_dtype): return ElementwiseKernel( - self.queue.context, + context, Template(r""" ${box_id_t} *idx_to_itgt_box, ${particle_id_t} *nqbx_centers_itgt_box, @@ -752,17 +752,17 @@ class QBXCostModel(AbstractQBXCostModel, FMMCostModel): name="process_m2qbxl" ) - def process_m2qbxl(self, geo_data, m2qbxl_cost): + def process_m2qbxl(self, queue, geo_data, m2qbxl_cost): tree = geo_data.tree() traversal = geo_data.traversal() ntarget_boxes = len(traversal.target_boxes) - nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(geo_data) + nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(queue, geo_data) process_m2qbxl_knl = self.process_m2qbxl_knl( - tree.box_id_dtype, tree.particle_id_dtype + queue.context, tree.box_id_dtype, tree.particle_id_dtype ) - nm2qbxl = cl.array.zeros(self.queue, ntarget_boxes, dtype=np.float64) + nm2qbxl = cl.array.zeros(queue, ntarget_boxes, dtype=np.float64) for isrc_level, ssn in enumerate(traversal.from_sep_smaller_by_level): process_m2qbxl_knl( @@ -770,59 +770,60 @@ class QBXCostModel(AbstractQBXCostModel, FMMCostModel): nqbx_centers_itgt_box, ssn.starts, nm2qbxl, - m2qbxl_cost[isrc_level].get().reshape(-1)[0], - queue=self.queue + m2qbxl_cost[isrc_level].get(queue).reshape(-1)[0], + queue=queue ) return nm2qbxl - def process_l2qbxl(self, geo_data, l2qbxl_cost): + def process_l2qbxl(self, queue, geo_data, l2qbxl_cost): tree = geo_data.tree() traversal = geo_data.traversal() - nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(geo_data) + nqbx_centers_itgt_box = self.get_nqbx_centers_per_tgt_box(queue, geo_data) # l2qbxl_cost_itgt_box = l2qbxl_cost[tree.box_levels[traversal.target_boxes]] l2qbxl_cost_itgt_box = take( 
l2qbxl_cost, - take(tree.box_levels, traversal.target_boxes, queue=self.queue), - queue=self.queue + take(tree.box_levels, traversal.target_boxes, queue=queue), + queue=queue ) return nqbx_centers_itgt_box * l2qbxl_cost_itgt_box - def process_eval_qbxl(self, geo_data, qbxl2p_cost): + def process_eval_qbxl(self, queue, geo_data, qbxl2p_cost): center_to_targets_starts = geo_data.center_to_tree_targets().starts - center_to_targets_starts = center_to_targets_starts.with_queue(self.queue) + center_to_targets_starts = center_to_targets_starts.with_queue(queue) weights = center_to_targets_starts[1:] - center_to_targets_starts[:-1] nqbx_targets_itgt_box = self.get_nqbx_centers_per_tgt_box( - geo_data, weights=weights + queue, geo_data, weights=weights ) return nqbx_targets_itgt_box * qbxl2p_cost - def process_eval_target_specific_qbxl(self, geo_data, p2p_tsqbx_cost, + def process_eval_target_specific_qbxl(self, queue, geo_data, p2p_tsqbx_cost, ndirect_sources_per_target_box): center_to_targets_starts = geo_data.center_to_tree_targets().starts - center_to_targets_starts = center_to_targets_starts.with_queue(self.queue) + center_to_targets_starts = center_to_targets_starts.with_queue(queue) weights = center_to_targets_starts[1:] - center_to_targets_starts[:-1] nqbx_targets_itgt_box = self.get_nqbx_centers_per_tgt_box( - geo_data, weights=weights + queue, geo_data, weights=weights ) return (nqbx_targets_itgt_box * ndirect_sources_per_target_box * p2p_tsqbx_cost) - def qbx_cost_factors_for_kernels_from_model(self, nlevels, xlat_cost, context): - translation_costs = ( - AbstractQBXCostModel.qbx_cost_factors_for_kernels_from_model( - self, nlevels, xlat_cost, context - ) - ) + def qbx_cost_factors_for_kernels_from_model( + self, queue, nlevels, xlat_cost, context): + if not isinstance(queue, cl.CommandQueue): + raise TypeError( + "An OpenCL command queue must be supplied for cost model") - return self.translation_costs_to_dev(translation_costs) + return AbstractQBXCostModel.qbx_cost_factors_for_kernels_from_model( + self, queue, nlevels, xlat_cost, context + ) class _PythonQBXCostModel(AbstractQBXCostModel, _PythonFMMCostModel): @@ -836,7 +837,7 @@ class _PythonQBXCostModel(AbstractQBXCostModel, _PythonFMMCostModel): """ _PythonFMMCostModel.__init__(self, translation_cost_model_factory) - def process_form_qbxl(self, geo_data, p2qbxl_cost, + def process_form_qbxl(self, queue, geo_data, p2qbxl_cost, ndirect_sources_per_target_box): global_qbx_centers = geo_data.global_qbx_centers() qbx_center_to_target_box = geo_data.qbx_center_to_target_box() @@ -850,7 +851,7 @@ class _PythonQBXCostModel(AbstractQBXCostModel, _PythonFMMCostModel): return np2qbxl * p2qbxl_cost - def process_eval_target_specific_qbxl(self, geo_data, p2p_tsqbx_cost, + def process_eval_target_specific_qbxl(self, queue, geo_data, p2p_tsqbx_cost, ndirect_sources_per_target_box): center_to_targets_starts = geo_data.center_to_tree_targets().starts global_qbx_centers = geo_data.global_qbx_centers() @@ -867,7 +868,7 @@ class _PythonQBXCostModel(AbstractQBXCostModel, _PythonFMMCostModel): return neval_tsqbx * p2p_tsqbx_cost - def process_m2qbxl(self, geo_data, m2qbxl_cost): + def process_m2qbxl(self, queue, geo_data, m2qbxl_cost): traversal = geo_data.traversal() global_qbx_centers = geo_data.global_qbx_centers() qbx_center_to_target_box = geo_data.qbx_center_to_target_box() @@ -899,7 +900,7 @@ class _PythonQBXCostModel(AbstractQBXCostModel, _PythonFMMCostModel): return nm2qbxl - def process_l2qbxl(self, geo_data, l2qbxl_cost): + def 
process_l2qbxl(self, queue, geo_data, l2qbxl_cost): tree = geo_data.tree() traversal = geo_data.traversal() global_qbx_centers = geo_data.global_qbx_centers() @@ -915,7 +916,7 @@ class _PythonQBXCostModel(AbstractQBXCostModel, _PythonFMMCostModel): return nl2qbxl - def process_eval_qbxl(self, geo_data, qbxl2p_cost): + def process_eval_qbxl(self, queue, geo_data, qbxl2p_cost): traversal = geo_data.traversal() global_qbx_centers = geo_data.global_qbx_centers() center_to_targets_starts = geo_data.center_to_tree_targets().starts @@ -931,7 +932,7 @@ class _PythonQBXCostModel(AbstractQBXCostModel, _PythonFMMCostModel): return neval_qbxl * qbxl2p_cost - def qbx_cost_per_box(self, geo_data, kernel, kernel_arguments, + def qbx_cost_per_box(self, queue, geo_data, kernel, kernel_arguments, calibration_params): """This function transfers *geo_data* to host if necessary """ @@ -940,15 +941,14 @@ class _PythonQBXCostModel(AbstractQBXCostModel, _PythonFMMCostModel): if not isinstance(geo_data, ToHostTransferredGeoDataWrapper): assert isinstance(geo_data, QBXFMMGeometryData) - - queue = cl.CommandQueue(geo_data.cl_context) geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) + queue.finish() return AbstractQBXCostModel.qbx_cost_per_box( - self, geo_data, kernel, kernel_arguments, calibration_params + self, queue, geo_data, kernel, kernel_arguments, calibration_params ) - def qbx_cost_per_stage(self, geo_data, kernel, kernel_arguments, + def qbx_cost_per_stage(self, queue, geo_data, kernel, kernel_arguments, calibration_params): """This function additionally transfers geo_data to host if necessary """ @@ -957,12 +957,17 @@ class _PythonQBXCostModel(AbstractQBXCostModel, _PythonFMMCostModel): if not isinstance(geo_data, ToHostTransferredGeoDataWrapper): assert isinstance(geo_data, QBXFMMGeometryData) - - queue = cl.CommandQueue(geo_data.cl_context) geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) + queue.finish() return AbstractQBXCostModel.qbx_cost_per_stage( - self, geo_data, kernel, kernel_arguments, calibration_params + self, queue, geo_data, kernel, kernel_arguments, calibration_params + ) + + def qbx_cost_factors_for_kernels_from_model( + self, queue, nlevels, xlat_cost, context): + return AbstractQBXCostModel.qbx_cost_factors_for_kernels_from_model( + self, None, nlevels, xlat_cost, context ) # }}} diff --git a/requirements.txt b/requirements.txt index b3a861ca..783559b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ git+https://github.com/inducer/pyopencl#egg=pyopencl git+https://github.com/inducer/islpy#egg=islpy git+https://github.com/inducer/loopy#egg=loo.py git+https://github.com/inducer/boxtree#egg=boxtree +git+https://github.com/gaohao95/boxtree@remove-queue-from-cost-model#egg=boxtree git+https://github.com/inducer/meshmode#egg=meshmode git+https://github.com/inducer/sumpy#egg=sumpy git+https://github.com/inducer/pyfmmlib#egg=pyfmmlib diff --git a/test/test_cost_model.py b/test/test_cost_model.py index 1aa5e157..b6de4445 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -101,7 +101,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): # {{{ Construct cost models - cl_cost_model = QBXCostModel(actx) + cl_cost_model = QBXCostModel() python_cost_model = _PythonQBXCostModel() tree = geo_data.tree() @@ -115,25 +115,28 @@ def test_compare_cl_and_py_cost_model(ctx_factory): constant_one_params["p_fmm_lev%d" % ilevel] = 10 cl_cost_factors = cl_cost_model.qbx_cost_factors_for_kernels_from_model( - tree.nlevels, xlat_cost, 
constant_one_params + queue, tree.nlevels, xlat_cost, constant_one_params ) python_cost_factors = python_cost_model.qbx_cost_factors_for_kernels_from_model( - tree.nlevels, xlat_cost, constant_one_params + None, tree.nlevels, xlat_cost, constant_one_params ) # }}} # {{{ Test process_form_qbxl - cl_ndirect_sources_per_target_box = \ - cl_cost_model.get_ndirect_sources_per_target_box(geo_data_dev.traversal()) + cl_ndirect_sources_per_target_box = ( + cl_cost_model.get_ndirect_sources_per_target_box( + queue, geo_data_dev.traversal() + ) + ) queue.finish() start_time = time.time() cl_p2qbxl = cl_cost_model.process_form_qbxl( - geo_data_dev, cl_cost_factors["p2qbxl_cost"], + queue, geo_data_dev, cl_cost_factors["p2qbxl_cost"], cl_ndirect_sources_per_target_box ) @@ -142,13 +145,16 @@ def test_compare_cl_and_py_cost_model(ctx_factory): str(time.time() - start_time) )) - python_ndirect_sources_per_target_box = \ - python_cost_model.get_ndirect_sources_per_target_box(geo_data.traversal()) + python_ndirect_sources_per_target_box = ( + python_cost_model.get_ndirect_sources_per_target_box( + queue, geo_data.traversal() + ) + ) start_time = time.time() python_p2qbxl = python_cost_model.process_form_qbxl( - geo_data, python_cost_factors["p2qbxl_cost"], + queue, geo_data, python_cost_factors["p2qbxl_cost"], python_ndirect_sources_per_target_box ) @@ -166,7 +172,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() cl_m2qbxl = cl_cost_model.process_m2qbxl( - geo_data_dev, cl_cost_factors["m2qbxl_cost"] + queue, geo_data_dev, cl_cost_factors["m2qbxl_cost"] ) queue.finish() @@ -177,7 +183,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() python_m2qbxl = python_cost_model.process_m2qbxl( - geo_data, python_cost_factors["m2qbxl_cost"] + queue, geo_data, python_cost_factors["m2qbxl_cost"] ) logger.info("Python time for process_m2qbxl: {0}".format( @@ -194,7 +200,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() cl_l2qbxl = cl_cost_model.process_l2qbxl( - geo_data_dev, cl_cost_factors["l2qbxl_cost"] + queue, geo_data_dev, cl_cost_factors["l2qbxl_cost"] ) queue.finish() @@ -205,7 +211,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() python_l2qbxl = python_cost_model.process_l2qbxl( - geo_data, python_cost_factors["l2qbxl_cost"] + queue, geo_data, python_cost_factors["l2qbxl_cost"] ) logger.info("Python time for process_l2qbxl: {0}".format( @@ -222,7 +228,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() cl_eval_qbxl = cl_cost_model.process_eval_qbxl( - geo_data_dev, cl_cost_factors["qbxl2p_cost"] + queue, geo_data_dev, cl_cost_factors["qbxl2p_cost"] ) queue.finish() @@ -233,7 +239,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() python_eval_qbxl = python_cost_model.process_eval_qbxl( - geo_data, python_cost_factors["qbxl2p_cost"] + queue, geo_data, python_cost_factors["qbxl2p_cost"] ) logger.info("Python time for process_eval_qbxl: {0}".format( @@ -250,7 +256,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): start_time = time.time() cl_eval_target_specific_qbxl = cl_cost_model.process_eval_target_specific_qbxl( - geo_data_dev, cl_cost_factors["p2p_tsqbx_cost"], + queue, geo_data_dev, cl_cost_factors["p2p_tsqbx_cost"], cl_ndirect_sources_per_target_box ) @@ -263,7 +269,7 @@ def test_compare_cl_and_py_cost_model(ctx_factory): python_eval_target_specific_qbxl = \ python_cost_model.process_eval_target_specific_qbxl( - 
geo_data, python_cost_factors["p2p_tsqbx_cost"], + queue, geo_data, python_cost_factors["p2p_tsqbx_cost"], python_ndirect_sources_per_target_box ) @@ -388,7 +394,7 @@ def test_cost_model(ctx_factory, dim, use_target_specific_qbx, per_box): lpot_source = get_lpot_source(actx, dim).copy( _use_target_specific_qbx=use_target_specific_qbx, - cost_model=QBXCostModel(actx)) + cost_model=QBXCostModel()) places = GeometryCollection(lpot_source) density_discr = places.get_discretization(places.auto_source.geometry) @@ -703,7 +709,7 @@ def test_cost_model_correctness(ctx_factory, dim, off_surface, actx = PyOpenCLArrayContext(queue) cost_model = QBXCostModel( - actx, translation_cost_model_factory=OpCountingTranslationCostModel) + translation_cost_model_factory=OpCountingTranslationCostModel) lpot_source = get_lpot_source(actx, dim).copy( cost_model=cost_model, @@ -813,7 +819,7 @@ def test_cost_model_order_varying_by_level(ctx_factory): return 1 lpot_source = get_lpot_source(actx, 2).copy( - cost_model=QBXCostModel(actx), + cost_model=QBXCostModel(), fmm_level_to_order=level_to_order_constant) places = GeometryCollection(lpot_source) @@ -839,7 +845,7 @@ def test_cost_model_order_varying_by_level(ctx_factory): return metadata["nlevels"] - level lpot_source = get_lpot_source(actx, 2).copy( - cost_model=QBXCostModel(actx), + cost_model=QBXCostModel(), fmm_level_to_order=level_to_order_varying) places = GeometryCollection(lpot_source) -- GitLab From bfebb704e010d907e3313324574015ab625d5622 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Wed, 12 Aug 2020 21:58:48 -0700 Subject: [PATCH 49/55] Temporarily switch to custom boxtree branch for CI --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 783559b6..854183c5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,6 @@ git+https://github.com/inducer/modepy#egg=modepy git+https://github.com/inducer/pyopencl#egg=pyopencl git+https://github.com/inducer/islpy#egg=islpy git+https://github.com/inducer/loopy#egg=loo.py -git+https://github.com/inducer/boxtree#egg=boxtree git+https://github.com/gaohao95/boxtree@remove-queue-from-cost-model#egg=boxtree git+https://github.com/inducer/meshmode#egg=meshmode git+https://github.com/inducer/sumpy#egg=sumpy -- GitLab From 578b5a326d922a003ea405b170111eedd07fc983 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 13 Aug 2020 22:53:15 -0700 Subject: [PATCH 50/55] Bug fix --- pytential/qbx/cost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 31b67b8d..9cdf7967 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -328,7 +328,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): }) if queue: - self.cost_factors_to_dev(cost_factors, queue) + cost_factors = self.cost_factors_to_dev(cost_factors, queue) return cost_factors -- GitLab From cff7decf6e63b0cbc8ff84d84700927483418f55 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sat, 15 Aug 2020 18:58:11 -0700 Subject: [PATCH 51/55] Switch boxtree requirement back to master branch --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 854183c5..b3a861ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ git+https://github.com/inducer/modepy#egg=modepy git+https://github.com/inducer/pyopencl#egg=pyopencl git+https://github.com/inducer/islpy#egg=islpy git+https://github.com/inducer/loopy#egg=loo.py 
-git+https://github.com/gaohao95/boxtree@remove-queue-from-cost-model#egg=boxtree +git+https://github.com/inducer/boxtree#egg=boxtree git+https://github.com/inducer/meshmode#egg=meshmode git+https://github.com/inducer/sumpy#egg=sumpy git+https://github.com/inducer/pyfmmlib#egg=pyfmmlib -- GitLab From 8266eb965e255a3823c1287003e2d33a2d2e4d66 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 24 Aug 2020 23:30:09 -0700 Subject: [PATCH 52/55] Address reviewer's comments --- pytential/qbx/__init__.py | 2 +- pytential/qbx/cost.py | 35 ++++++++++++++++------------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index 2d64e141..a2ebfe8a 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -110,7 +110,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): "use if possible". :arg cost_model: Either *None* or an object implementing the :class:`~pytential.qbx.cost.AbstractQBXCostModel` interface, used for - gathering modeled costs (experimental) + gathering modeled costs if provided (experimental) """ # {{{ argument processing diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 9cdf7967..1d49d3a1 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -49,7 +49,6 @@ if sys.version_info >= (3, 0): else: Template = partial(Template, strict_undefined=True, disable_unicode=True) - import logging logger = logging.getLogger(__name__) @@ -61,7 +60,7 @@ __doc__ = """ notice is removed. This module helps predict the running time of each step of QBX, as an extension of -the similar module *boxtree.cost* in boxtree. +the similar module :mod:`boxtree.cost` in boxtree. :class:`QBXTranslationCostModel` describes the translation or evaluation cost of a single operation. For example, *m2qbxl* describes the cost for translating a single @@ -72,8 +71,8 @@ kernel-specific calibration parameter to compute the total cost of each step of in each box. :class:`QBXCostModel` is one implementation of :class:`AbstractQBXCostModel` using OpenCL. -:file:`examples/cost.py` demostrates how the calibration and evaluation are -performed. +:file:`examples/cost.py` in the source distribution demonstrates how the calibration +and evaluation are performed. Translation Cost of a Single Operation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -305,8 +304,9 @@ class AbstractQBXCostModel(AbstractFMMCostModel): using this queue. :arg nlevels: the number of tree levels. :arg xlat_cost: a :class:`QBXTranslationCostModel`. - :arg context: a :class:`dict` of parameters passed as context when - evaluating symbolic expressions in *xlat_cost*. + :arg context: a :class:`dict` mapping from the symbolic names of parameters + to their values, serving as context when evaluating symbolic expressions + in *xlat_cost*. :return: a :class:`dict`, the translation cost of each step in FMM and QBX. 
""" cost_factors = self.fmm_cost_factors_for_kernels_from_model( @@ -507,21 +507,17 @@ class AbstractQBXCostModel(AbstractFMMCostModel): @staticmethod def get_unit_calibration_params(): - return dict( - c_l2l=1.0, - c_l2p=1.0, - c_m2l=1.0, - c_m2m=1.0, - c_m2p=1.0, - c_p2l=1.0, - c_p2m=1.0, - c_p2p=1.0, + calibration_params = AbstractFMMCostModel.get_unit_calibration_params() + + calibration_params.update(dict( c_p2qbxl=1.0, c_p2p_tsqbx=1.0, c_qbxl2p=1.0, c_m2qbxl=1.0, c_l2qbxl=1.0 - ) + )) + + return calibration_params _QBX_STAGE_TO_CALIBRATION_PARAMETER = { "form_global_qbx_locals": "c_p2qbxl", @@ -553,7 +549,7 @@ class AbstractQBXCostModel(AbstractFMMCostModel): :arg timing_results: a :class:`list` of timing data. Each timing data can be obtained from `BoundExpression.eval`. :arg time_field_name: a :class:`str`, the field name from the timing result. - Usually this can be "wall_elapsed" or "process_elapsed". + Usually this can be ``"wall_elapsed"`` or ``"process_elapsed"``. :return: a :class:`dict` which maps kernels to calibration parameters. """ cost_per_kernel = {} @@ -830,7 +826,9 @@ class _PythonQBXCostModel(AbstractQBXCostModel, _PythonFMMCostModel): def __init__( self, translation_cost_model_factory=make_pde_aware_translation_cost_model): - """ + """This cost model is a redundant implementation used for testing. It should + not be used outside of tests for :mod:`pytential`. + :arg translation_cost_model_factory: a function, which takes tree dimension and the number of tree levels as arguments, returns an object of :class:`TranslationCostModel`. @@ -942,7 +940,6 @@ class _PythonQBXCostModel(AbstractQBXCostModel, _PythonFMMCostModel): if not isinstance(geo_data, ToHostTransferredGeoDataWrapper): assert isinstance(geo_data, QBXFMMGeometryData) geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data) - queue.finish() return AbstractQBXCostModel.qbx_cost_per_box( self, queue, geo_data, kernel, kernel_arguments, calibration_params -- GitLab From d0315092e81ba2784896e4031ca51d5852292f8d Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 25 Aug 2020 08:22:53 -0700 Subject: [PATCH 53/55] Minor documentation tweaks [ci skip] --- pytential/qbx/cost.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 1d49d3a1..04838c76 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -307,7 +307,8 @@ class AbstractQBXCostModel(AbstractFMMCostModel): :arg context: a :class:`dict` mapping from the symbolic names of parameters to their values, serving as context when evaluating symbolic expressions in *xlat_cost*. - :return: a :class:`dict`, the translation cost of each step in FMM and QBX. + :return: a :class:`dict`, mapping from stage names to the translation costs + of those stages in FMM and QBX. 
""" cost_factors = self.fmm_cost_factors_for_kernels_from_model( queue, nlevels, xlat_cost, context -- GitLab From 047a9dd37dbd78a8e057dbddf1269e18f87f055b Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 25 Aug 2020 23:50:08 -0700 Subject: [PATCH 54/55] More documentation tweaks --- pytential/qbx/cost.py | 75 ++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/pytential/qbx/cost.py b/pytential/qbx/cost.py index 04838c76..6071dea5 100644 --- a/pytential/qbx/cost.py +++ b/pytential/qbx/cost.py @@ -191,7 +191,7 @@ def make_taylor_translation_cost_model(dim, nlevels): # }}} -# {{{ cost model +# {{{ abstract cost model class AbstractQBXCostModel(AbstractFMMCostModel): """An interface to obtain both QBX operation counts and calibrated (e.g. in @@ -213,61 +213,57 @@ class AbstractQBXCostModel(AbstractFMMCostModel): ndirect_sources_per_target_box): """ :arg queue: a :class:`pyopencl.CommandQueue` object. - :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or - similar object in the host memory. + :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. :arg p2qbxl_cost: a :class:`numpy.float64` constant representing the cost of adding a source to a QBX local expansion. :arg ndirect_sources_per_target_box: a :class:`numpy.ndarray` or - :class:`pyopencl.array.Array` of shape (ntarget_boxes,), with the ith - entry representing the number of direct evaluation sources (list 1, - list 3 close and list 4 close) for target_boxes[i]. + :class:`pyopencl.array.Array` of shape ``(ntarget_boxes,)``, with the + *i*th entry representing the number of direct evaluation sources (list 1, + list 3 close and list 4 close) for ``target_boxes[i]``. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape - (ntarget_boxes,), with the ith entry representing the cost of adding all - direct evaluation sources to QBX local expansions of centers in - target_boxes[i]. + ``(ntarget_boxes,)``, with the *i*th entry representing the cost of + adding all direct evaluation sources to QBX local expansions of centers + in ``target_boxes[i]``. """ pass @abstractmethod def process_m2qbxl(self, queue, geo_data, m2qbxl_cost): """ - :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or - similar object in the host memory. + :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. :arg m2qbxl_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` - of shape (nlevels,) where the ith entry represents the translation cost - from multipole expansion at level i to a QBX center. + of shape ``(nlevels,)`` where the *i*th entry represents the translation + cost from multipole expansion at level *i* to a QBX center. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape - (ntarget_boxes,), with the ith entry representing the cost of translating - multipole expansions of list 3 boxes at all source levels to all QBX - centers in target_boxes[i]. + ``(ntarget_boxes,)``, with the *i*th entry representing the cost of + translating multipole expansions of list 3 boxes at all source levels to + all QBX centers in ``target_boxes[i]``. """ pass @abstractmethod def process_l2qbxl(self, queue, geo_data, l2qbxl_cost): """ - :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or - similar object in the host memory. + :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. 
:arg l2qbxl_cost: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` - of shape (nlevels,) where the ith entry represents the translation cost - from a box local expansion to a QBX local expansion. + of shape ``(nlevels,)`` where each entry represents the translation + cost from a box local expansion to a QBX local expansion. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape - (ntarget_boxes,), with the ith entry representing the cost of translating - box local expansions to all QBX local expansions. + ``(ntarget_boxes,)``, with each entry representing the cost of + translating box local expansions to all QBX local expansions. """ pass @abstractmethod def process_eval_qbxl(self, queue, geo_data, qbxl2p_cost): """ - :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or - similar object in the host memory. + :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. :arg qbxl2p_cost: a :class:`numpy.float64` constant, representing the evaluation cost of a target from its QBX local expansion. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape - (ntarget_boxes,), with the ith entry representing the cost of evaluating - all targets associated with QBX centers in target_boxes[i] from QBX local - expansions. + ``(ntarget_boxes,)``, with the *i*th entry representing the cost of + evaluating all targets associated with QBX centers in ``target_boxes[i]`` + from QBX local expansions. """ pass @@ -275,19 +271,18 @@ class AbstractQBXCostModel(AbstractFMMCostModel): def process_eval_target_specific_qbxl(self, queue, geo_data, p2p_tsqbx_cost, ndirect_sources_per_target_box): """ - :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object or - similar object in the host memory. + :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. :arg p2p_tsqbx_cost: a :class:`numpy.float64` constant representing the evaluation cost of a target from a direct evaluation source of the target box containing the expansion center. :arg ndirect_sources_per_target_box: a :class:`numpy.ndarray` or - :class:`pyopencl.array.Array` of shape (ntarget_boxes,), with the ith - entry representing the number of direct evaluation sources (list 1, - list 3 close and list 4 close) for target_boxes[i]. + :class:`pyopencl.array.Array` of shape ``(ntarget_boxes,)``, with the + *i*th entry representing the number of direct evaluation sources + (list 1, list 3 close and list 4 close) for ``target_boxes[i]``. :return: a :class:`numpy.ndarray` or :class:`pyopencl.array.Array` of shape - (ntarget_boxes,), with the ith entry representing the evaluation cost of - all targets associated with centers in target_boxes[i] from the direct - evaluation sources of target_boxes[i]. + ``(ntarget_boxes,)``, with the *i*th entry representing the evaluation + cost of all targets associated with centers in ``target_boxes[i]`` from + the direct evaluation sources of ``target_boxes[i]``. """ pass @@ -661,12 +656,12 @@ class QBXCostModel(AbstractQBXCostModel, FMMCostModel): """ :arg queue: a :class:`pyopencl.CommandQueue` object. :arg geo_data: a :class:`pytential.qbx.geometry.QBXFMMGeometryData` object. - :arg weights: a :class:`pyopencl.array.Array` of shape (ncenters,) with + :arg weights: a :class:`pyopencl.array.Array` of shape ``(ncenters,)`` with particle_id_dtype, the weight of each center in user order. 
- :return: a :class:`pyopencl.array.Array` of shape (ntarget_boxes,) with type - *particle_id_dtype* where the ith entry represents the number of - `geo_data.global_qbx_centers` in target_boxes[i], optionally weighted by - *weights*. + :return: a :class:`pyopencl.array.Array` of shape ``(ntarget_boxes,)`` with + type *particle_id_dtype* where the *i*th entry represents the number of + `geo_data.global_qbx_centers` in ``target_boxes[i]``, optionally weighted + by *weights*. """ traversal = geo_data.traversal() tree = geo_data.tree() -- GitLab From 4f89b76bb41b7fbaddcaca26c11dd821a58d3eb6 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 30 Aug 2020 17:21:19 -0700 Subject: [PATCH 55/55] Set global logging level inside __main__ --- test/test_cost_model.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/test_cost_model.py b/test/test_cost_model.py index b6de4445..71beb449 100644 --- a/test/test_cost_model.py +++ b/test/test_cost_model.py @@ -48,10 +48,7 @@ from pytential.qbx.cost import ( import time import logging -import os -logging.basicConfig(level=os.environ.get("LOGLEVEL", "WARNING")) logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) # {{{ Compare the time and result of OpenCL implementation and Python implementation @@ -867,8 +864,15 @@ def test_cost_model_order_varying_by_level(ctx_factory): # You can test individual routines by typing # $ python test_cost_model.py 'test_routine()' +# You can specify the log level by setting 'LOGLEVEL' enviroment variable, for +# example +# $ LOGLEVEL=INFO python test_cost_model.py 'test_compare_cl_and_py_cost_model( +# $ cl.create_some_context)' if __name__ == "__main__": + import os + logging.basicConfig(level=os.environ.get("LOGLEVEL", "WARNING")) + import sys if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab
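
To make the calling convention introduced by this series concrete, here is a minimal sketch (not part of any patch) modeled on ``test_compare_cl_and_py_cost_model`` above: the cost model is constructed without an array context, and each cost-model method receives the :class:`pyopencl.CommandQueue` explicitly. The ``tree``, ``geo_data``, ``xlat_cost`` and ``constant_one_params`` objects are assumed to be set up exactly as in that test and are not built here:

    import pyopencl as cl
    from pytential.qbx.cost import QBXCostModel

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # The model no longer stores a queue or array context of its own.
    cost_model = QBXCostModel()

    # Per-level translation cost factors; the queue is now an explicit argument.
    cost_factors = cost_model.qbx_cost_factors_for_kernels_from_model(
        queue, tree.nlevels, xlat_cost, constant_one_params)

    # Per-target-box source counts and per-stage costs take the queue the same way.
    ndirect_sources_per_target_box = (
        cost_model.get_ndirect_sources_per_target_box(
            queue, geo_data.traversal()))

    p2qbxl_cost_per_box = cost_model.process_form_qbxl(
        queue, geo_data, cost_factors["p2qbxl_cost"],
        ndirect_sources_per_target_box)

The host-side ``_PythonQBXCostModel`` accepts the same ``queue`` argument for interface compatibility, using it only where device geometry data has to be transferred to the host.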