diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 750bf6f4cc5018fc14c9195ec688d45bea21d129..6b6cf7a7e311ae56d81c948859b6ad841fec0c01 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,12 +1,19 @@ -Python 3.5 POCL: +# Environment variables +# +# * PYTEST_ADDOPTS is used to filter test runs. The default value is "-k-slowtest", +# which skips the slow running tests. +# * SKIP_EXAMPLES, if non-empty, can be used to skip the examples job. + +Python 2.7 POCL: script: - - export PY_EXE=python3.5 + - export PY_EXE=python2.7 - export PYOPENCL_TEST=portable + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} - export EXTRA_INSTALL="numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: - - python3.5 + - python2.7 - pocl - large-node except: @@ -16,6 +23,7 @@ Python 3.6 POCL: script: - export PY_EXE=python3.6 - export PYOPENCL_TEST=portable + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} - export EXTRA_INSTALL="numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" @@ -28,6 +36,7 @@ Python 3.6 POCL: Python 3.6 POCL Examples: script: + - test -n "$SKIP_EXAMPLES" && exit - export PY_EXE=python3.6 - export PYOPENCL_TEST=portable - export EXTRA_INSTALL="numpy mako pyvisfile matplotlib" @@ -43,8 +52,9 @@ Python 3.6 POCL Examples: Python 3.5 Conda: script: - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine - - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - - REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt + - export CONDA_ENVIRONMENT=.test-conda-env-py3.yml + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" tags: @@ -53,27 +63,13 @@ Python 3.5 Conda: except: - tags -Python 2.7 POCL: - script: - - export PY_EXE=python2.7 - - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" - - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - - ". ./build-and-test-py-project.sh" - tags: - - python2.7 - - pocl - - large-node - except: - - tags - Python 3.5 Conda Apple: script: - export LC_ALL=en_US.UTF-8 - export LANG=en_US.UTF-8 - - export PYTEST_ADDOPTS=-k-slowtest - - CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml - - REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt + - export CONDA_ENVIRONMENT=.test-conda-env-py3-macos.yml + - export PYTEST_ADDOPTS=${PYTEST_ADDOPTS:--k-slowtest} + - export REQUIREMENTS_TXT=.test-conda-env-py3-requirements.txt - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" tags: diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py index 4be9e5ca093b5890024f92fb1307ebf42e4fb5f5..4cfd0c816f0e35542a3def17073eaa5f9850b08a 100644 --- a/pytential/qbx/fmm.py +++ b/pytential/qbx/fmm.py @@ -521,397 +521,4 @@ def drive_fmm(expansion_wrangler, src_weights): # }}} -# {{{ performance data - -def assemble_performance_data(geo_data, uses_pde_expansions, - translation_source_power=None, translation_target_power=None, - translation_max_power=None, - summarize_parallel=None, merge_close_lists=True): - """ - :arg uses_pde_expansions: A :class:`bool` indicating whether the FMM - uses translation operators that make use of the knowledge that the - potential satisfies a PDE. - :arg summarize_parallel: a function of two arguments - *(parallel_array, sym_multipliers)* used to process an array of - workloads of 'parallelizable units'. By default, all workloads are - summed into one number encompassing the total workload. - :arg merge_close_lists: A :class:`bool` indicating whether or not all - boxes requiring direct evaluation should be merged into a single - interaction list. If *False*, *part_direct* and *p2qbxl* will be - suffixed with the originating list as follows: - - * *_neighbor* (List 1) - * *_sep_smaller* (List 3 close) - * *_sep_bigger* (List 4 close). - """ - - # FIXME: This should suport target filtering. - - if summarize_parallel is None: - def summarize_parallel(parallel_array, sym_multipliers): - return np.sum(parallel_array) * sym_multipliers - - from collections import OrderedDict - result = OrderedDict() - - from pymbolic import var - p_fmm = var("p_fmm") - p_qbx = var("p_qbx") - - nqbtl = geo_data.non_qbx_box_target_lists() - - with cl.CommandQueue(geo_data.cl_context) as queue: - tree = geo_data.tree().get(queue=queue) - traversal = geo_data.traversal(merge_close_lists).get(queue=queue) - box_target_counts_nonchild = ( - nqbtl.box_target_counts_nonchild.get(queue=queue)) - - d = tree.dimensions - if uses_pde_expansions: - ncoeffs_fmm = p_fmm ** (d-1) - ncoeffs_qbx = p_qbx ** (d-1) - - if d == 2: - default_translation_source_power = 1 - default_translation_target_power = 1 - default_translation_max_power = 0 - - elif d == 3: - # Based on a reading of FMMlib, i.e. a point-and-shoot FMM. - default_translation_source_power = 0 - default_translation_target_power = 0 - default_translation_max_power = 3 - - else: - raise ValueError("Don't know how to estimate expansion complexities " - "for dimension %d" % d) - - else: - ncoeffs_fmm = p_fmm ** d - ncoeffs_qbx = p_qbx ** d - default_translation_source_power = d - default_translation_target_power = d - - if translation_source_power is None: - translation_source_power = default_translation_source_power - if translation_target_power is None: - translation_target_power = default_translation_target_power - if translation_max_power is None: - translation_max_power = default_translation_max_power - - def xlat_cost(p_source, p_target): - from pymbolic.primitives import Max - return ( - p_source ** translation_source_power - * p_target ** translation_target_power - * Max((p_source, p_target)) ** translation_max_power - ) - - result.update( - nlevels=tree.nlevels, - nboxes=tree.nboxes, - nsources=tree.nsources, - ntargets=tree.ntargets) - - # {{{ construct local multipoles - - result["form_mp"] = tree.nsources*ncoeffs_fmm - - # }}} - - # {{{ propagate multipoles upward - - result["prop_upward"] = tree.nboxes * xlat_cost(p_fmm, p_fmm) - - # }}} - - # {{{ direct evaluation to point targets (lists 1, 3 close, 4 close) - - def process_direct(): - # box -> nsources * ntargets - npart_direct_list1 = np.zeros(len(traversal.target_boxes), dtype=np.intp) - npart_direct_list3 = np.zeros(len(traversal.target_boxes), dtype=np.intp) - npart_direct_list4 = np.zeros(len(traversal.target_boxes), dtype=np.intp) - - for itgt_box, tgt_ibox in enumerate(traversal.target_boxes): - ntargets = box_target_counts_nonchild[tgt_ibox] - - npart_direct_list1_srcs = 0 - start, end = traversal.neighbor_source_boxes_starts[itgt_box:itgt_box+2] - for src_ibox in traversal.neighbor_source_boxes_lists[start:end]: - nsources = tree.box_source_counts_nonchild[src_ibox] - - npart_direct_list1_srcs += nsources - - npart_direct_list1[itgt_box] = ntargets * npart_direct_list1_srcs - - if merge_close_lists: - continue - - npart_direct_list3_srcs = 0 - - # Could be None, if not using targets with extent. - if traversal.from_sep_close_smaller_starts is not None: - start, end = ( - traversal.from_sep_close_smaller_starts[itgt_box:itgt_box+2]) - for src_ibox in traversal.from_sep_close_smaller_lists[start:end]: - nsources = tree.box_source_counts_nonchild[src_ibox] - - npart_direct_list3_srcs += nsources - - npart_direct_list3[itgt_box] = ntargets * npart_direct_list3_srcs - - npart_direct_list4_srcs = 0 - - # Could be None, if not using targets with extent. - if traversal.from_sep_close_bigger_starts is not None: - start, end = ( - traversal.from_sep_close_bigger_starts[itgt_box:itgt_box+2]) - for src_ibox in traversal.from_sep_close_bigger_lists[start:end]: - nsources = tree.box_source_counts_nonchild[src_ibox] - - npart_direct_list4_srcs += nsources - - npart_direct_list4[itgt_box] = ntargets * npart_direct_list4_srcs - - if merge_close_lists: - result["part_direct"] = summarize_parallel(npart_direct_list1, 1) - else: - result["part_direct_neighbor"] = ( - summarize_parallel(npart_direct_list1, 1)) - result["part_direct_sep_smaller"] = ( - summarize_parallel(npart_direct_list3, 1)) - result["part_direct_sep_bigger"] = ( - summarize_parallel(npart_direct_list4, 1)) - - process_direct() - - # }}} - - # {{{ translate separated siblings' ("list 2") mpoles to local - - def process_list2(): - nm2l = np.zeros(len(traversal.target_or_target_parent_boxes), dtype=np.intp) - - for itgt_box, tgt_ibox in enumerate(traversal.target_or_target_parent_boxes): - start, end = traversal.from_sep_siblings_starts[itgt_box:itgt_box+2] - - nm2l[itgt_box] += end-start - - result["m2l"] = summarize_parallel(nm2l, xlat_cost(p_fmm, p_fmm)) - - process_list2() - - # }}} - - # {{{ evaluate sep. smaller mpoles ("list 3") at particles - - def process_list3(): - nmp_eval = np.zeros( - (tree.nlevels, len(traversal.target_boxes)), - dtype=np.intp) - - assert tree.nlevels == len(traversal.from_sep_smaller_by_level) - - for ilevel, sep_smaller_list in enumerate( - traversal.from_sep_smaller_by_level): - for itgt_box, tgt_ibox in enumerate( - traversal.target_boxes_sep_smaller_by_source_level[ilevel]): - ntargets = box_target_counts_nonchild[tgt_ibox] - start, end = sep_smaller_list.starts[itgt_box:itgt_box+2] - nmp_eval[ilevel, sep_smaller_list.nonempty_indices[itgt_box]] = ( - ntargets * (end-start) - ) - - result["mp_eval"] = summarize_parallel(nmp_eval, ncoeffs_fmm) - - process_list3() - - # }}} - - # {{{ form locals for separated bigger source boxes ("list 4") - - def process_list4(): - nform_local = np.zeros( - len(traversal.target_or_target_parent_boxes), - dtype=np.intp) - - for itgt_box, tgt_ibox in enumerate(traversal.target_or_target_parent_boxes): - start, end = traversal.from_sep_bigger_starts[itgt_box:itgt_box+2] - - nform_local_box = 0 - for src_ibox in traversal.from_sep_bigger_lists[start:end]: - nsources = tree.box_source_counts_nonchild[src_ibox] - - nform_local_box += nsources - - nform_local[itgt_box] = nform_local_box - - result["form_local"] = summarize_parallel(nform_local, ncoeffs_fmm) - - process_list4() - - # }}} - - # {{{ propagate local_exps downward - - result["prop_downward"] = tree.nboxes * xlat_cost(p_fmm, p_fmm) - - # }}} - - # {{{ evaluate locals - - result["eval_part"] = tree.ntargets * ncoeffs_fmm - - # }}} - - # {{{ form global qbx locals - - global_qbx_centers = geo_data.global_qbx_centers() - - # If merge_close_lists is False above, then this builds another traversal - # (which is OK). - qbx_center_to_target_box = geo_data.qbx_center_to_target_box() - center_to_targets_starts = geo_data.center_to_tree_targets().starts - qbx_center_to_target_box_source_level = np.empty( - (tree.nlevels,), dtype=object - ) - - for src_level in range(tree.nlevels): - qbx_center_to_target_box_source_level[src_level] = ( - geo_data.qbx_center_to_target_box_source_level(src_level) - ) - - with cl.CommandQueue(geo_data.cl_context) as queue: - global_qbx_centers = global_qbx_centers.get( - queue=queue) - qbx_center_to_target_box = qbx_center_to_target_box.get( - queue=queue) - center_to_targets_starts = center_to_targets_starts.get( - queue=queue) - for src_level in range(tree.nlevels): - qbx_center_to_target_box_source_level[src_level] = ( - qbx_center_to_target_box_source_level[src_level].get(queue=queue) - ) - - def process_form_qbxl(): - ncenters = geo_data.ncenters - - result["ncenters"] = ncenters - - # center -> nsources - np2qbxl_list1 = np.zeros(len(global_qbx_centers), dtype=np.intp) - np2qbxl_list3 = np.zeros(len(global_qbx_centers), dtype=np.intp) - np2qbxl_list4 = np.zeros(len(global_qbx_centers), dtype=np.intp) - - for itgt_center, tgt_icenter in enumerate(global_qbx_centers): - itgt_box = qbx_center_to_target_box[tgt_icenter] - - np2qbxl_list1_srcs = 0 - start, end = traversal.neighbor_source_boxes_starts[itgt_box:itgt_box+2] - for src_ibox in traversal.neighbor_source_boxes_lists[start:end]: - nsources = tree.box_source_counts_nonchild[src_ibox] - - np2qbxl_list1_srcs += nsources - - np2qbxl_list1[itgt_center] = np2qbxl_list1_srcs - - if merge_close_lists: - continue - - np2qbxl_list3_srcs = 0 - - # Could be None, if not using targets with extent. - if traversal.from_sep_close_smaller_starts is not None: - start, end = ( - traversal.from_sep_close_smaller_starts[itgt_box:itgt_box+2]) - for src_ibox in traversal.from_sep_close_smaller_lists[start:end]: - nsources = tree.box_source_counts_nonchild[src_ibox] - - np2qbxl_list3_srcs += nsources - - np2qbxl_list3[itgt_center] = np2qbxl_list3_srcs - - np2qbxl_list4_srcs = 0 - - # Could be None, if not using targets with extent. - if traversal.from_sep_close_bigger_starts is not None: - start, end = ( - traversal.from_sep_close_bigger_starts[itgt_box:itgt_box+2]) - for src_ibox in traversal.from_sep_close_bigger_lists[start:end]: - nsources = tree.box_source_counts_nonchild[src_ibox] - - np2qbxl_list4_srcs += nsources - - np2qbxl_list4[itgt_center] = np2qbxl_list4_srcs - - if merge_close_lists: - result["p2qbxl"] = summarize_parallel(np2qbxl_list1, ncoeffs_qbx) - else: - result["p2qbxl_neighbor"] = ( - summarize_parallel(np2qbxl_list1, ncoeffs_qbx)) - result["p2qbxl_sep_smaller"] = ( - summarize_parallel(np2qbxl_list3, ncoeffs_qbx)) - result["p2qbxl_sep_bigger"] = ( - summarize_parallel(np2qbxl_list4, ncoeffs_qbx)) - - process_form_qbxl() - - # }}} - - # {{{ translate from list 3 multipoles to qbx local expansions - - def process_m2qbxl(): - nm2qbxl = np.zeros( - (tree.nlevels, len(global_qbx_centers)), - dtype=np.intp) - - assert tree.nlevels == len(traversal.from_sep_smaller_by_level) - - for isrc_level, ssn in enumerate(traversal.from_sep_smaller_by_level): - - for itgt_center, tgt_icenter in enumerate(global_qbx_centers): - icontaining_tgt_box = qbx_center_to_target_box_source_level[ - isrc_level][tgt_icenter] - - if icontaining_tgt_box == -1: - continue - - start, stop = ( - ssn.starts[icontaining_tgt_box], - ssn.starts[icontaining_tgt_box+1]) - - nm2qbxl[isrc_level, itgt_center] += stop-start - - result["m2qbxl"] = summarize_parallel(nm2qbxl, xlat_cost(p_fmm, p_qbx)) - - process_m2qbxl() - - # }}} - - # {{{ translate from box local expansions to qbx local expansions - - result["l2qbxl"] = geo_data.ncenters * xlat_cost(p_fmm, p_qbx) - - # }}} - - # {{{ evaluate qbx local expansions - - def process_eval_qbxl(): - nqbx_eval = np.zeros(len(global_qbx_centers), dtype=np.intp) - - for isrc_center, src_icenter in enumerate(global_qbx_centers): - start, end = center_to_targets_starts[src_icenter:src_icenter+2] - nqbx_eval[isrc_center] += end-start - - result["qbxl2p"] = summarize_parallel(nqbx_eval, ncoeffs_qbx) - - process_eval_qbxl() - - # }}} - - return result - -# }}} - # vim: foldmethod=marker diff --git a/pytential/qbx/performance.py b/pytential/qbx/performance.py new file mode 100644 index 0000000000000000000000000000000000000000..6442a475cd477287077ae7b0aaa2732d801d03a2 --- /dev/null +++ b/pytential/qbx/performance.py @@ -0,0 +1,434 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2013 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +from six.moves import range +import numpy as np # noqa +import pyopencl as cl # noqa +import pyopencl.array # noqa + + +import logging +logger = logging.getLogger(__name__) + + +__doc__ = """ +.. autofunction:: assemble_performance_data +""" + + +# {{{ assemble_performance_data + +def assemble_performance_data(geo_data, uses_pde_expansions, + translation_source_power=None, translation_target_power=None, + translation_max_power=None, + summarize_parallel=None, merge_close_lists=True): + """ + :arg uses_pde_expansions: A :class:`bool` indicating whether the FMM + uses translation operators that make use of the knowledge that the + potential satisfies a PDE. + :arg summarize_parallel: a function of two arguments + *(parallel_array, sym_multipliers)* used to process an array of + workloads of 'parallelizable units'. By default, all workloads are + summed into one number encompassing the total workload. + :arg merge_close_lists: A :class:`bool` indicating whether or not all + boxes requiring direct evaluation should be merged into a single + interaction list. If *False*, *part_direct* and *p2qbxl* will be + suffixed with the originating list as follows: + + * *_neighbor* (List 1) + * *_sep_smaller* (List 3 close) + * *_sep_bigger* (List 4 close). + """ + + # FIXME: This should suport target filtering. + + if summarize_parallel is None: + def summarize_parallel(parallel_array, sym_multipliers): + return np.sum(parallel_array) * sym_multipliers + + from collections import OrderedDict + result = OrderedDict() + + from pymbolic import var + p_fmm = var("p_fmm") + p_qbx = var("p_qbx") + + nqbtl = geo_data.non_qbx_box_target_lists() + + with cl.CommandQueue(geo_data.cl_context) as queue: + tree = geo_data.tree().get(queue=queue) + traversal = geo_data.traversal(merge_close_lists).get(queue=queue) + box_target_counts_nonchild = ( + nqbtl.box_target_counts_nonchild.get(queue=queue)) + + d = tree.dimensions + if uses_pde_expansions: + ncoeffs_fmm = p_fmm ** (d-1) + ncoeffs_qbx = p_qbx ** (d-1) + + if d == 2: + default_translation_source_power = 1 + default_translation_target_power = 1 + default_translation_max_power = 0 + + elif d == 3: + # Based on a reading of FMMlib, i.e. a point-and-shoot FMM. + default_translation_source_power = 0 + default_translation_target_power = 0 + default_translation_max_power = 3 + + else: + raise ValueError("Don't know how to estimate expansion complexities " + "for dimension %d" % d) + + else: + ncoeffs_fmm = p_fmm ** d + ncoeffs_qbx = p_qbx ** d + default_translation_source_power = d + default_translation_target_power = d + + if translation_source_power is None: + translation_source_power = default_translation_source_power + if translation_target_power is None: + translation_target_power = default_translation_target_power + if translation_max_power is None: + translation_max_power = default_translation_max_power + + def xlat_cost(p_source, p_target): + from pymbolic.primitives import Max + return ( + p_source ** translation_source_power + * p_target ** translation_target_power + * Max((p_source, p_target)) ** translation_max_power + ) + + result.update( + nlevels=tree.nlevels, + nboxes=tree.nboxes, + nsources=tree.nsources, + ntargets=tree.ntargets) + + # {{{ construct local multipoles + + result["form_mp"] = tree.nsources*ncoeffs_fmm + + # }}} + + # {{{ propagate multipoles upward + + result["prop_upward"] = tree.nboxes * xlat_cost(p_fmm, p_fmm) + + # }}} + + # {{{ direct evaluation to point targets (lists 1, 3 close, 4 close) + + def process_direct(): + # box -> nsources * ntargets + npart_direct_list1 = np.zeros(len(traversal.target_boxes), dtype=np.intp) + npart_direct_list3 = np.zeros(len(traversal.target_boxes), dtype=np.intp) + npart_direct_list4 = np.zeros(len(traversal.target_boxes), dtype=np.intp) + + for itgt_box, tgt_ibox in enumerate(traversal.target_boxes): + ntargets = box_target_counts_nonchild[tgt_ibox] + + npart_direct_list1_srcs = 0 + start, end = traversal.neighbor_source_boxes_starts[itgt_box:itgt_box+2] + for src_ibox in traversal.neighbor_source_boxes_lists[start:end]: + nsources = tree.box_source_counts_nonchild[src_ibox] + + npart_direct_list1_srcs += nsources + + npart_direct_list1[itgt_box] = ntargets * npart_direct_list1_srcs + + if merge_close_lists: + continue + + npart_direct_list3_srcs = 0 + + # Could be None, if not using targets with extent. + if traversal.from_sep_close_smaller_starts is not None: + start, end = ( + traversal.from_sep_close_smaller_starts[itgt_box:itgt_box+2]) + for src_ibox in traversal.from_sep_close_smaller_lists[start:end]: + nsources = tree.box_source_counts_nonchild[src_ibox] + + npart_direct_list3_srcs += nsources + + npart_direct_list3[itgt_box] = ntargets * npart_direct_list3_srcs + + npart_direct_list4_srcs = 0 + + # Could be None, if not using targets with extent. + if traversal.from_sep_close_bigger_starts is not None: + start, end = ( + traversal.from_sep_close_bigger_starts[itgt_box:itgt_box+2]) + for src_ibox in traversal.from_sep_close_bigger_lists[start:end]: + nsources = tree.box_source_counts_nonchild[src_ibox] + + npart_direct_list4_srcs += nsources + + npart_direct_list4[itgt_box] = ntargets * npart_direct_list4_srcs + + if merge_close_lists: + result["part_direct"] = summarize_parallel(npart_direct_list1, 1) + else: + result["part_direct_neighbor"] = ( + summarize_parallel(npart_direct_list1, 1)) + result["part_direct_sep_smaller"] = ( + summarize_parallel(npart_direct_list3, 1)) + result["part_direct_sep_bigger"] = ( + summarize_parallel(npart_direct_list4, 1)) + + process_direct() + + # }}} + + # {{{ translate separated siblings' ("list 2") mpoles to local + + def process_list2(): + nm2l = np.zeros(len(traversal.target_or_target_parent_boxes), dtype=np.intp) + + for itgt_box, tgt_ibox in enumerate(traversal.target_or_target_parent_boxes): + start, end = traversal.from_sep_siblings_starts[itgt_box:itgt_box+2] + + nm2l[itgt_box] += end-start + + result["m2l"] = summarize_parallel(nm2l, xlat_cost(p_fmm, p_fmm)) + + process_list2() + + # }}} + + # {{{ evaluate sep. smaller mpoles ("list 3") at particles + + def process_list3(): + nmp_eval = np.zeros( + (tree.nlevels, len(traversal.target_boxes)), + dtype=np.intp) + + assert tree.nlevels == len(traversal.from_sep_smaller_by_level) + + for ilevel, sep_smaller_list in enumerate( + traversal.from_sep_smaller_by_level): + for itgt_box, tgt_ibox in enumerate( + traversal.target_boxes_sep_smaller_by_source_level[ilevel]): + ntargets = box_target_counts_nonchild[tgt_ibox] + start, end = sep_smaller_list.starts[itgt_box:itgt_box+2] + nmp_eval[ilevel, sep_smaller_list.nonempty_indices[itgt_box]] = ( + ntargets * (end-start) + ) + + result["mp_eval"] = summarize_parallel(nmp_eval, ncoeffs_fmm) + + process_list3() + + # }}} + + # {{{ form locals for separated bigger source boxes ("list 4") + + def process_list4(): + nform_local = np.zeros( + len(traversal.target_or_target_parent_boxes), + dtype=np.intp) + + for itgt_box, tgt_ibox in enumerate(traversal.target_or_target_parent_boxes): + start, end = traversal.from_sep_bigger_starts[itgt_box:itgt_box+2] + + nform_local_box = 0 + for src_ibox in traversal.from_sep_bigger_lists[start:end]: + nsources = tree.box_source_counts_nonchild[src_ibox] + + nform_local_box += nsources + + nform_local[itgt_box] = nform_local_box + + result["form_local"] = summarize_parallel(nform_local, ncoeffs_fmm) + + process_list4() + + # }}} + + # {{{ propagate local_exps downward + + result["prop_downward"] = tree.nboxes * xlat_cost(p_fmm, p_fmm) + + # }}} + + # {{{ evaluate locals + + result["eval_part"] = tree.ntargets * ncoeffs_fmm + + # }}} + + # {{{ form global qbx locals + + global_qbx_centers = geo_data.global_qbx_centers() + + # If merge_close_lists is False above, then this builds another traversal + # (which is OK). + qbx_center_to_target_box = geo_data.qbx_center_to_target_box() + center_to_targets_starts = geo_data.center_to_tree_targets().starts + qbx_center_to_target_box_source_level = np.empty( + (tree.nlevels,), dtype=object + ) + + for src_level in range(tree.nlevels): + qbx_center_to_target_box_source_level[src_level] = ( + geo_data.qbx_center_to_target_box_source_level(src_level) + ) + + with cl.CommandQueue(geo_data.cl_context) as queue: + global_qbx_centers = global_qbx_centers.get( + queue=queue) + qbx_center_to_target_box = qbx_center_to_target_box.get( + queue=queue) + center_to_targets_starts = center_to_targets_starts.get( + queue=queue) + for src_level in range(tree.nlevels): + qbx_center_to_target_box_source_level[src_level] = ( + qbx_center_to_target_box_source_level[src_level].get(queue=queue) + ) + + def process_form_qbxl(): + ncenters = geo_data.ncenters + + result["ncenters"] = ncenters + + # center -> nsources + np2qbxl_list1 = np.zeros(len(global_qbx_centers), dtype=np.intp) + np2qbxl_list3 = np.zeros(len(global_qbx_centers), dtype=np.intp) + np2qbxl_list4 = np.zeros(len(global_qbx_centers), dtype=np.intp) + + for itgt_center, tgt_icenter in enumerate(global_qbx_centers): + itgt_box = qbx_center_to_target_box[tgt_icenter] + + np2qbxl_list1_srcs = 0 + start, end = traversal.neighbor_source_boxes_starts[itgt_box:itgt_box+2] + for src_ibox in traversal.neighbor_source_boxes_lists[start:end]: + nsources = tree.box_source_counts_nonchild[src_ibox] + + np2qbxl_list1_srcs += nsources + + np2qbxl_list1[itgt_center] = np2qbxl_list1_srcs + + if merge_close_lists: + continue + + np2qbxl_list3_srcs = 0 + + # Could be None, if not using targets with extent. + if traversal.from_sep_close_smaller_starts is not None: + start, end = ( + traversal.from_sep_close_smaller_starts[itgt_box:itgt_box+2]) + for src_ibox in traversal.from_sep_close_smaller_lists[start:end]: + nsources = tree.box_source_counts_nonchild[src_ibox] + + np2qbxl_list3_srcs += nsources + + np2qbxl_list3[itgt_center] = np2qbxl_list3_srcs + + np2qbxl_list4_srcs = 0 + + # Could be None, if not using targets with extent. + if traversal.from_sep_close_bigger_starts is not None: + start, end = ( + traversal.from_sep_close_bigger_starts[itgt_box:itgt_box+2]) + for src_ibox in traversal.from_sep_close_bigger_lists[start:end]: + nsources = tree.box_source_counts_nonchild[src_ibox] + + np2qbxl_list4_srcs += nsources + + np2qbxl_list4[itgt_center] = np2qbxl_list4_srcs + + if merge_close_lists: + result["p2qbxl"] = summarize_parallel(np2qbxl_list1, ncoeffs_qbx) + else: + result["p2qbxl_neighbor"] = ( + summarize_parallel(np2qbxl_list1, ncoeffs_qbx)) + result["p2qbxl_sep_smaller"] = ( + summarize_parallel(np2qbxl_list3, ncoeffs_qbx)) + result["p2qbxl_sep_bigger"] = ( + summarize_parallel(np2qbxl_list4, ncoeffs_qbx)) + + process_form_qbxl() + + # }}} + + # {{{ translate from list 3 multipoles to qbx local expansions + + def process_m2qbxl(): + nm2qbxl = np.zeros( + (tree.nlevels, len(global_qbx_centers)), + dtype=np.intp) + + assert tree.nlevels == len(traversal.from_sep_smaller_by_level) + + for isrc_level, ssn in enumerate(traversal.from_sep_smaller_by_level): + + for itgt_center, tgt_icenter in enumerate(global_qbx_centers): + icontaining_tgt_box = qbx_center_to_target_box_source_level[ + isrc_level][tgt_icenter] + + if icontaining_tgt_box == -1: + continue + + start, stop = ( + ssn.starts[icontaining_tgt_box], + ssn.starts[icontaining_tgt_box+1]) + + nm2qbxl[isrc_level, itgt_center] += stop-start + + result["m2qbxl"] = summarize_parallel(nm2qbxl, xlat_cost(p_fmm, p_qbx)) + + process_m2qbxl() + + # }}} + + # {{{ translate from box local expansions to qbx local expansions + + result["l2qbxl"] = geo_data.ncenters * xlat_cost(p_fmm, p_qbx) + + # }}} + + # {{{ evaluate qbx local expansions + + def process_eval_qbxl(): + nqbx_eval = np.zeros(len(global_qbx_centers), dtype=np.intp) + + for isrc_center, src_icenter in enumerate(global_qbx_centers): + start, end = center_to_targets_starts[src_icenter:src_icenter+2] + nqbx_eval[isrc_center] += end-start + + result["qbxl2p"] = summarize_parallel(nqbx_eval, ncoeffs_qbx) + + process_eval_qbxl() + + # }}} + + return result + +# }}} + +# vim: foldmethod=marker diff --git a/test/test_layer_pot_identity.py b/test/test_layer_pot_identity.py index 942b6ed282ae1eb250c5d6bea645bc834dda84eb..e6ec4a9c88cf807c0effa5886f6e16608d1aaae6 100644 --- a/test/test_layer_pot_identity.py +++ b/test/test_layer_pot_identity.py @@ -233,12 +233,12 @@ class DynamicTestCase(object): if (self.geometry.mesh_name == "sphere" and self.k != 0 and self.fmm_backend == "sumpy"): - pytest.skip("both direct eval and generating the FMM kernels " + raise ValueError("both direct eval and generating the FMM kernels " "are too slow") if (self.geometry.mesh_name == "sphere" and self.expr.zero_op_name == "green_grad"): - pytest.skip("does not achieve sufficient precision") + raise ValueError("does not achieve sufficient precision") if self.fmm_backend == "fmmlib": pytest.importorskip("pyfmmlib") @@ -246,22 +246,28 @@ class DynamicTestCase(object): # {{{ integral identity tester + +@pytest.mark.slowtest +@pytest.mark.parametrize("case", [ + DynamicTestCase(SphereGeometry(), GreenExpr(), 0), +]) +def test_identity_convergence_slow(ctx_getter, case): + test_identity_convergence(case) + + @pytest.mark.parametrize("case", [ - tc - for geom in [ - StarfishGeometry(), - SphereGeometry(), - ] - for tc in [ - DynamicTestCase(geom, GreenExpr(), 0), - DynamicTestCase(geom, GreenExpr(), 1.2), - DynamicTestCase(geom, GradGreenExpr(), 0), - DynamicTestCase(geom, GradGreenExpr(), 1.2), - DynamicTestCase(geom, ZeroCalderonExpr(), 0), - - DynamicTestCase(geom, GreenExpr(), 0, fmm_backend="fmmlib"), - DynamicTestCase(geom, GreenExpr(), 1.2, fmm_backend="fmmlib"), - ]]) + # 2d + DynamicTestCase(StarfishGeometry(), GreenExpr(), 0), + DynamicTestCase(StarfishGeometry(), GreenExpr(), 1.2), + DynamicTestCase(StarfishGeometry(), GradGreenExpr(), 0), + DynamicTestCase(StarfishGeometry(), GradGreenExpr(), 1.2), + DynamicTestCase(StarfishGeometry(), ZeroCalderonExpr(), 0), + DynamicTestCase(StarfishGeometry(), GreenExpr(), 0, fmm_backend="fmmlib"), + DynamicTestCase(StarfishGeometry(), GreenExpr(), 1.2, fmm_backend="fmmlib"), + # 3d + DynamicTestCase(SphereGeometry(), GreenExpr(), 0, fmm_backend="fmmlib"), + DynamicTestCase(SphereGeometry(), GreenExpr(), 1.2, fmm_backend="fmmlib") +]) def test_identity_convergence(ctx_getter, case, visualize=False): logging.basicConfig(level=logging.INFO) diff --git a/test/test_performance_model.py b/test/test_performance_model.py new file mode 100644 index 0000000000000000000000000000000000000000..6658857e15a09cd82d3b42fdf7ecbcd30fc2c04d --- /dev/null +++ b/test/test_performance_model.py @@ -0,0 +1,131 @@ +from __future__ import division, print_function + +__copyright__ = "Copyright (C) 2018 Matt Wala" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import numpy as np +import numpy.linalg as la # noqa +import pyopencl as cl +import pyopencl.clmath # noqa +import pytest +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + +from pytential import bind, sym, norm # noqa + + +# {{{ global params + +TARGET_ORDER = 8 +OVSMP_FACTOR = 5 +TCF = 0.9 +QBX_ORDER = 5 +FMM_ORDER = 10 + +DEFAULT_LPOT_KWARGS = { + "_box_extent_norm": "l2", + "_from_sep_smaller_crit": "static_l2", + } + +# }}} + + +@pytest.mark.parametrize("dim", (2, 3)) +def test_performance_model(ctx_getter, dim): + cl_ctx = ctx_getter() + queue = cl.CommandQueue(cl_ctx) + + # {{{ get lpot source + + from meshmode.discretization import Discretization + from meshmode.discretization.poly_element import ( + InterpolatoryQuadratureSimplexGroupFactory) + + target_order = TARGET_ORDER + + if dim == 2: + from meshmode.mesh.generation import starfish, make_curve_mesh + mesh = make_curve_mesh(starfish, np.linspace(0, 1, 50), order=target_order) + elif dim == 3: + from meshmode.mesh.generation import generate_icosphere + mesh = generate_icosphere(r=1, order=target_order) + else: + raise ValueError("unknown dimension: %d" % dim) + + pre_density_discr = Discretization( + queue.context, mesh, + InterpolatoryQuadratureSimplexGroupFactory(target_order)) + + lpot_kwargs = DEFAULT_LPOT_KWARGS.copy() + lpot_kwargs.update( + _expansion_stick_out_factor=TCF, + fmm_order=FMM_ORDER, qbx_order=QBX_ORDER, + ) + + from pytential.qbx import QBXLayerPotentialSource + lpot_source = QBXLayerPotentialSource( + pre_density_discr, OVSMP_FACTOR*target_order, + **lpot_kwargs,) + + lpot_source, _ = lpot_source.with_refinement() + + # }}} + + # {{{ run performance model + + costs = {} + + def inspect_geo_data(insn, bound_expr, geo_data): + from pytential.qbx.performance import assemble_performance_data + costs["costs"] = assemble_performance_data( + geo_data, uses_pde_expansions=True, merge_close_lists=False) + return False + + lpot_source = lpot_source.copy(geometry_data_inspector=inspect_geo_data) + density_discr = lpot_source.density_discr + nodes = density_discr.nodes().with_queue(queue) + sigma = cl.clmath.sin(10 * nodes[0]) + + from sumpy.kernel import LaplaceKernel + sigma_sym = sym.var("sigma") + k_sym = LaplaceKernel(lpot_source.ambient_dim) + sym_op = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1) + + bound_op = bind(lpot_source, sym_op) + bound_op(queue, sigma=sigma) + + # }}} + + +# You can test individual routines by typing +# $ python test_layer_pot.py 'test_routine()' + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + + +# vim: foldmethod=marker