diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..149f0473317cd4ffbe6e1f35ed5d2a8f02105369 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,50 @@ +name: CI +on: + push: + branches: + - master + pull_request: + paths-ignore: + - 'doc/*.rst' + schedule: + - cron: '17 3 * * 0' + +jobs: + flake8: + name: Flake8 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - + uses: actions/setup-python@v1 + with: + python-version: '3.x' + - name: "Main Script" + run: | + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh + . ./prepare-and-run-flake8.sh sumpy test + + pytest3: + name: Conda Pytest Py3 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: "Main Script" + run: | + grep -v symengine .test-conda-env-py3.yml > .test-conda-env.yml + CONDA_ENVIRONMENT=.test-conda-env.yml + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + . ./build-and-test-py-project-within-miniconda.sh + + pytest3symengine: + name: Conda Pytest Py3 Symengine + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: "Main Script" + run: | + CONDA_ENVIRONMENT=.test-conda-env-py3.yml + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + . ./build-and-test-py-project-within-miniconda.sh + +# vim: sw=4 diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d74930eb8b76f1a0ee99701eb03a8e4c69ca1d7f..8173bd209ee80755e4fc0d3f47ad6b605b1fdcea 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,26 +17,10 @@ # reports: # junit: test/pytest.xml -Python 2.7 POCL: - script: - - export PY_EXE=python2.7 - - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 numpy mako" - - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - - ". ./build-and-test-py-project.sh" - tags: - - python2.7 - - pocl - except: - - tags - artifacts: - reports: - junit: test/pytest.xml - Python 3 POCL: script: - export PY_EXE=python3 - - export PYOPENCL_TEST=portable + - export PYOPENCL_TEST=portable:pthread - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" @@ -71,11 +55,29 @@ Python 3.6 Conda: # Disable caching to ensure SymEngine code generation is exercised. - export SUMPY_NO_CACHE=1 - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine + - export PYOPENCL_TEST=portable:pthread - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" tags: - - linux + - porter + except: + - tags + artifacts: + reports: + junit: test/pytest.xml + +Python 3.6 POCL Titan X: + script: + # Disable caching to ensure SymEngine code generation is exercised. + - export SUMPY_NO_CACHE=1 + - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine + - export PYOPENCL_TEST=portable:titan + - CONDA_ENVIRONMENT=.test-conda-env-py3.yml + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + - ". 
./build-and-test-py-project-within-miniconda.sh" + tags: + - nvidia-titan-x except: - tags artifacts: @@ -106,7 +108,7 @@ Benchmarks: - sed 's/python=3/python=3.6/' .test-conda-env-py3.yml > .test-conda-env.yml - CONDA_ENVIRONMENT=.test-conda-env.yml - PROJECT=sumpy - - PYOPENCL_TEST=portable + - PYOPENCL_TEST=portable:pthread - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-benchmark-py-project.sh - ". ./build-and-benchmark-py-project.sh" tags: diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index 2264e926055b28bf7bde2c3ddaa54f38bed42221..f9ede8fcf96f730ed9d14ce03d0504b81ab7ee13 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -1,14 +1,14 @@ name: test-conda-env-py3 channels: -- inducer - conda-forge -- defaults +- nodefaults dependencies: - git -- conda-forge::numpy -- conda-forge::sympy +- numpy +- sympy - pocl +- pocl-cuda - islpy - pyopencl - python=3 diff --git a/README.rst b/README.rst index 884bef17cbaea4675daced30e117881e603fb7dd..63c0a5e2599bad6c7792ae07b80f3eff81913a24 100644 --- a/README.rst +++ b/README.rst @@ -4,9 +4,9 @@ sumpy: n-body kernels and translation operators .. image:: https://gitlab.tiker.net/inducer/sumpy/badges/master/pipeline.svg :alt: Gitlab Build Status :target: https://gitlab.tiker.net/inducer/sumpy/commits/master -.. image:: https://dev.azure.com/ak-spam/inducer/_apis/build/status/inducer.sumpy?branchName=master - :alt: Azure Build Status - :target: https://dev.azure.com/ak-spam/inducer/_build/latest?definitionId=17&branchName=master +.. image:: https://github.com/inducer/sumpy/workflows/CI/badge.svg?branch=master&event=push + :alt: Github Build Status + :target: https://github.com/inducer/sumpy/actions?query=branch%3Amaster+workflow%3ACI+event%3Apush .. image:: https://badge.fury.io/py/sumpy.png :alt: Python Package Index Release Page :target: https://pypi.org/project/sumpy/ diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index 9f1f92a26ed125d3614d9a46d3436711e0b2b942..0000000000000000000000000000000000000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,96 +0,0 @@ -jobs: -- - job: 'Python2' - pool: - vmImage: 'ubuntu-latest' - - steps: - - - script: | - set -e - sed 's/python=3/python=2.7/' .test-conda-env-py3.yml > .test-conda-env-py2-pre.yml - grep -v symengine .test-conda-env-py2-pre.yml > .test-conda-env-py2.yml - cat .test-conda-env-py2.yml - CONDA_ENVIRONMENT=.test-conda-env-py2.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . ./build-and-test-py-project-within-miniconda.sh - - displayName: 'Pytest Conda' - - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'JUnit' - testResultsFiles: 'test/pytest.xml' - -- - job: 'Python3' - pool: - vmImage: 'ubuntu-latest' - - steps: - - - script: | - set -e - grep -v symengine .test-conda-env-py3.yml > .test-conda-env.yml - CONDA_ENVIRONMENT=.test-conda-env.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . ./build-and-test-py-project-within-miniconda.sh - - displayName: 'Pytest Conda' - - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'JUnit' - testResultsFiles: 'test/pytest.xml' - -- - job: 'Python3Symengine' - pool: - vmImage: 'ubuntu-latest' - - steps: - - - script: | - set -e - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . 
./build-and-test-py-project-within-miniconda.sh - - displayName: 'Pytest Conda' - - - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'JUnit' - testResultsFiles: 'test/pytest.xml' - -- - job: 'Flake8' - pool: - vmImage: 'ubuntu-latest' - strategy: - matrix: - Python37: - python.version: '3.7' - - steps: - - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - - - - script: | - set -e - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh - . ./prepare-and-run-flake8.sh sumpy test - - displayName: 'Flake8' - -schedules: -- - cron: "0 0 * * 0" - displayName: Weekly build - branches: - include: - - master diff --git a/benchmarks/bench_translations.py b/benchmarks/bench_translations.py index 621ac275d9eb7fcf05763cf5c62d3436dda15e6b..1ebc0d88fbb8be06136d5028441ad6c5076c13d2 100644 --- a/benchmarks/bench_translations.py +++ b/benchmarks/bench_translations.py @@ -65,9 +65,15 @@ class TranslationBenchmarkSuite: dvec = sym.make_sym_vector("d", knl.dim) src_rscale = sym.Symbol("src_rscale") tgt_rscale = sym.Symbol("tgt_rscale") - result = l_expn.translate_from(m_expn, src_coeff_exprs, src_rscale, - dvec, tgt_rscale) sac = SymbolicAssignmentCollection() + try: + result = l_expn.translate_from(m_expn, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac) + except TypeError: + # Support older interface to make it possible to compare + # in CI run + result = l_expn.translate_from(m_expn, src_coeff_exprs, src_rscale, + dvec, tgt_rscale) for i, expr in enumerate(result): sac.assign_unique("coeff%d" % i, expr) sac.run_global_cse() @@ -77,7 +83,7 @@ class TranslationBenchmarkSuite: return sum([counter.rec(insn.expression)+1 for insn in insns]) track_m2l_op_count.unit = "ops" - track_m2l_op_count.timeout = 200.0 + track_m2l_op_count.timeout = 300.0 class LaplaceVolumeTaylorTranslation(TranslationBenchmarkSuite): diff --git a/sumpy/__init__.py b/sumpy/__init__.py index fd0fc5154e04882ef45a0d6f8e282362e6f39f27..d7eb1bf2cd6860ec0be0c228962a582bd6191b69 100644 --- a/sumpy/__init__.py +++ b/sumpy/__init__.py @@ -26,7 +26,8 @@ import os from sumpy.p2p import P2P, P2PFromCSR from sumpy.p2e import P2EFromSingleBox, P2EFromCSR from sumpy.e2p import E2PFromSingleBox, E2PFromCSR -from sumpy.e2e import E2EFromCSR, E2EFromChildren, E2EFromParent +from sumpy.e2e import (E2EFromCSR, E2EFromChildren, E2EFromParent, + E2EFromCSRTranslationInvariant, E2EFromCSRTranslationClassesPrecompute) from sumpy.version import VERSION_TEXT from pytools.persistent_dict import WriteOncePersistentDict @@ -34,7 +35,8 @@ __all__ = [ "P2P", "P2PFromCSR", "P2EFromSingleBox", "P2EFromCSR", "E2PFromSingleBox", "E2PFromCSR", - "E2EFromCSR", "E2EFromChildren", "E2EFromParent"] + "E2EFromCSR", "E2EFromChildren", "E2EFromParent", + "E2EFromCSRTranslationInvariant", "E2EFromCSRTranslationClassesPrecompute"] code_cache = WriteOncePersistentDict("sumpy-code-cache-v6-"+VERSION_TEXT) diff --git a/sumpy/cse.py b/sumpy/cse.py index 823ef54667ff6c1b8895eab3d59e9b3bd5316f7a..476eb147526b8950a1e4a3db5402a2e9e41b8a54 100644 --- a/sumpy/cse.py +++ b/sumpy/cse.py @@ -145,7 +145,6 @@ class FuncArgTracker(object): for func_i, func in enumerate(funcs): func_argset = set() - for func_arg in func.args: arg_number = self.get_or_add_value_number(func_arg) func_argset.add(arg_number) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 12e80b5f5d79781ab7ff78bcad8954e3e6ef3271..c5451e272f2f3e094bf2b7826916991bf83c7420 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -111,7 +111,7 @@ class 
E2EBase(KernelCacheWrapper): for i, coeff_i in enumerate( self.tgt_expansion.translate_from( self.src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale))] + dvec, tgt_rscale, sac))] sac.run_global_cse() @@ -256,6 +256,263 @@ class E2EFromCSR(E2EBase): # }}} +# {{{ + +class E2EFromCSRTranslationInvariant(E2EFromCSR): + """Implements translation from a "compressed sparse row"-like source box + list for translation invariant Kernels. + """ + default_name = "e2e_from_csr_translation_invariant" + + def get_translation_loopy_insns(self): + from sumpy.symbolic import make_sym_vector + dvec = make_sym_vector("d", self.dim) + + src_coeff_exprs = [sym.Symbol("src_coeff%d" % i) + for i in range(len(self.src_expansion))] + src_rscale = sym.Symbol("src_rscale") + + tgt_rscale = sym.Symbol("tgt_rscale") + + nprecomputed_exprs = \ + self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) + + precomputed_exprs = [sym.Symbol("precomputed_expr%d" % i) + for i in range(nprecomputed_exprs)] + + from sumpy.assignment_collection import SymbolicAssignmentCollection + sac = SymbolicAssignmentCollection() + tgt_coeff_names = [ + sac.assign_unique("coeff%d" % i, coeff_i) + for i, coeff_i in enumerate( + self.tgt_expansion.translate_from( + self.src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac, + precomputed_exprs=precomputed_exprs))] + + sac.run_global_cse() + + from sumpy.codegen import to_loopy_insns + return to_loopy_insns( + six.iteritems(sac.assignments), + vector_names=set(["d"]), + pymbolic_expr_maps=[self.tgt_expansion.get_code_transformer()], + retain_names=tgt_coeff_names, + complex_dtype=np.complex128 # FIXME + ) + + def get_kernel(self): + ncoeff_src = len(self.src_expansion) + ncoeff_tgt = len(self.tgt_expansion) + nprecomputed_exprs = \ + self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) + + # To clarify terminology: + # + # isrc_box -> The index in a list of (in this case, source) boxes + # src_ibox -> The (global) box number for the (in this case, source) box + # + # (same for itgt_box, tgt_ibox) + + from sumpy.tools import gather_loopy_arguments + loopy_knl = lp.make_kernel( + [ + "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] + + <> tgt_center[idim] = centers[idim, tgt_ibox] \ + + <> isrc_start = src_box_starts[itgt_box] + <> isrc_stop = src_box_starts[itgt_box+1] + + for isrc_box + <> src_ibox = src_box_lists[isrc_box] \ + {id=read_src_ibox} + + <> src_center[idim] = centers[idim, src_ibox] {dup=idim} + <> d[idim] = tgt_center[idim] - src_center[idim] \ + {dup=idim} + <> translation_class = \ + m2l_translation_classes_lists[isrc_box] + <> translation_class_rel = translation_class - \ + translation_classes_level_start + """] + [""" + <> precomputed_expr{idx} = \ + m2l_precomputed_exprs[translation_class_rel, {idx}] + """.format(idx=idx) for idx in range( + nprecomputed_exprs)] + [""" + <> src_coeff{coeffidx} = \ + src_expansions[src_ibox - src_base_ibox, {coeffidx}] \ + {{dep=read_src_ibox}} + """.format(coeffidx=i) for i in range(ncoeff_src)] + [ + + ] + self.get_translation_loopy_insns() + [""" + end + + """] + [""" + tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \ + simul_reduce(sum, isrc_box, coeff{coeffidx}) \ + {{id_prefix=write_expn}} + """.format(coeffidx=i) for i in range(ncoeff_tgt)] + [""" + end + """], + [ + lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), + lp.ValueArg("src_rscale,tgt_rscale", None), + lp.GlobalArg("src_box_starts, src_box_lists", + None, shape=None, strides=(1,), offset=lp.auto), 
+ lp.ValueArg("aligned_nboxes,tgt_base_ibox,src_base_ibox", + np.int32), + lp.ValueArg("nsrc_level_boxes,ntgt_level_boxes", + np.int32), + lp.ValueArg("translation_classes_level_start", + np.int32), + lp.GlobalArg("src_expansions", None, + shape=("nsrc_level_boxes", ncoeff_src), offset=lp.auto), + lp.GlobalArg("tgt_expansions", None, + shape=("ntgt_level_boxes", ncoeff_tgt), offset=lp.auto), + lp.ValueArg("ntranslation_classes, ntranslation_classes_lists", + np.int32), + lp.GlobalArg("m2l_translation_classes_lists", np.int32, + shape=("ntranslation_classes_lists"), strides=(1,), + offset=lp.auto), + lp.GlobalArg("m2l_precomputed_exprs", None, + shape=("ntranslation_classes", nprecomputed_exprs), + offset=lp.auto), + "..." + ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), + name=self.name, + assumptions="ntgt_boxes>=1", + silenced_warnings="write_race(write_expn*)", + default_offset=lp.auto, + fixed_parameters=dict(dim=self.dim, + nprecomputed_exprs=nprecomputed_exprs), + lang_version=MOST_RECENT_LANGUAGE_VERSION + ) + + for expn in [self.src_expansion, self.tgt_expansion]: + loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + + return loopy_knl + + +class E2EFromCSRTranslationClassesPrecompute(E2EFromCSR): + """Implements precomputing the translation classes dependent + derivatives. + """ + default_name = "e2e_from_csr_translation_classes_precompute" + + def get_translation_loopy_insns(self): + from sumpy.symbolic import make_sym_vector + dvec = make_sym_vector("d", self.dim) + + src_rscale = sym.Symbol("src_rscale") + tgt_rscale = sym.Symbol("tgt_rscale") + + from sumpy.assignment_collection import SymbolicAssignmentCollection + sac = SymbolicAssignmentCollection() + tgt_coeff_names = [ + sac.assign_unique("precomputed_expr%d" % i, coeff_i) + for i, coeff_i in enumerate( + self.tgt_expansion.m2l_global_precompute_exprs( + self.src_expansion, src_rscale, + dvec, tgt_rscale, sac))] + + sac.run_global_cse() + + from sumpy.codegen import to_loopy_insns + return to_loopy_insns( + six.iteritems(sac.assignments), + vector_names=set(["d"]), + pymbolic_expr_maps=[self.tgt_expansion.get_code_transformer()], + retain_names=tgt_coeff_names, + complex_dtype=np.complex128 # FIXME + ) + + def get_kernel(self): + nprecomputed_exprs = \ + self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) + from sumpy.tools import gather_loopy_arguments + loopy_knl = lp.make_kernel( + [ + "{[itr_class]: 0<=itr_class d[idim] = m2l_translation_vectors[idim, \ + itr_class + translation_classes_level_start] {dup=idim} + + """] + self.get_translation_loopy_insns() + [""" + m2l_precomputed_exprs[itr_class, {idx}] = precomputed_expr{idx} + """.format(idx=i) for i in range(nprecomputed_exprs)] + [""" + end + """], + [ + lp.ValueArg("src_rscale", None), + lp.GlobalArg("m2l_precomputed_exprs", None, + shape=("ntranslation_classes", nprecomputed_exprs), + offset=lp.auto), + lp.GlobalArg("m2l_translation_vectors", None, + shape=("dim", "ntranslation_vectors")), + lp.ValueArg("ntranslation_classes", np.int32), + lp.ValueArg("ntranslation_vectors", np.int32), + lp.ValueArg("translation_classes_level_start", np.int32), + "..." 
+ ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), + name=self.name, + assumptions="ntranslation_classes>=1", + default_offset=lp.auto, + fixed_parameters=dict(dim=self.dim), + lang_version=MOST_RECENT_LANGUAGE_VERSION + ) + + for expn in [self.src_expansion, self.tgt_expansion]: + loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + + return loopy_knl + + def get_optimized_kernel(self): + # FIXME + knl = self.get_kernel() + knl = lp.split_iname(knl, "itr_class", 16, outer_tag="g.0") + + return knl + + def __call__(self, queue, **kwargs): + """ + :arg src_rscale: + :arg translation_classes_level_start: + :arg ntranslation_classes: + :arg m2l_precomputed_exprs: + :arg m2l_translation_vectors: + """ + knl = self.get_cached_optimized_kernel() + + m2l_translation_vectors = kwargs.pop("m2l_translation_vectors") + # "1" may be passed for rscale, which won't have its type + # meaningfully inferred. Make the type of rscale explicit. + src_rscale = m2l_translation_vectors.dtype.type(kwargs.pop("src_rscale")) + + return knl(queue, + src_rscale=src_rscale, + m2l_translation_vectors=m2l_translation_vectors, + **kwargs) + +# }}} + + # {{{ translation from a box's children class E2EFromChildren(E2EBase): diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 7b0072ad55708ba205ceed3448914ad0d8eda281..41e6ea3e983984740d4bb3af0df7f7287c826991 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -91,13 +91,12 @@ class E2PBase(KernelCacheWrapper): coeff_exprs = [sym.Symbol("coeff%d" % i) for i in range(len(self.expansion.get_coefficient_identifiers()))] - value = self.expansion.evaluate(coeff_exprs, bvec, rscale) - result_names = [ - sac.assign_unique("result_%d_p" % i, - knl.postprocess_at_target(value, bvec)) - for i, knl in enumerate(self.kernels) - ] + result_names = [] + for i, knl in enumerate(self.kernels): + value = self.expansion.evaluate(coeff_exprs, bvec, rscale, sac, knl=knl) + name = sac.assign_unique("result_%d_p" % i, value) + result_names.append(name) sac.run_global_cse() diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index 9d3a917a9a16516bf6968a0446eccc0886554ed2..e01ec5e1872bba94e3a9b288a146491c2fe68b8e 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -57,6 +57,7 @@ class ExpansionBase(object): .. automethod:: translate_from .. automethod:: __eq__ .. automethod:: __ne__ + .. automethod:: get_kernel_derivative_taker """ def __init__(self, kernel, order, use_rscale=None): @@ -114,20 +115,25 @@ class ExpansionBase(object): """ raise NotImplementedError - def coefficients_from_source(self, avec, bvec, rscale): + def coefficients_from_source(self, avec, bvec, rscale, sac): """Form an expansion from a source point. :arg avec: vector from source to center. :arg bvec: vector from center to target. Not usually necessary, except for line-Taylor expansion. + :arg sac: A SymbolicAssignmentCollction used for storing + temporary expressions. :returns: a list of :mod:`sympy` expressions representing the coefficients of the expansion. """ raise NotImplementedError - def evaluate(self, coeffs, bvec, rscale): + def evaluate(self, coeffs, bvec, rscale, sac): """ + :arg sac: A SymbolicAssignmentCollction used for storing + temporary expressions. + :return: a :mod:`sympy` expression corresponding to the evaluated expansion with the coefficients in *coeffs*. 
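Note: throughout this patch, an explicit `sac` (SymbolicAssignmentCollection) argument is threaded through `coefficients_from_source`, `evaluate`, and `translate_from` so that expensive subexpressions can be stored once as named temporaries instead of being duplicated in the generated loopy instructions. Below is a minimal sketch of the pattern, assuming a helper with the behavior suggested by the `add_to_sac` call sites in this patch; the real sumpy helper may differ in naming and details.

    import sumpy.symbolic as sym

    def add_to_sac(sac, expr):
        # Without a collection, fall back to returning the expression itself.
        if sac is None:
            return expr
        # Otherwise store the expression under a fresh name and hand back a
        # symbol referring to it, so that later uses share one assignment.
        name = sac.assign_unique("temp", expr)
        return sym.Symbol(name)

The collection is then flushed with `sac.run_global_cse()` and turned into loopy instructions via `to_loopy_insns`, as in the existing `get_translation_loopy_insns` methods.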
@@ -136,7 +142,7 @@ class ExpansionBase(object): raise NotImplementedError def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale): + dvec, tgt_rscale, sac): raise NotImplementedError def update_persistent_hash(self, key_hash, key_builder): @@ -155,6 +161,14 @@ class ExpansionBase(object): def __ne__(self, other): return not self.__eq__(other) + def get_kernel_derivative_taker(self, dvec, rscale, sac): + """Return a MiDerivativeTaker instance that supports taking + derivatives of the kernel with respect to dvec + """ + from sumpy.tools import MiDerivativeTaker + return MiDerivativeTaker(self.kernel.get_expression(dvec), dvec, rscale, + sac) + # }}} @@ -635,6 +649,11 @@ class LaplaceConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): def __init__(self, kernel, order, use_rscale): self.expansion_terms_wrangler_key = (order, kernel.dim) + def get_kernel_derivative_taker(self, dvec, rscale, sac): + from sumpy.tools import LaplaceDerivativeTaker + return LaplaceDerivativeTaker(self.kernel.get_expression(dvec), dvec, + rscale, sac) + class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): @@ -646,6 +665,11 @@ class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): helmholtz_k_name = kernel.get_base_kernel().helmholtz_k_name self.expansion_terms_wrangler_key = (order, kernel.dim, helmholtz_k_name) + def get_kernel_derivative_taker(self, dvec, rscale, sac): + from sumpy.tools import HelmholtzYukawaDerivativeTaker + return HelmholtzYukawaDerivativeTaker(self.kernel.get_expression(dvec), dvec, + rscale, sac) + class BiharmonicConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): @@ -656,6 +680,11 @@ class BiharmonicConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): def __init__(self, kernel, order, use_rscale): self.expansion_terms_wrangler_key = (order, kernel.dim) + def get_kernel_derivative_taker(self, dvec, rscale, sac): + from sumpy.tools import RadialDerivativeTaker + return RadialDerivativeTaker(self.kernel.get_expression(dvec), dvec, + rscale, sac) + # }}} diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index c32778ae14c525c2a7acdffbc2850ead0a160691..7e646e8f160cb070e08c3b817d87742784dd2ad5 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -24,12 +24,16 @@ THE SOFTWARE. 
from six.moves import range, zip import sumpy.symbolic as sym +from pytools import single_valued from sumpy.expansion import ( ExpansionBase, VolumeTaylorExpansion, LaplaceConformingVolumeTaylorExpansion, HelmholtzConformingVolumeTaylorExpansion, BiharmonicConformingVolumeTaylorExpansion) +from sumpy.tools import (matvec_toeplitz_upper_triangular, + fft_toeplitz_upper_triangular, add_to_sac) + class LocalExpansionBase(ExpansionBase): pass @@ -58,7 +62,7 @@ class LineTaylorLocalExpansion(LocalExpansionBase): def get_coefficient_identifiers(self): return list(range(self.order+1)) - def coefficients_from_source(self, avec, bvec, rscale): + def coefficients_from_source(self, avec, bvec, rscale, sac): # no point in heeding rscale here--just ignore it if bvec is None: raise RuntimeError("cannot use line-Taylor expansions in a setting " @@ -99,7 +103,7 @@ class LineTaylorLocalExpansion(LocalExpansionBase): .subs("tau", 0) for i in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale): + def evaluate(self, coeffs, bvec, rscale, sac, knl=None): # no point in heeding rscale here--just ignore it from pytools import factorial return sym.Add(*( @@ -116,33 +120,138 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): Coefficients represent derivative values of the kernel. """ - def coefficients_from_source(self, avec, bvec, rscale): - from sumpy.tools import MiDerivativeTaker - ppkernel = self.kernel.postprocess_at_source( - self.kernel.get_expression(avec), avec) + def coefficients_from_source(self, avec, bvec, rscale, sac): + from sumpy.tools import MiDerivativeTakerWrapper + + result = [] + taker = self.get_kernel_derivative_taker(avec, rscale, sac) + expr_dict = {(0,)*self.dim: 1} + expr_dict = self.kernel.get_derivative_transformation_at_source(expr_dict) + pp_nderivatives = single_valued(sum(mi) for mi in expr_dict.keys()) + + for mi in self.get_coefficient_identifiers(): + wrapper = MiDerivativeTakerWrapper(taker, mi) + mi_expr = self.kernel.postprocess_at_source(wrapper, avec) + # By passing `rscale` to the derivative taker we are taking a scaled + # version of the derivative which is `expr.diff(mi)*rscale**sum(mi)` + # which might be implemented efficiently for kernels like Laplace. + # One caveat is that `postprocess_at_source` might take more + # derivatives which would multiply the expression by more `rscale`s + # than necessary as the derivative taker does not know about + # `postprocess_at_source`. This is corrected by dividing by `rscale`. 
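Note: to make the rscale correction above concrete, suppose `postprocess_at_source` applies exactly one extra derivative, so `pp_nderivatives == 1`. For a multi-index `mi` the scaled taker then effectively produces

    (d^(mi + e_j) G)(avec) * rscale**(sum(mi) + 1)

while the coefficient is only supposed to carry `rscale**sum(mi)`; dividing by `rscale**pp_nderivatives` strips the surplus factor. This is a restatement of the comment above for illustration, not additional sumpy API.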
+ expr = mi_expr / rscale ** pp_nderivatives + result.append(expr) - taker = MiDerivativeTaker(ppkernel, avec) - return [ - taker.diff(mi) * rscale ** sum(mi) - for mi in self.get_coefficient_identifiers()] + return result - def evaluate(self, coeffs, bvec, rscale): + def evaluate(self, coeffs, bvec, rscale, sac, knl=None): evaluated_coeffs = ( self.expansion_terms_wrangler.get_full_kernel_derivatives_from_stored( coeffs, rscale)) - bvec = [b*rscale**-1 for b in bvec] + bvec_scaled = [b*rscale**-1 for b in bvec] from sumpy.tools import mi_power, mi_factorial - return sum( + result = sum( coeff - * mi_power(bvec, mi, evaluate=False) + * mi_power(bvec_scaled, mi, evaluate=False) / mi_factorial(mi) for coeff, mi in zip( evaluated_coeffs, self.get_full_coefficient_identifiers())) + if knl is None: + knl = self.kernel + return knl.postprocess_at_target(result, bvec) + + def m2l_global_precompute_nexpr(self, src_expansion): + return len(self.m2l_global_precompute_mis(src_expansion)[1]) + + def m2l_global_precompute_mis(self, src_expansion): + from pytools import generate_nonnegative_integer_tuples_below as gnitb + from sumpy.tools import add_mi + + max_mi = [0]*self.dim + for i in range(self.dim): + max_mi[i] = max(mi[i] for mi in + src_expansion.get_coefficient_identifiers()) + max_mi[i] += max(mi[i] for mi in + self.get_coefficient_identifiers()) + + toeplitz_matrix_coeffs = list(gnitb([m + 1 for m in max_mi])) + + needed_vector_terms = [] + # For eg: 2D full Taylor Laplace, we only need kernel derivatives + # (n1+n2, m1+m2), n1+m1<=p, n2+m2<=p + for tgt_deriv in self.get_coefficient_identifiers(): + for src_deriv in src_expansion.get_coefficient_identifiers(): + needed = add_mi(src_deriv, tgt_deriv) + if needed not in needed_vector_terms: + needed_vector_terms.append(needed) + + return toeplitz_matrix_coeffs, needed_vector_terms, max_mi + + def m2l_global_precompute_exprs(self, src_expansion, src_rscale, + dvec, tgt_rscale, sac, use_fft=False): + # We know the general form of the multipole expansion is: + # + # coeff0 * diff(kernel(src - c1), mi0) + + # coeff1 * diff(kernel(src - c1, mi1) + ... + # + # To get the local expansion coefficients, we take derivatives of + # the multipole expansion. For eg: the coefficient w.r.t mir is + # + # coeff0 * diff(kernel(c2 - c1), mi0 + mir) + + # coeff1 * diff(kernel(c2 - c1, mi1 + mir) + ... + # + # The derivatives above depends only on `c2 - c1` and can be precomputed + # globally as there are only a finite number of values for `c2 - c1` for + # m2l. 
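Note: the M2L translation built later from these precomputed derivatives amounts to multiplying an upper-triangular Toeplitz matrix, whose first row holds the source coefficients, by the vector of kernel derivatives; that is what `matvec_toeplitz_upper_triangular` and its FFT counterpart are used for in `translate_from` below. A one-dimensional sketch of the plain matvec, with the multi-index bookkeeping of the real helpers omitted:

    def toeplitz_upper_triangular_matvec_1d(first_row, vec):
        # T[i][j] = first_row[j - i] for j >= i and 0 otherwise, hence
        # (T @ vec)[i] = sum_{j >= i} first_row[j - i] * vec[j].
        n = len(first_row)
        assert len(vec) == n
        return [sum(first_row[j - i] * vec[j] for j in range(i, n))
                for i in range(n)]

The FFT-based variant relies on the fact that this product embeds into a circular convolution of zero-padded sequences, replacing the quadratic loop above with an O(n log n) transform.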
+ + if not self.use_rscale: + src_rscale = 1 + + from sumpy.tools import add_mi + + toeplitz_matrix_coeffs, needed_vector_terms, max_mi = \ + self.m2l_global_precompute_mis(src_expansion) + + # Create a expansion terms wrangler for derivatives up to order + # (tgt order)+(src order) including a corresponding reduction matrix + # For eg: 2D full Taylor Laplace, this is (n, m), + # n+m<=2*p, n<=2*p, m<=2*p + srcplusderiv_terms_wrangler = \ + src_expansion.expansion_terms_wrangler.copy( + order=self.order + src_expansion.order, max_mi=tuple(max_mi)) + srcplusderiv_full_coeff_ids = \ + srcplusderiv_terms_wrangler.get_full_coefficient_identifiers() + srcplusderiv_ident_to_index = dict((ident, i) for i, ident in + enumerate(srcplusderiv_full_coeff_ids)) + + # The vector has the kernel derivatives and depends only on the distance + # between the two centers + taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale, sac) + vector_stored = [] + # Calculate the kernel derivatives for the compressed set + for term in \ + srcplusderiv_terms_wrangler.get_coefficient_identifiers(): + kernel_deriv = taker.diff(term) + vector_stored.append(kernel_deriv) + # Calculate the kernel derivatives for the full set + vector_full = \ + srcplusderiv_terms_wrangler.get_full_kernel_derivatives_from_stored( + vector_stored, src_rscale) + + for term in srcplusderiv_full_coeff_ids: + assert term in needed_vector_terms + + vector = [0]*len(needed_vector_terms) + for i, term in enumerate(needed_vector_terms): + vector[i] = add_to_sac(sac, + vector_full[srcplusderiv_ident_to_index[term]]) + return vector + def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale): + dvec, tgt_rscale, sac, precomputed_exprs=None, use_fft=False): logger.info("building translation operator: %s(%d) -> %s(%d): start" % (type(src_expansion).__name__, src_expansion.order, @@ -154,80 +263,51 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): tgt_rscale = 1 from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansionBase - if isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase): - # We know the general form of the multipole expansion is: - # - # coeff0 * diff(kernel, mi0) + coeff1 * diff(kernel, mi1) + ... - # - # To get the local expansion coefficients, we take derivatives of - # the multipole expansion. - # - # This code speeds up derivative taking by caching all kernel - # derivatives. 
- - taker = src_expansion.get_kernel_derivative_taker(dvec) - - from sumpy.tools import add_mi - - max_mi = [0]*self.dim - for i in range(self.dim): - max_mi[i] = max(mi[i] for mi in - src_expansion.get_coefficient_identifiers()) - max_mi[i] += max(mi[i] for mi in - self.get_coefficient_identifiers()) - - # Create a expansion terms wrangler for derivatives up to order - # (tgt order)+(src order) including a corresponding reduction matrix - tgtplusderiv_exp_terms_wrangler = \ - src_expansion.expansion_terms_wrangler.copy( - order=self.order + src_expansion.order, max_mi=tuple(max_mi)) - tgtplusderiv_coeff_ids = \ - tgtplusderiv_exp_terms_wrangler.get_coefficient_identifiers() - tgtplusderiv_full_coeff_ids = \ - tgtplusderiv_exp_terms_wrangler.get_full_coefficient_identifiers() - - tgtplusderiv_ident_to_index = dict((ident, i) for i, ident in - enumerate(tgtplusderiv_full_coeff_ids)) + if isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase): + toeplitz_matrix_coeffs, needed_vector_terms, max_mi = \ + self.m2l_global_precompute_mis(src_expansion) + toeplitz_matrix_ident_to_index = dict((ident, i) for i, ident in + enumerate(toeplitz_matrix_coeffs)) + + if precomputed_exprs is None: + derivatives = self.m2l_global_precompute_exprs(src_expansion, + src_rscale, dvec, tgt_rscale, sac, use_fft=use_fft) + else: + derivatives = precomputed_exprs + + derivatives_full = [0]*len(toeplitz_matrix_coeffs) + for expr, mi in zip(derivatives, needed_vector_terms): + derivatives_full[toeplitz_matrix_ident_to_index[mi]] = expr + + # Calculate the first row of the upper triangular Toeplitz matrix + toeplitz_first_row = [0] * len(toeplitz_matrix_coeffs) + for coeff, term in zip( + src_coeff_exprs, + src_expansion.get_coefficient_identifiers()): + toeplitz_first_row[toeplitz_matrix_ident_to_index[term]] = \ + add_to_sac(sac, coeff) + + # Do the matvec + if use_fft: + output = fft_toeplitz_upper_triangular(toeplitz_first_row, + derivatives_full, sac=sac) + else: + output = matvec_toeplitz_upper_triangular(toeplitz_first_row, + derivatives_full) + + # Filter out the dummy rows and scale them for target result = [] - for lexp_mi in self.get_coefficient_identifiers(): - lexp_mi_terms = [] - - # Embed the source coefficients in the coefficient array - # for the (tgt order)+(src order) wrangler, offset by lexp_mi. 
- embedded_coeffs = [0] * len(tgtplusderiv_full_coeff_ids) - for coeff, term in zip( - src_coeff_exprs, - src_expansion.get_coefficient_identifiers()): - embedded_coeffs[ - tgtplusderiv_ident_to_index[add_mi(lexp_mi, term)]] \ - = coeff - - # Compress the embedded coefficient set - stored_coeffs = tgtplusderiv_exp_terms_wrangler \ - .get_stored_mpole_coefficients_from_full( - embedded_coeffs, src_rscale) - - # Sum the embedded coefficient set - for i, coeff in enumerate(stored_coeffs): - if coeff == 0: - continue - nderivatives_for_scaling = \ - sum(tgtplusderiv_coeff_ids[i])-sum(lexp_mi) - kernel_deriv = ( - src_expansion.get_scaled_multipole( - taker.diff(tgtplusderiv_coeff_ids[i]), - dvec, src_rscale, - nderivatives=sum(tgtplusderiv_coeff_ids[i]), - nderivatives_for_scaling=nderivatives_for_scaling)) - - lexp_mi_terms.append( - coeff * kernel_deriv * tgt_rscale**sum(lexp_mi)) - result.append(sym.Add(*lexp_mi_terms)) + rscale_ratio = add_to_sac(sac, tgt_rscale/src_rscale) + for term in self.get_coefficient_identifiers(): + index = toeplitz_matrix_ident_to_index[term] + result.append(output[index]*rscale_ratio**sum(term)) + logger.info("building translation operator: done") return result - rscale_ratio = sym.UnevaluatedExpr(tgt_rscale/src_rscale) + rscale_ratio = tgt_rscale/src_rscale + rscale_ratio = add_to_sac(sac, rscale_ratio) from sumpy.tools import MiDerivativeTaker from math import factorial @@ -295,7 +375,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # the end in the hope of helping rscale magnitude. dvec_scaled = [d*src_rscale for d in dvec] expr = src_expansion.evaluate(src_coeff_exprs, dvec_scaled, - rscale=src_rscale) + rscale=src_rscale, sac=sac) replace_dict = dict((d, d/src_rscale) for d in dvec) taker = MiDerivativeTaker(expr, dvec) rscale_ratio = sym.UnevaluatedExpr(tgt_rscale/src_rscale) @@ -357,7 +437,7 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): def get_coefficient_identifiers(self): return list(range(-self.order, self.order+1)) - def coefficients_from_source(self, avec, bvec, rscale): + def coefficients_from_source(self, avec, bvec, rscale, sac): if not self.use_rscale: rscale = 1 @@ -370,14 +450,16 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): source_angle_rel_center = sym.atan2(-avec[1], -avec[0]) avec_len = sym_real_norm_2(avec) return [self.kernel.postprocess_at_source( - hankel_1(l, arg_scale * avec_len) - * rscale ** abs(l) - * sym.exp(sym.I * l * source_angle_rel_center), avec) - for l in self.get_coefficient_identifiers()] + hankel_1(c, arg_scale * avec_len) + * rscale ** abs(c) + * sym.exp(sym.I * c * source_angle_rel_center), avec) + for c in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale): + def evaluate(self, coeffs, bvec, rscale, sac, knl=None): if not self.use_rscale: rscale = 1 + if knl is None: + knl = self.kernel from sumpy.symbolic import sym_real_norm_2 bessel_j = sym.Function("bessel_j") @@ -386,15 +468,15 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): arg_scale = self.get_bessel_arg_scaling() - return sum(coeffs[self.get_storage_index(l)] - * self.kernel.postprocess_at_target( - bessel_j(l, arg_scale * bvec_len) - / rscale ** abs(l) - * sym.exp(sym.I * l * -target_angle_rel_center), bvec) - for l in self.get_coefficient_identifiers()) + return sum(coeffs[self.get_storage_index(c)] + * knl.postprocess_at_target( + bessel_j(c, arg_scale * bvec_len) + / rscale ** abs(c) + * sym.exp(sym.I * c * -target_angle_rel_center), bvec) + for c in 
self.get_coefficient_identifiers()) def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale): + dvec, tgt_rscale, sac): from sumpy.symbolic import sym_real_norm_2 if not self.use_rscale: @@ -408,13 +490,13 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): bessel_j = sym.Function("bessel_j") new_center_angle_rel_old_center = sym.atan2(dvec[1], dvec[0]) translated_coeffs = [] - for l in self.get_coefficient_identifiers(): + for j in self.get_coefficient_identifiers(): translated_coeffs.append( sum(src_coeff_exprs[src_expansion.get_storage_index(m)] - * bessel_j(m - l, arg_scale * dvec_len) + * bessel_j(m - j, arg_scale * dvec_len) / src_rscale ** abs(m) - * tgt_rscale ** abs(l) - * sym.exp(sym.I * (m - l) * -new_center_angle_rel_old_center) + * tgt_rscale ** abs(j) + * sym.exp(sym.I * (m - j) * -new_center_angle_rel_old_center) for m in src_expansion.get_coefficient_identifiers())) return translated_coeffs @@ -423,14 +505,14 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): hankel_1 = sym.Function("hankel_1") new_center_angle_rel_old_center = sym.atan2(dvec[1], dvec[0]) translated_coeffs = [] - for l in self.get_coefficient_identifiers(): + for j in self.get_coefficient_identifiers(): translated_coeffs.append( sum( - (-1) ** l - * hankel_1(m + l, arg_scale * dvec_len) + (-1) ** j + * hankel_1(m + j, arg_scale * dvec_len) * src_rscale ** abs(m) - * tgt_rscale ** abs(l) - * sym.exp(sym.I * (m + l) * new_center_angle_rel_old_center) + * tgt_rscale ** abs(j) + * sym.exp(sym.I * (m + j) * new_center_angle_rel_old_center) * src_coeff_exprs[src_expansion.get_storage_index(m)] for m in src_expansion.get_coefficient_identifiers())) return translated_coeffs diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index b7e2769ba21b470cb163515da6e70c55eb360d2b..1adda8211c28f94a3cbe8aa7e9de7798d4de5938 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -25,12 +25,10 @@ THE SOFTWARE. from six.moves import range, zip import sumpy.symbolic as sym # noqa -from sumpy.symbolic import vector_xreplace from sumpy.expansion import ( ExpansionBase, VolumeTaylorExpansion, LaplaceConformingVolumeTaylorExpansion, HelmholtzConformingVolumeTaylorExpansion, BiharmonicConformingVolumeTaylorExpansion) -from pytools import cartesian_product import logging logger = logging.getLogger(__name__) @@ -56,7 +54,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): Coefficients represent the terms in front of the kernel derivatives. 
""" - def coefficients_from_source(self, avec, bvec, rscale): + def coefficients_from_source(self, avec, bvec, rscale, sac): from sumpy.kernel import DirectionalSourceDerivative kernel = self.kernel @@ -65,87 +63,50 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): if not self.use_rscale: rscale = 1 + coeff_identifiers = self.get_full_coefficient_identifiers() if isinstance(kernel, DirectionalSourceDerivative): - from sumpy.symbolic import make_sym_vector - - dir_vecs = [] - tmp_kernel = kernel - while isinstance(tmp_kernel, DirectionalSourceDerivative): - dir_vecs.append(make_sym_vector(tmp_kernel.dir_vec_name, kernel.dim)) - tmp_kernel = tmp_kernel.inner_kernel - - if kernel.get_base_kernel() is not tmp_kernel: - raise NotImplementedError("Unknown kernel wrapper.") - - nderivs = len(dir_vecs) - - coeff_identifiers = self.get_full_coefficient_identifiers() result = [0] * len(coeff_identifiers) - - for i, mi in enumerate(coeff_identifiers): - # One source derivative is the dot product of the gradient and - # directional vector. - # For multiple derivatives, gradient of gradients is taken. - # For eg: in 3D, 2 source derivatives gives 9 terms and - # cartesian_product below enumerates these 9 terms. - for deriv_terms in cartesian_product(*[range(kernel.dim)]*nderivs): - prod = 1 - derivative_mi = list(mi) - for nderivative, deriv_dim in enumerate(deriv_terms): - prod *= -derivative_mi[deriv_dim] - prod *= dir_vecs[nderivative][deriv_dim] - derivative_mi[deriv_dim] -= 1 - if any(v < 0 for v in derivative_mi): - continue - result[i] += mi_power(avec, derivative_mi) * prod - for i, mi in enumerate(coeff_identifiers): + result[i] = self.kernel.postprocess_at_source( + mi_power(avec, mi), avec) result[i] /= (mi_factorial(mi) * rscale ** sum(mi)) else: avec = [sym.UnevaluatedExpr(a * rscale**-1) for a in avec] - result = [ mi_power(avec, mi) / mi_factorial(mi) - for mi in self.get_full_coefficient_identifiers()] + for mi in coeff_identifiers] return ( self.expansion_terms_wrangler.get_stored_mpole_coefficients_from_full( result, rscale)) - def get_scaled_multipole(self, expr, bvec, rscale, nderivatives, - nderivatives_for_scaling=None): - if nderivatives_for_scaling is None: - nderivatives_for_scaling = nderivatives - - if self.kernel.has_efficient_scale_adjustment: - return ( - self.kernel.adjust_for_kernel_scaling( - vector_xreplace( - expr, - bvec, bvec * rscale**-1), - rscale, nderivatives) - / rscale ** (nderivatives - nderivatives_for_scaling)) - else: - return (rscale**nderivatives_for_scaling * expr) - - def evaluate(self, coeffs, bvec, rscale): + def evaluate(self, coeffs, bvec, rscale, sac, knl=None): + from sumpy.tools import MiDerivativeTakerWrapper + from pytools import single_valued if not self.use_rscale: rscale = 1 - - taker = self.get_kernel_derivative_taker(bvec) - - result = sym.Add(*tuple( - coeff - * self.get_scaled_multipole(taker.diff(mi), bvec, rscale, sum(mi)) - for coeff, mi in zip(coeffs, self.get_coefficient_identifiers()))) - + if knl is None: + knl = self.kernel + + taker = self.get_kernel_derivative_taker(bvec, rscale, sac) + expr_dict = {(0,)*self.dim: 1} + expr_dict = knl.get_derivative_transformation_at_target(expr_dict) + pp_nderivatives = single_valued(sum(mi) for mi in expr_dict.keys()) + + result = [] + for coeff, mi in zip(coeffs, self.get_coefficient_identifiers()): + wrapper = MiDerivativeTakerWrapper(taker, mi) + mi_expr = knl.postprocess_at_target(wrapper, bvec) + # For details about this correction, see the explanation at + # 
VolumeTaylorLocalExpansionBase.coefficients_from_source + expr = coeff * mi_expr / rscale**pp_nderivatives + result.append(expr) + + result = sym.Add(*tuple(result)) + #return knl.postprocess_at_target(result, bvec) return result - def get_kernel_derivative_taker(self, bvec): - from sumpy.tools import MiDerivativeTaker - return MiDerivativeTaker(self.kernel.get_expression(bvec), bvec) - def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale): + dvec, tgt_rscale, sac): if not isinstance(src_expansion, type(self)): raise RuntimeError("do not know how to translate %s to " "Taylor multipole expansion" @@ -347,7 +308,7 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): def get_coefficient_identifiers(self): return list(range(-self.order, self.order+1)) - def coefficients_from_source(self, avec, bvec, rscale): + def coefficients_from_source(self, avec, bvec, rscale, sac): if not self.use_rscale: rscale = 1 @@ -361,15 +322,17 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): source_angle_rel_center = sym.atan2(-avec[1], -avec[0]) return [ self.kernel.postprocess_at_source( - bessel_j(l, arg_scale * avec_len) - / rscale ** abs(l) - * sym.exp(sym.I * l * -source_angle_rel_center), + bessel_j(c, arg_scale * avec_len) + / rscale ** abs(c) + * sym.exp(sym.I * c * -source_angle_rel_center), avec) - for l in self.get_coefficient_identifiers()] + for c in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale): + def evaluate(self, coeffs, bvec, rscale, sac, knl=None): if not self.use_rscale: rscale = 1 + if knl is None: + knl = self.kernel from sumpy.symbolic import sym_real_norm_2 hankel_1 = sym.Function("hankel_1") @@ -378,15 +341,15 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): arg_scale = self.get_bessel_arg_scaling() - return sum(coeffs[self.get_storage_index(l)] - * self.kernel.postprocess_at_target( - hankel_1(l, arg_scale * bvec_len) - * rscale ** abs(l) - * sym.exp(sym.I * l * target_angle_rel_center), bvec) - for l in self.get_coefficient_identifiers()) + return sum(coeffs[self.get_storage_index(c)] + * knl.postprocess_at_target( + hankel_1(c, arg_scale * bvec_len) + * rscale ** abs(c) + * sym.exp(sym.I * c * target_angle_rel_center), bvec) + for c in self.get_coefficient_identifiers()) def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale): + dvec, tgt_rscale, sac): if not isinstance(src_expansion, type(self)): raise RuntimeError("do not know how to translate %s to %s" % (type(src_expansion).__name__, @@ -404,13 +367,13 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): arg_scale = self.get_bessel_arg_scaling() translated_coeffs = [] - for l in self.get_coefficient_identifiers(): + for j in self.get_coefficient_identifiers(): translated_coeffs.append( sum(src_coeff_exprs[src_expansion.get_storage_index(m)] - * bessel_j(m - l, arg_scale * dvec_len) + * bessel_j(m - j, arg_scale * dvec_len) * src_rscale ** abs(m) - / tgt_rscale ** abs(l) - * sym.exp(sym.I * (m - l) * new_center_angle_rel_old_center) + / tgt_rscale ** abs(j) + * sym.exp(sym.I * (m - j) * new_center_angle_rel_old_center) for m in src_expansion.get_coefficient_identifiers())) return translated_coeffs diff --git a/sumpy/fmm.py b/sumpy/fmm.py index a233175686f2c395fe7783235ddeb5e351b09ff0..289ab511c5e49a8953c90f36ff78cf560a1f41f2 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -40,7 +40,9 @@ from sumpy import ( P2EFromSingleBox, P2EFromCSR, E2PFromSingleBox, E2PFromCSR, 
P2PFromCSR, - E2EFromCSR, E2EFromChildren, E2EFromParent) + E2EFromCSR, + E2EFromCSRTranslationInvariant, E2EFromCSRTranslationClassesPrecompute, + E2EFromChildren, E2EFromParent) def level_to_rscale(tree, level): @@ -116,6 +118,18 @@ class SumpyExpansionWranglerCodeContainer(object): self.multipole_expansion(src_order), self.local_expansion(tgt_order)) + @memoize_method + def m2l_optimized(self, src_order, tgt_order): + return E2EFromCSRTranslationInvariant(self.cl_context, + self.multipole_expansion(src_order), + self.local_expansion(tgt_order)) + + @memoize_method + def m2l_optimized_precompute_kernel(self, src_order, tgt_order): + return E2EFromCSRTranslationClassesPrecompute(self.cl_context, + self.multipole_expansion(src_order), + self.local_expansion(tgt_order)) + @memoize_method def l2l(self, src_order, tgt_order): return E2EFromParent(self.cl_context, @@ -142,9 +156,11 @@ class SumpyExpansionWranglerCodeContainer(object): def get_wrangler(self, queue, tree, dtype, fmm_level_to_order, source_extra_kwargs={}, kernel_extra_kwargs=None, - self_extra_kwargs=None): + self_extra_kwargs=None, + translation_classes_data=None): return SumpyExpansionWrangler(self, queue, tree, dtype, fmm_level_to_order, - source_extra_kwargs, kernel_extra_kwargs, self_extra_kwargs) + source_extra_kwargs, kernel_extra_kwargs, self_extra_kwargs, + translation_classes_data=translation_classes_data) # }}} @@ -197,6 +213,56 @@ class SumpyTimingFuture(object): # }}} +# {{{ translation classes data + +class SumpyTranslationClassesData: + """A class for building and storing additional, optional data for + precomputation of translation classes passed to the expansion wrangler.""" + + def __init__(self, queue, trav, is_translation_per_level=True): + self.queue = queue + self.trav = trav + self.tree = trav.tree + self.is_translation_per_level = is_translation_per_level + + @property + @memoize_method + def translation_classes_builder(self): + from boxtree.translation_classes import TranslationClassesBuilder + return TranslationClassesBuilder(self.queue.context) + + @memoize_method + def build_translation_classes_lists(self): + trav = self.trav.to_device(self.queue) + tree = self.tree.to_device(self.queue) + return self.translation_classes_builder(self.queue, trav, tree, + is_translation_per_level=self.is_translation_per_level)[0] + + @memoize_method + def m2l_translation_classes_lists(self): + return (self + .build_translation_classes_lists() + .from_sep_siblings_translation_classes) + + @memoize_method + def m2l_translation_vectors(self): + return (self + .build_translation_classes_lists() + .from_sep_siblings_translation_class_to_distance_vector) + + @memoize_method + def m2l_translation_classes_level_starts(self): + return (self + .build_translation_classes_lists() + .from_sep_siblings_translation_classes_level_starts) + + +class SumpyTranslationClassesDataNotSuppliedWarning(UserWarning): + pass + +# }}} + + # {{{ expansion wrangler class SumpyExpansionWrangler(object): @@ -223,7 +289,8 @@ class SumpyExpansionWrangler(object): def __init__(self, code_container, queue, tree, dtype, fmm_level_to_order, source_extra_kwargs, kernel_extra_kwargs=None, - self_extra_kwargs=None): + self_extra_kwargs=None, + translation_classes_data=None): self.code = code_container self.queue = queue self.tree = tree @@ -253,14 +320,30 @@ class SumpyExpansionWrangler(object): self.extra_kwargs = source_extra_kwargs.copy() self.extra_kwargs.update(self.kernel_extra_kwargs) + if base_kernel.is_translation_invariant: + if 
translation_classes_data is None: + from warnings import warn + warn( + "List 2 (multipole-to-local) translations will be " + "unoptimized. Supply a translation_classes_data argument to " + "the wrangler for optimized List 2.", + SumpyTranslationClassesDataNotSuppliedWarning, + + stacklevel=2) + self.supports_optimized_m2l = False + else: + self.supports_optimized_m2l = True + else: + self.supports_optimized_m2l = False + + self.translation_classes_data = translation_classes_data + # {{{ data vector utilities - def _expansions_level_starts(self, order_to_size): + def _expansions_level_starts(self, order_to_size, level_starts): result = [0] for lev in range(self.tree.nlevels): - lev_nboxes = ( - self.tree.level_start_box_nrs[lev+1] - - self.tree.level_start_box_nrs[lev]) + lev_nboxes = level_starts[lev+1] - level_starts[lev] expn_size = order_to_size(self.level_orders[lev]) result.append( @@ -272,12 +355,29 @@ class SumpyExpansionWrangler(object): @memoize_method def multipole_expansions_level_starts(self): return self._expansions_level_starts( - lambda order: len(self.code.multipole_expansion_factory(order))) + lambda order: len(self.code.multipole_expansion(order)), + level_starts=self.tree.level_start_box_nrs) @memoize_method def local_expansions_level_starts(self): return self._expansions_level_starts( - lambda order: len(self.code.local_expansion_factory(order))) + lambda order: len(self.code.local_expansion_factory(order)), + level_starts=self.tree.level_start_box_nrs) + + @memoize_method + def m2l_translation_class_level_start_box_nrs(self): + data = self.translation_classes_data + return data.m2l_translation_classes_level_starts().get(self.queue) + + @memoize_method + def m2l_precomputed_exprs_level_starts(self): + def order_to_size(order): + mpole_expn = self.code.multipole_expansion_factory(order) + local_expn = self.code.local_expansion_factory(order) + return local_expn.m2l_global_precompute_nexpr(mpole_expn) + + return self._expansions_level_starts(order_to_size, + level_starts=self.m2l_translation_class_level_start_box_nrs()) def multipole_expansion_zeros(self): return cl.array.zeros( @@ -291,6 +391,12 @@ class SumpyExpansionWrangler(object): self.local_expansions_level_starts()[-1], dtype=self.dtype) + def m2l_precomputed_exprs_zeros(self): + return cl.array.zeros( + self.queue, + self.m2l_precomputed_exprs_level_starts()[-1], + dtype=self.dtype) + def multipole_expansions_view(self, mpole_exps, level): expn_start, expn_stop = \ self.multipole_expansions_level_starts()[level:level+2] @@ -307,6 +413,16 @@ class SumpyExpansionWrangler(object): return (box_start, local_exps[expn_start:expn_stop].reshape(box_stop-box_start, -1)) + def m2l_precomputed_exprs_view(self, m2l_precomputed_exprs, level): + expn_start, expn_stop = \ + self.m2l_precomputed_exprs_level_starts()[level:level+2] + translation_class_start, translation_class_stop = \ + self.m2l_translation_class_level_start_box_nrs()[level:level+2] + + exprs_level = m2l_precomputed_exprs[expn_start:expn_stop] + return (translation_class_start, exprs_level.reshape( + translation_class_stop - translation_class_start, -1)) + def output_zeros(self): from pytools.obj_array import make_obj_array return make_obj_array([ @@ -470,6 +586,69 @@ class SumpyExpansionWrangler(object): return (pot, SumpyTimingFuture(self.queue, events)) + @memoize_method + def _multipole_to_local_precompute(self, src_rscale): + m2l_precomputed_exprs = self.m2l_precomputed_exprs_zeros() + events = [] + for lev in range(self.tree.nlevels): + order = 
self.level_orders[lev] + precompute_kernel = \ + self.code.m2l_optimized_precompute_kernel(order, order) + + translation_classes_level_start, precomputed_exprs_view = \ + self.m2l_precomputed_exprs_view(m2l_precomputed_exprs, lev) + + ntranslation_classes = precomputed_exprs_view.shape[0] + + if ntranslation_classes == 0: + events.append([]) + continue + + m2l_translation_vectors = ( + self.translation_classes_data.m2l_translation_vectors()) + + evt, _ = precompute_kernel( + self.queue, + src_rscale=src_rscale, + translation_classes_level_start=translation_classes_level_start, + ntranslation_classes=ntranslation_classes, + m2l_precomputed_exprs=precomputed_exprs_view, + m2l_translation_vectors=m2l_translation_vectors, + ntranslation_vectors=m2l_translation_vectors.shape[1], + **self.kernel_extra_kwargs + ) + + events.append([evt]) + + return m2l_precomputed_exprs, events + + def multipole_to_local_precompute(self, info, lev): + """Adds to info dict, the information needed for a multipole-to-local + translation with precomputation. Returns true if the level `lev` + has translations to do. If the wrangler does not support an optimized + translation, returns true. + """ + if not self.supports_optimized_m2l: + return True + src_rscale = info["src_rscale"] + m2l_precomputed_exprs, events = \ + self._multipole_to_local_precompute(src_rscale) + translation_classes_level_start, precomputed_exprs_view = \ + self.m2l_precomputed_exprs_view(m2l_precomputed_exprs, lev) + info["m2l_precomputed_exprs"] = precomputed_exprs_view + info["translation_classes_level_start"] = translation_classes_level_start + info["m2l_translation_classes_lists"] = \ + self.translation_classes_data.m2l_translation_classes_lists() + info["wait_for"] = events[lev] + return precomputed_exprs_view.shape[0] != 0 + + @property + def m2l_class(self): + if self.supports_optimized_m2l: + return self.code.m2l_optimized + else: + return self.code.m2l + def multipole_to_local(self, level_start_target_box_nrs, target_boxes, src_box_starts, src_box_lists, @@ -477,23 +656,20 @@ class SumpyExpansionWrangler(object): local_exps = self.local_expansion_zeros() events = [] - for lev in range(self.tree.nlevels): start, stop = level_start_target_box_nrs[lev:lev+2] if start == stop: continue order = self.level_orders[lev] - m2l = self.code.m2l(order, order) + m2l = self.m2l_class(order, order) source_level_start_ibox, source_mpoles_view = \ self.multipole_expansions_view(mpole_exps, lev) target_level_start_ibox, target_local_exps_view = \ self.local_expansions_view(local_exps, lev) - evt, (local_exps_res,) = m2l( - self.queue, - + kwargs = dict( src_expansions=source_mpoles_view, src_base_ibox=source_level_start_ibox, tgt_expansions=target_local_exps_view, @@ -507,7 +683,11 @@ class SumpyExpansionWrangler(object): src_rscale=level_to_rscale(self.tree, lev), tgt_rscale=level_to_rscale(self.tree, lev), - **self.kernel_extra_kwargs) + **self.kernel_extra_kwargs + ) + if not self.multipole_to_local_precompute(kwargs, lev): + continue + evt, _ = m2l(self.queue, **kwargs) events.append(evt) return (local_exps, SumpyTimingFuture(self.queue, events)) @@ -523,6 +703,20 @@ class SumpyExpansionWrangler(object): wait_for = mpole_exps.events + for isrc_level, ssn in enumerate(source_boxes_by_level): + if len(target_boxes_by_source_level[isrc_level]) == 0: + continue + + m2p = self.code.m2p(self.level_orders[isrc_level]) + + source_level_start_ibox, source_mpoles_view = \ + self.multipole_expansions_view(mpole_exps, isrc_level) + 
kwargs.update(self.box_target_list_kwargs()) + + events = [] + + wait_for = mpole_exps.events + for isrc_level, ssn in enumerate(source_boxes_by_level): if len(target_boxes_by_source_level[isrc_level]) == 0: continue diff --git a/sumpy/kernel.py b/sumpy/kernel.py index 6383d8d57f4fc3e429333e686631c8bbcad1b6c6..d9428a9e29c28889db013d011c8fdd2aa956b96e 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -23,6 +23,7 @@ THE SOFTWARE. """ from six.moves import range, zip +from collections import defaultdict import loopy as lp import numpy as np @@ -122,8 +123,6 @@ class Kernel(object): .. automethod:: prepare_loopy_kernel .. automethod:: get_code_transformer .. automethod:: get_expression - .. attribute:: has_efficient_scale_adjustment - .. automethod:: adjust_for_kernel_scaling .. automethod:: postprocess_at_source .. automethod:: postprocess_at_target .. automethod:: get_global_scaling_const @@ -192,76 +191,19 @@ class Kernel(object): r"""Return a :mod:`sympy` expression for the kernel.""" raise NotImplementedError - has_efficient_scale_adjustment = False - - def adjust_for_kernel_scaling(self, expr, rscale, nderivatives): - r""" - Consider a Taylor multipole expansion: - - .. math:: - - f (x - y) = \sum_{i = 0}^{\infty} (\partial_y^i f) (x - y) \big|_{y = c} - \frac{(y - c)^i}{i!} . - - Now suppose we would like to use a scaled version :math:`g` of the - kernel :math:`f`: - - .. math:: - - \begin{eqnarray*} - f (x) & = & g (x / \alpha),\\ - f^{(i)} (x) & = & \frac{1}{\alpha^i} g^{(i)} (x / \alpha) . - \end{eqnarray*} - - where :math:`\alpha` is chosen to be on a length scale similar to - :math:`x` (for example by choosing :math:`\alpha` proporitional to the - size of the box for which the expansion is intended) so that :math:`x / - \alpha` is roughly of unit magnitude, to avoid arithmetic issues with - small arguments. This yields - - .. math:: - - f (x - y) = \sum_{i = 0}^{\infty} (\partial_y^i g) - \left( \frac{x - y}{\alpha} \right) \Bigg|_{y = c} - \cdot - \frac{(y - c)^i}{\alpha^i \cdot i!}. - - Observe that the :math:`(y - c)` term is now scaled to unit magnitude, - as is the argument of :math:`g`. - - With :math:`\xi = x / \alpha`, we find - - .. math:: - - \begin{eqnarray*} - g (\xi) & = & f (\alpha \xi),\\ - g^{(i)} (\xi) & = & \alpha^i f^{(i)} (\alpha \xi) . - \end{eqnarray*} - - Generically for all kernels, :math:`f^{(i)} (\alpha \xi)` is computable - by taking a sufficient number of symbolic derivatives of :math:`f` and - providing :math:`\alpha \xi = x` as the argument. - - Now, for some kernels, like :math:`f (x) = C \log x`, the powers of - :math:`\alpha^i` from the chain rule cancel with the ones from the - argument substituted into the kernel derivative: - - .. math:: - - g^{(i)} (\xi) = \alpha^i f^{(i)} (\alpha \xi) = C' \cdot \alpha^i \cdot - \frac{1}{(\alpha x)^i} \quad (i > 0), - - making them what you might call *scale-invariant*. In this case, one - may set :attr:`has_efficient_scale_adjustment`. For these kernels only, - :meth:`adjust_for_kernel_scaling` provides a shortcut for scaled kernel - derivative computation. Given :math:`f^{(i)}` as *expr*, it directly - returns an expression for :math:`g^{(i)}`, where :math:`i` is given - as *nderivatives*. - - :arg rscale: The scaling parameter :math:`\alpha` above. 
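Note: the scale-adjustment machinery deleted above is superseded by the `expr_dict` convention introduced next: a kernel describes its source/target post-processing as a mapping from derivative multi-indices to coefficients, and `postprocess_at_source` / `postprocess_at_target` apply it via `_diff`. As a sketch of how a wrapper kernel could express one directional source derivative in this convention (the function name and the sign convention are illustrative and not sumpy's actual `DirectionalSourceDerivative` implementation):

    from collections import defaultdict

    def apply_directional_derivative(expr_dict, dir_vec, dim):
        # Turn each entry mi -> coeff into dim entries (mi + e_i) -> dir_vec[i] * coeff,
        # i.e. compose the existing transformation with a gradient dotted into dir_vec.
        result = defaultdict(lambda: 0)
        for mi, coeff in expr_dict.items():
            for i in range(dim):
                new_mi = list(mi)
                new_mi[i] += 1
                result[tuple(new_mi)] += dir_vec[i] * coeff
        return dict(result)

A `get_derivative_transformation_at_source` override would return such a dictionary, and `postprocess_at_source` then sums `coeff * self._diff(expr, avec, mi)` over its entries.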
+ def _diff(self, expr, vec, mi): + """Take the derivative of an expression or a MiDerivativeTakerWrapper """ - - raise NotImplementedError + from sumpy.tools import MiDerivativeTakerWrapper, add_mi + if isinstance(expr, MiDerivativeTakerWrapper): + taker, init_mi = expr + return taker.diff(add_mi(mi, init_mi)) + else: + for i in range(self.dim): + if mi[i] == 0: + continue + expr = expr.diff(vec[i], mi[i]) + return expr def postprocess_at_source(self, expr, avec): """Transform a kernel evaluation or expansion expression in a place @@ -271,7 +213,12 @@ class Kernel(object): The typical use of this function is to apply source-variable derivatives to the kernel. """ - return expr + expr_dict = {(0,)*self.dim: 1} + expr_dict = self.get_derivative_transformation_at_source(expr_dict) + result = 0 + for mi, coeff in expr_dict.items(): + result += coeff * self._diff(expr, avec, mi) + return result def postprocess_at_target(self, expr, bvec): """Transform a kernel evaluation or expansion expression in a place @@ -281,7 +228,36 @@ class Kernel(object): The typical use of this function is to apply target-variable derivatives to the kernel. """ - return expr + expr_dict = {(0,)*self.dim: 1} + expr_dict = self.get_derivative_transformation_at_target(expr_dict) + result = 0 + for mi, coeff in expr_dict.items(): + result += coeff * self._diff(expr, bvec, mi) + return result + + def get_derivative_transformation_at_source(self, expr_dict): + r"""Get the derivative transformation of the expression at source + represented by the dictionary expr_dict which is mapping from multi-index + `mi` to coefficient `coeff`. + Expression represented by the dictionary `expr_dict` is + :math:`\sum_{mi} \frac{\partial^mi}{x^mi}G * coeff`. Returns an + expression of the same type. + + This function is meant to be overridden by child classes where necessary. + """ + return expr_dict + + def get_derivative_transformation_at_target(self, expr_dict): + r"""Get the derivative transformation of the expression at target + represented by the dictionary expr_dict which is mapping from multi-index + `mi` to coefficient `coeff`. + Expression represented by the dictionary `expr_dict` is + :math:`\sum_{mi} \frac{\partial^mi}{x^mi}G * coeff`. Returns an + expression of the same type. + + This function is meant to be overridden by child classes where necessary. + """ + return expr_dict def get_global_scaling_const(self): r"""Return a global scaling constant of the kernel. 
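The expr_dict convention used by these hooks, a dict that maps a derivative multi-index to its coefficient and applies the derivatives to the kernel expression only at the end (via _diff), can be mimicked with plain sympy. The helper names in this sketch are invented for illustration and are not part of sumpy:

import sympy as sp

x0, x1 = sp.symbols("x0 x1")
n0, n1 = sp.symbols("n0 n1")
G = sp.log(sp.sqrt(x0**2 + x1**2))   # 2D Laplace kernel, up to a constant


def bump_axis(expr_dict, axis):
    # AxisTargetDerivative-style transformation: raise one index per term
    result = {}
    for mi, coeff in expr_dict.items():
        new_mi = list(mi)
        new_mi[axis] += 1
        result[tuple(new_mi)] = result.get(tuple(new_mi), 0) + coeff
    return result


def bump_directional(expr_dict, dir_vec):
    # DirectionalTargetDerivative-style transformation: n . grad
    result = {}
    for mi, coeff in expr_dict.items():
        for axis, n in enumerate(dir_vec):
            new_mi = list(mi)
            new_mi[axis] += 1
            result[tuple(new_mi)] = result.get(tuple(new_mi), 0) + coeff*n
    return result


expr_dict = {(0, 0): 1}                                # start from G itself
expr_dict = bump_axis(expr_dict, 0)                    # d/dx0
expr_dict = bump_directional(expr_dict, (n0, n1))      # then n . grad


def take_derivatives(expr, mi, variables):
    # the role of Kernel._diff: apply one multi-index worth of derivatives
    for v, order in zip(variables, mi):
        if order:
            expr = sp.diff(expr, v, order)
    return expr


# "postprocess": apply all accumulated derivatives to G in a single pass
result = sum(coeff*take_derivatives(G, mi, (x0, x1))
             for mi, coeff in expr_dict.items())
expected = n0*sp.diff(G, x0, 2) + n1*sp.diff(G, x0, x1)
assert sp.simplify(result - expected) == 0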
@@ -380,6 +356,8 @@ class ExpressionKernel(Kernel): key_builder.rec(key_hash, value) mapper_method = "map_expression_kernel" + # TODO: Allow kernels that are not translation invariant + is_translation_invariant = True one_kernel_2d = ExpressionKernel( @@ -418,22 +396,6 @@ class LaplaceKernel(ExpressionKernel): global_scaling_const=scaling, is_complex_valued=False) - has_efficient_scale_adjustment = True - - def adjust_for_kernel_scaling(self, expr, rscale, nderivatives): - if self.dim == 2: - if nderivatives == 0: - import sumpy.symbolic as sp - return (expr + sp.log(rscale)) - else: - return expr - - elif self.dim == 3: - return expr / rscale - - else: - raise NotImplementedError("unsupported dimensionality") - def __getinitargs__(self): return (self.dim,) @@ -441,6 +403,7 @@ class LaplaceKernel(ExpressionKernel): return "LapKnl%dD" % self.dim mapper_method = "map_laplace_kernel" + is_translation_invariant = True class BiharmonicKernel(ExpressionKernel): @@ -477,6 +440,7 @@ class BiharmonicKernel(ExpressionKernel): return "BiharmKnl%dD" % self.dim mapper_method = "map_biharmonic_kernel" + is_translation_invariant = True class HelmholtzKernel(ExpressionKernel): @@ -547,6 +511,7 @@ class HelmholtzKernel(ExpressionKernel): )] mapper_method = "map_helmholtz_kernel" + is_translation_invariant = True class YukawaKernel(ExpressionKernel): @@ -559,14 +524,30 @@ class YukawaKernel(ExpressionKernel): """ lam = var(yukawa_lambda_name) - if dim == 2: - r = pymbolic_real_norm_2(make_sym_vector("d", dim)) + # NOTE: The Yukawa kernel is given by [1] + # -1/(2 pi)**(n/2) * (lam/r)**(n/2-1) * K(n/2-1, lam r) + # where K is a modified Bessel function of the second kind. + # + # [1] https://en.wikipedia.org/wiki/Green%27s_function + # [2] http://dlmf.nist.gov/10.27#E8 + # [3] https://dlmf.nist.gov/10.47#E2 + # [4] https://dlmf.nist.gov/10.49 - # http://dlmf.nist.gov/10.27#E8 + r = pymbolic_real_norm_2(make_sym_vector("d", dim)) + if dim == 2: + # NOTE: transform K(0, lam r) into a Hankel function using [2] expr = var("hankel_1")(0, var("I")*lam*r) - scaling_for_K0 = 1/2*var("pi")*var("I") # noqa: N806 + scaling_for_K0 = var("pi")/2*var("I") # noqa: N806 scaling = -1/(2*var("pi")) * scaling_for_K0 + elif dim == 3: + # NOTE: to get the expression, we do the following and simplify + # 1. express K(1/2, lam r) as a modified spherical Bessel function + # k(0, lam r) using [3] and use expression for k(0, lam r) from [4] + # 2. 
or use (AS 10.2.17) directly + expr = var("exp")(-lam*r) / r + + scaling = -1/(4 * var("pi")**2) else: raise RuntimeError("unsupported dimensionality") @@ -605,6 +586,7 @@ class YukawaKernel(ExpressionKernel): )] mapper_method = "map_yukawa_kernel" + is_translation_invariant = True class StokesletKernel(ExpressionKernel): @@ -672,6 +654,7 @@ class StokesletKernel(ExpressionKernel): )] mapper_method = "map_stokeslet_kernel" + is_translation_invariant = True class StressletKernel(ExpressionKernel): @@ -765,19 +748,11 @@ class KernelWrapper(Kernel): def get_expression(self, scaled_dist_vec): return self.inner_kernel.get_expression(scaled_dist_vec) - @property - def has_efficient_scale_adjustment(self): - return self.inner_kernel.has_efficient_scale_adjustment - - def adjust_for_kernel_scaling(self, expr, rscale, nderivatives): - return self.inner_kernel.adjust_for_kernel_scaling( - expr, rscale, nderivatives) - - def postprocess_at_source(self, expr, avec): - return self.inner_kernel.postprocess_at_source(expr, avec) + def get_derivative_transformation_at_source(self, expr_dict): + return self.inner_kernel.get_derivative_transformation_at_source(expr_dict) - def postprocess_at_target(self, expr, avec): - return self.inner_kernel.postprocess_at_target(expr, avec) + def get_derivative_transformation_at_target(self, expr_dict): + return self.inner_kernel.get_derivative_transformation_at_target(expr_dict) def get_global_scaling_const(self): return self.inner_kernel.get_global_scaling_const() @@ -816,9 +791,15 @@ class AxisTargetDerivative(DerivativeBase): def __repr__(self): return "AxisTargetDerivative(%d, %r)" % (self.axis, self.inner_kernel) - def postprocess_at_target(self, expr, bvec): - expr = self.inner_kernel.postprocess_at_target(expr, bvec) - return expr.diff(bvec[self.axis]) + def get_derivative_transformation_at_target(self, expr_dict): + expr_dict = self.inner_kernel.get_derivative_transformation_at_target( + expr_dict) + result = dict() + for mi, coeff in expr_dict.items(): + new_mi = list(mi) + new_mi[self.axis] += 1 + result[tuple(new_mi)] = coeff + return result def replace_inner_kernel(self, new_inner_kernel): return type(self)(self.axis, new_inner_kernel) @@ -890,18 +871,21 @@ class DirectionalTargetDerivative(DirectionalDerivative): return transform - def postprocess_at_target(self, expr, bvec): - expr = self.inner_kernel.postprocess_at_target(expr, bvec) - - dim = len(bvec) - assert dim == self.dim - + def get_derivative_transformation_at_target(self, expr_dict): from sumpy.symbolic import make_sym_vector as make_sympy_vector - dir_vec = make_sympy_vector(self.dir_vec_name, dim) + dir_vec = make_sympy_vector(self.dir_vec_name, self.dim) + + expr_dict = self.inner_kernel.get_derivative_transformation_at_target( + expr_dict) - # bvec = tgt-center - return sum(dir_vec[axis]*expr.diff(bvec[axis]) - for axis in range(dim)) + # bvec = tgt - center + result = defaultdict(lambda: 0) + for mi, coeff in expr_dict.items(): + for axis in range(self.dim): + new_mi = list(mi) + new_mi[axis] += 1 + result[tuple(new_mi)] += coeff * dir_vec[axis] + return result def get_source_args(self): return [ @@ -932,18 +916,21 @@ class DirectionalSourceDerivative(DirectionalDerivative): return transform - def postprocess_at_source(self, expr, avec): - expr = self.inner_kernel.postprocess_at_source(expr, avec) - - dimensions = len(avec) - assert dimensions == self.dim - + def get_derivative_transformation_at_source(self, expr_dict): from sumpy.symbolic import make_sym_vector as make_sympy_vector - 
dir_vec = make_sympy_vector(self.dir_vec_name, dimensions) + dir_vec = make_sympy_vector(self.dir_vec_name, self.dim) + + expr_dict = self.inner_kernel.get_derivative_transformation_at_source( + expr_dict) # avec = center-src -> minus sign from chain rule - return sum(-dir_vec[axis]*expr.diff(avec[axis]) - for axis in range(dimensions)) + result = defaultdict(lambda: 0) + for mi, coeff in expr_dict.items(): + for axis in range(self.dim): + new_mi = list(mi) + new_mi[axis] += 1 + result[tuple(new_mi)] += -coeff * dir_vec[axis] + return result def get_source_args(self): return [ diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 2aa648040a7b51645345160ac8288d877ea17c11..f5472fc42e306c21eeec5091b26d548cdb66c71a 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -88,7 +88,7 @@ class P2EBase(KernelCacheWrapper): coeff_names = [ sac.assign_unique("coeff%d" % i, coeff_i) for i, coeff_i in enumerate( - self.expansion.coefficients_from_source(avec, None, rscale))] + self.expansion.coefficients_from_source(avec, None, rscale, sac))] sac.run_global_cse() diff --git a/sumpy/point_calculus.py b/sumpy/point_calculus.py index f8b4f5bdf38ec7cd9043047f07e5f2c2ce31b69c..fc164d81b226c0ed2e38f2e60c68d4cd466c8b7e 100644 --- a/sumpy/point_calculus.py +++ b/sumpy/point_calculus.py @@ -231,9 +231,9 @@ class CalculusPatch(object): from pytools.obj_array import make_obj_array return make_obj_array([ sum( - levi_civita((l, m, n)) * self.diff(m, arg[n]) + levi_civita((k, m, n)) * self.diff(m, arg[n]) for m in range(3) for n in range(3)) - for l in range(3)]) + for k in range(3)]) def eval_at_center(self, f_values): """Interpolate *f_values* to the center point. diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 9708764c01d0448d7c7e8314efb461989c838acd..e6704ba85f832067349cd3f291c1b47f8243f05c 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -68,7 +68,7 @@ def stringify_expn_index(i): def expand(expansion_nr, sac, expansion, avec, bvec): rscale = sym.Symbol("rscale") - coefficients = expansion.coefficients_from_source(avec, bvec, rscale) + coefficients = expansion.coefficients_from_source(avec, bvec, rscale, sac) assigned_coeffs = [ sym.Symbol( @@ -78,7 +78,7 @@ def expand(expansion_nr, sac, expansion, avec, bvec): for i in expansion.get_coefficient_identifiers()] return sac.assign_unique("expn%d_result" % expansion_nr, - expansion.evaluate(assigned_coeffs, bvec, rscale)) + expansion.evaluate(assigned_coeffs, bvec, rscale, sac)) # {{{ layer potential computation diff --git a/sumpy/symbolic.py b/sumpy/symbolic.py index 7a86958aebd7eea6c8054b11e61413314886ebe9..0fed239d30b04fe38360efee190de5f38b66293b 100644 --- a/sumpy/symbolic.py +++ b/sumpy/symbolic.py @@ -76,7 +76,7 @@ _find_symbolic_backend() # Before adding a function here, make sure it's present in both modules. 
SYMBOLIC_API = """ Add Basic Mul Pow exp sqrt log symbols sympify cos sin atan2 Function Symbol -Derivative Integer Matrix Subs I pi functions""".split() +Derivative Integer Matrix Subs I pi functions Number Float""".split() if USE_SYMENGINE: import symengine as sym diff --git a/sumpy/tools.py b/sumpy/tools.py index 2719f43a2e128da47997f0d1a010072d9bcec1dd..63c6105854982519d35193224ec7ac4f10a4283f 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -3,6 +3,8 @@ from __future__ import division, absolute_import __copyright__ = """ Copyright (C) 2012 Andreas Kloeckner Copyright (C) 2018 Alexandru Fikl +Copyright (C) 2006-2019 SymPy Development Team +Copyright (C) 2020 Isuru Fernando """ __license__ = """ @@ -23,6 +25,36 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +============================================================================= +Copyright (c) 2006-2019 SymPy Development Team + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of SymPy nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. """ import six @@ -30,6 +62,9 @@ from six.moves import range, zip from pytools import memoize_method, memoize_in import numpy as np import sumpy.symbolic as sym +import numbers +import math +from collections import namedtuple import pyopencl as cl import pyopencl.array # noqa @@ -67,28 +102,108 @@ def mi_power(vector, mi, evaluate=True): return result +def add_to_sac(sac, expr): + import sumpy.symbolic as sym + numeric_types = (numbers.Number, sym.Number, int, float, complex) + if sac is None or isinstance(expr, numeric_types): + return expr + else: + return sym.Symbol(sac.assign_unique("temp", expr)) + + class MiDerivativeTaker(object): - def __init__(self, expr, var_list): + def __init__(self, expr, var_list, rscale=1, sac=None): + r""" + A class to take scaled derivatives of the symbolic expression + expr w.r.t. variables var_list and the scaling parameter rscale. + + Consider a Taylor multipole expansion: + + .. 
math:: + + f (x - y) = \sum_{i = 0}^{\infty} (\partial_y^i f) (x - y) \big|_{y = c} + \frac{(y - c)^i}{i!} . + + Now suppose we would like to use a scaled version :math:`g` of the + kernel :math:`f`: + + .. math:: + + \begin{eqnarray*} + f (x) & = & g (x / \alpha),\\ + f^{(i)} (x) & = & \frac{1}{\alpha^i} g^{(i)} (x / \alpha) . + \end{eqnarray*} + + where :math:`\alpha` is chosen to be on a length scale similar to + :math:`x` (for example by choosing :math:`\alpha` proporitional to the + size of the box for which the expansion is intended) so that :math:`x / + \alpha` is roughly of unit magnitude, to avoid arithmetic issues with + small arguments. This yields + + .. math:: + + f (x - y) = \sum_{i = 0}^{\infty} (\partial_y^i g) + \left( \frac{x - y}{\alpha} \right) \Bigg|_{y = c} + \cdot + \frac{(y - c)^i}{\alpha^i \cdot i!}. + + Observe that the :math:`(y - c)` term is now scaled to unit magnitude, + as is the argument of :math:`g`. + + With :math:`\xi = x / \alpha`, we find + + .. math:: + + \begin{eqnarray*} + g (\xi) & = & f (\alpha \xi),\\ + g^{(i)} (\xi) & = & \alpha^i f^{(i)} (\alpha \xi) . + \end{eqnarray*} + + Generically for all kernels, :math:`f^{(i)} (\alpha \xi)` is computable + by taking a sufficient number of symbolic derivatives of :math:`f` and + providing :math:`\alpha \xi = x` as the argument. + + Now, for some kernels, like :math:`f (x) = C \log x`, the powers of + :math:`\alpha^i` from the chain rule cancel with the ones from the + argument substituted into the kernel derivatives: + + .. math:: + + g^{(i)} (\xi) = \alpha^i f^{(i)} (\alpha \xi) = C' \cdot \alpha^i \cdot + \frac{1}{(\alpha x)^i} \quad (i > 0), + + making them what you might call *scale-invariant*. + + This derivative taker returns :math:`g^{(i)}(\xi) = \alpha^i f^{(i)}` + given :math:`f^{(0)}` as *expr* and :math:`\alpha` as *rscale*. + """ + assert isinstance(expr, sym.Basic) self.var_list = var_list empty_mi = (0,) * len(var_list) self.cache_by_mi = {empty_mi: expr} + self.rscale = rscale + self.sac = sac + self.dim = len(self.var_list) + self.orig_expr = expr def mi_dist(self, a, b): return np.array(a, dtype=int) - np.array(b, dtype=int) def diff(self, mi): try: - expr = self.cache_by_mi[mi] + return self.cache_by_mi[mi] except KeyError: - current_mi = self.get_closest_cached_mi(mi) - expr = self.cache_by_mi[current_mi] + pass + + current_mi = self.get_closest_cached_mi(mi) + expr = self.cache_by_mi[current_mi] - for next_deriv, next_mi in self.get_derivative_taking_sequence( - current_mi, mi): - expr = expr.diff(next_deriv) - self.cache_by_mi[next_mi] = expr + for next_deriv, next_mi in self.get_derivative_taking_sequence( + current_mi, mi): + expr = expr.diff(next_deriv) * self.rscale + self.cache_by_mi[next_mi] = expr return expr @@ -106,6 +221,160 @@ class MiDerivativeTaker(object): if (np.array(mi) >= np.array(other_mi)).all()), key=lambda other_mi: sum(self.mi_dist(mi, other_mi))) + +class LaplaceDerivativeTaker(MiDerivativeTaker): + + def __init__(self, expr, var_list, rscale=1, sac=None): + super(LaplaceDerivativeTaker, self).__init__(expr, var_list, rscale, sac) + self.scaled_var_list = [add_to_sac(self.sac, v/rscale) for v in var_list] + self.scaled_r = add_to_sac(self.sac, + sym.sqrt(sum(v**2 for v in self.scaled_var_list))) + + def diff(self, mi): + # Return zero for negative values. Makes the algorithm readable. 
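+        # Apart from first-order derivatives (handled symbolically via
+        # MiDerivativeTaker.diff below), each D^mi G is built from already
+        # cached lower-order derivatives and a division by the scaled r**2,
+        # so repeated symbolic differentiation of 1/r or log(r) is avoided.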
+ if min(mi) < 0: + return 0 + try: + return self.cache_by_mi[mi] + except KeyError: + pass + + dim = self.dim + if max(mi) == 1: + return MiDerivativeTaker.diff(self, mi) + d = -1 + for i in range(dim): + if mi[i] >= 2: + d = i + break + assert d >= 0 + expr = 0 + for i in range(dim): + mi_minus_one = list(mi) + mi_minus_one[i] -= 1 + mi_minus_one = tuple(mi_minus_one) + mi_minus_two = list(mi) + mi_minus_two[i] -= 2 + mi_minus_two = tuple(mi_minus_two) + x = self.scaled_var_list[i] + n = mi[i] + if i == d: + if dim == 3: + expr -= (2*n - 1) * x * self.diff(mi_minus_one) + expr -= (n - 1)**2 * self.diff(mi_minus_two) + else: + expr -= 2 * x * (n - 1) * self.diff(mi_minus_one) + expr -= (n - 1) * (n - 2) * self.diff(mi_minus_two) + if n == 2 and sum(mi) == 2: + expr += 1 + else: + expr -= 2 * n * x * self.diff(mi_minus_one) + expr -= n * (n - 1) * self.diff(mi_minus_two) + expr /= self.scaled_r**2 + expr = add_to_sac(self.sac, expr) + self.cache_by_mi[mi] = expr + return expr + + +class RadialDerivativeTaker(MiDerivativeTaker): + + def __init__(self, expr, var_list, rscale=1, sac=None): + """ + Takes the derivatives of a radial function. + """ + import sumpy.symbolic as sym + super(RadialDerivativeTaker, self).__init__(expr, var_list, rscale, sac) + empty_mi = (0,) * len(var_list) + self.cache_by_mi_q = {(empty_mi, 0): expr} + self.r = sym.sqrt(sum(v**2 for v in var_list)) + rsym = sym.Symbol("_r") + r_expr = expr.xreplace({self.r**2: rsym**2}) + self.is_radial = not any(r_expr.has(v) for v in var_list) + + def diff(self, mi, q=0): + """ + See [1] for the algorithm + + [1]: Tausch, J., 2003. The fast multipole method for arbitrary Green's + functions. Contemporary Mathematics, 329, pp.307-314. + """ + if not self.is_radial: + assert q == 0 + return MiDerivativeTaker.diff(self, mi) + + try: + return self.cache_by_mi_q[(mi, q)] + except KeyError: + pass + + for i in range(self.dim): + if mi[i] == 1: + mi_minus_one = list(mi) + mi_minus_one[i] = 0 + mi_minus_one = tuple(mi_minus_one) + expr = self.var_list[i] * self.diff(mi_minus_one, q=q+1) + expr *= self.rscale + self.cache_by_mi_q[(mi, q)] = expr + return expr + + for i in range(self.dim): + if mi[i] >= 2: + mi_minus_one = list(mi) + mi_minus_one[i] -= 1 + mi_minus_one = tuple(mi_minus_one) + mi_minus_two = list(mi) + mi_minus_two[i] -= 2 + mi_minus_two = tuple(mi_minus_two) + expr = (mi[i]-1)*self.diff(mi_minus_two, q=q+1) * self.rscale + expr += self.var_list[i] * self.diff(mi_minus_one, q=q+1) + expr *= self.rscale + expr = add_to_sac(self.sac, expr) + self.cache_by_mi_q[(mi, q)] = expr + return expr + + assert mi == (0,)*self.dim + assert q > 0 + + prev_expr = self.diff(mi, q=q-1) + # Need to get expr.diff(r)/r, but we can only do expr.diff(x) + # Use expr.diff(x) = expr.diff(r) * x / r + expr = prev_expr.diff(self.var_list[0])/self.var_list[0] + # We need to distribute the division above + expr = expr.expand(deep=False) + self.cache_by_mi_q[(mi, q)] = expr + return expr + + +class HelmholtzYukawaDerivativeTaker(RadialDerivativeTaker): + + def diff(self, mi, q=0): + import sumpy.symbolic as sym + if q < 2 or mi != (0,)*self.dim: + return RadialDerivativeTaker.diff(self, mi, q) + try: + return self.cache_by_mi_q[(mi, q)] + except KeyError: + pass + + if self.dim == 2: + # See https://dlmf.nist.gov/10.6.E6 + # and https://dlmf.nist.gov/10.6#E1 + k = self.orig_expr.args[1] / self.r + expr = - 2 * (q - 1) * self.diff(mi, q - 1) + expr += - k**2 * self.diff(mi, q - 2) + expr /= self.r**2 + else: + # See reference [1] in 
RadialDerivativeTaker.diff + k = (self.orig_expr * self.r).args[-1] / sym.I / self.r + expr = -(2*q - 1)/self.r**2 * self.diff(mi, q - 1) + expr += -k**2 / self.r * self.diff(mi, q - 2) + self.cache_by_mi_q[(mi, q)] = expr + return expr + + +MiDerivativeTakerWrapper = namedtuple('MiDerivativeTakerWrapper', + ['taker', 'initial_mi']) + # }}} @@ -670,6 +939,8 @@ def my_syntactic_subs(expr, subst_dict): return expr +# {{{ matrices + def reduced_row_echelon_form(m): """Calculates a reduced row echelon form of a matrix `m`. @@ -760,4 +1031,119 @@ def solve_symbolic(A, b): # noqa: N803 red = reduced_row_echelon_form(big)[0] return red[:, big.shape[0]:] +# }}} + + +# {{{ FFT + +def _complex_tuple_mul(a, b, sac): + """ + Multiply the two complex numbers represented as a tuple + for real and imaginary parts + """ + return (add_to_sac(sac, (a[0]*b[0])-(a[1]*b[1])), + add_to_sac(sac, (a[0]*b[1])+(a[1]*b[0]))) + + +def _binary_reverse(n, bits): + # Returns the reverse of the number n in binary form with bits + # number of bits + b = bin(n)[2:].rjust(bits, "0") + return int(b[::-1], 2) + + +def _padded_fft_size(n): + if n < 2: + return n + b = n.bit_length() - 1 + if n & (n - 1): # not a power of 2 + b += 1 + n = 2**b + return n + + +def fft(seq, inverse=False, sac=None): + """ + Return the discrete fourier transform of the sequence seq. + seq should be a python iterable with tuples of length 2 + corresponding to the real part and imaginary part. + """ + + a = seq + n = len(a) + if n < 2: + return a + + n = _padded_fft_size(n) + b = n.bit_length() - 1 + + a += [(0, 0)]*(n - len(a)) + for i in range(1, n): + j = _binary_reverse(i, b) + if i < j: + a[i], a[j] = a[j], a[i] + ang = 2*math.pi/n + # Rewrite cosines and sines using cosines of angle in the first quadrant + # This is to reduce duplicate of floating point numbers with 1 ULP difference + # and also make sure quantities like cos(pi/2) - sin(pi/2) produces 0 exactly. 
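+    # w[j] holds the twiddle factor exp(2*pi*I*j/n) for 0 <= j < n/2 as a
+    # (real, imaginary) tuple; the inverse transform conjugates it below.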
+ w = [(math.cos(ang*i), math.cos(ang*(n/4.0 - i))) for i in range((n + 3)//4)] + w[0] = (1, 0) + w += [(-math.cos(ang*(n/2 - i)), math.cos(ang*(i - n/4.0))) for + i in range((n + 3)//4, n//2)] + if n % 4 == 0: + w[n//4] = (0, 1) + if inverse: + w = [(x[0], -x[1]) for x in w] + h = 2 + while h <= n: + hf, ut = h // 2, n // h + for i in range(0, n, h): + for j in range(hf): + u, v = a[i + j], _complex_tuple_mul(a[i + j + hf], w[ut * j], + sac=sac) + a[i + j] = (u[0] + v[0], u[1] + v[1]) + a[i + j + hf] = (u[0] - v[0], u[1] - v[1]) + h *= 2 + + if inverse: + a = [(x[0]/n, x[1]/n) for x in a] + + return a + + +def fft_toeplitz_upper_triangular(first_row, x, sac=None): + """ + Returns the matvec of the Toeplitz matrix given by + the first row and the vector x using a Fourier transform + """ + assert len(first_row) == len(x) + n = len(first_row) + v = list(first_row) + v += [0]*(n-1) + + x = list(reversed(x)) + x += [0]*(n-1) + + v_fft = fft([(a, 0) for a in v], sac) + x_fft = fft([(a, 0) for a in x], sac) + res_fft = [_complex_tuple_mul(a, b, sac) for a, b in zip(v_fft, x_fft)] + res = fft(res_fft, inverse=True, sac=sac) + return [a for a, _ in reversed(res[:n])] + + +def fft_toeplitz_upper_triangular_lwork(n): + return _padded_fft_size(2*n - 1) + + +def matvec_toeplitz_upper_triangular(first_row, vector): + n = len(first_row) + assert len(vector) == n + output = [0]*n + for row in range(n): + terms = tuple(first_row[col-row]*vector[col] for col in range(row, n)) + output[row] = sym.Add(*terms) + return output + +# }}} + # vim: fdm=marker diff --git a/sumpy/version.py b/sumpy/version.py index f42335928e7fcb0cf6f93e8461714fb436691f8d..f44cfe20283522f87e8ebb10d41fff268d205080 100644 --- a/sumpy/version.py +++ b/sumpy/version.py @@ -43,7 +43,7 @@ else: # }}} -VERSION = (2016, 1) +VERSION = (2020, 1) VERSION_STATUS = "beta1" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS diff --git a/test/test_codegen.py b/test/test_codegen.py index 7e3c25e0e5f029ef5161ba970106283e01ef245f..fa15592796a88dff555396e2624f38da04f31756 100644 --- a/test/test_codegen.py +++ b/test/test_codegen.py @@ -91,7 +91,7 @@ def test_line_taylor_coeff_growth(): expn = LineTaylorLocalExpansion(LaplaceKernel(2), order) avec = make_sym_vector("a", 2) bvec = make_sym_vector("b", 2) - coeffs = expn.coefficients_from_source(avec, bvec, rscale=1) + coeffs = expn.coefficients_from_source(avec, bvec, rscale=1, sac=None) sym2pymbolic = SympyToPymbolicMapper() coeffs_pymbolic = [sym2pymbolic(c) for c in coeffs] diff --git a/test/test_cse.py b/test/test_cse.py index 703e8c9f93ca2069c273ead893a00ed0ba3dc697..2f54f34bab36e9be4047c05f1a2c663523a83509 100644 --- a/test/test_cse.py +++ b/test/test_cse.py @@ -253,12 +253,12 @@ def test_issue_4499(): G = Function('G') # noqa t = Tuple( *(a, a + S(1)/2, 2*a, b, 2*a - b + 1, (sqrt(z)/2)**(-2*a + 1)*B(2*a - - b, sqrt(z))*B(b - 1, sqrt(z))*G(b)*G(2*a - b + 1), - sqrt(z)*(sqrt(z)/2)**(-2*a + 1)*B(b, sqrt(z))*B(2*a - b, + - b, sqrt(z))*B(b - 1, sqrt(z))*G(b)*G(2*a - b + 1), # noqa + sqrt(z)*(sqrt(z)/2)**(-2*a + 1)*B(b, sqrt(z))*B(2*a - b, # noqa sqrt(z))*G(b)*G(2*a - b + 1), sqrt(z)*(sqrt(z)/2)**(-2*a + 1)*B(b - 1, sqrt(z))*B(2*a - b + 1, sqrt(z))*G(b)*G(2*a - b + 1), - (sqrt(z)/2)**(-2*a + 1)*B(b, sqrt(z))*B(2*a - b + 1, - sqrt(z))*G(b)*G(2*a - b + 1), 1, 0, S(1)/2, z/2, -b + 1, -2*a + b, + (sqrt(z)/2)**(-2*a + 1)*B(b, sqrt(z))*B(2*a - b + 1, # noqa + sqrt(z))*G(b)*G(2*a - b + 1), 1, 0, S(1)/2, z/2, -b + 1, -2*a + b, # noqa -2*a)) # noqa c = cse(t) assert len(c[0]) == 11 diff --git 
a/test/test_fmm.py b/test/test_fmm.py index 2bc6d69d6bcafedf338cc78aea0591d6d5ac3845..baa7f5ef0ac144bb716bfab95851b0fa76879eb1 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -115,10 +115,10 @@ def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class): # {{{ plot tree if 0: - host_tree = tree.get() - host_trav = trav.get() + host_tree = tree.get(queue) + host_trav = trav.get(queue) - if 1: + if 0: print("src_box", host_tree.find_box_nr_for_source(403)) print("tgt_box", host_tree.find_box_nr_for_target(28)) print(list(host_trav.target_or_target_parent_boxes).index(37)) @@ -170,7 +170,8 @@ def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class): for order in order_values: out_kernels = [knl] - from sumpy.fmm import SumpyExpansionWranglerCodeContainer + from sumpy.fmm import (SumpyExpansionWranglerCodeContainer, + SumpyTranslationClassesData) wcc = SumpyExpansionWranglerCodeContainer( ctx, partial(mpole_expn_class, knl), @@ -178,7 +179,8 @@ def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class): out_kernels) wrangler = wcc.get_wrangler(queue, tree, dtype, fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order, - kernel_extra_kwargs=extra_kwargs) + kernel_extra_kwargs=extra_kwargs, + translation_classes_data=SumpyTranslationClassesData(queue, trav)) from boxtree.fmm import drive_fmm diff --git a/test/test_kernels.py b/test/test_kernels.py index 8225723cdd4f7df2afb7c487a578cc8c9ee81d28..4e445888e9b7f24cf939fa7f34e27286cb60f2f2 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -45,6 +45,7 @@ from sumpy.expansion.local import ( from sumpy.kernel import (LaplaceKernel, HelmholtzKernel, AxisTargetDerivative, DirectionalSourceDerivative, BiharmonicKernel, StokesletKernel) from pytools.convergence import PConvergenceVerifier +import sumpy.symbolic as sym import logging logger = logging.getLogger(__name__) @@ -346,6 +347,72 @@ def test_p2e2p(ctx_getter, base_knl, expn_class, order, with_source_derivative): assert eoc_rec_grad_x.order_estimate() > tgt_order_grad - grad_slack +# {{{ test toeplitz + +def _m2l_translate_simple(tgt_expansion, src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale): + + if not tgt_expansion.use_rscale: + src_rscale = 1 + tgt_rscale = 1 + + from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansionBase + if not isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase): + return 1 + + # We know the general form of the multipole expansion is: + # + # coeff0 * diff(kernel, mi0) + coeff1 * diff(kernel, mi1) + ... + # + # To get the local expansion coefficients, we take derivatives of + # the multipole expansion. 
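+    # Up to the rscale factors, the entry coupling a target coefficient
+    # `deriv` to a source coefficient `term` is taker.diff(add_mi(deriv, term)),
+    # i.e. it depends only on the sum of the two multi-indices.  That Toeplitz
+    # structure is what the fft_toeplitz_upper_triangular and
+    # matvec_toeplitz_upper_triangular helpers in sumpy.tools exploit; this
+    # reference version forms the sums directly.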
+ taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale, sac=None) + + from sumpy.tools import add_mi + + result = [] + for deriv in tgt_expansion.get_coefficient_identifiers(): + local_result = [] + for coeff, term in zip( + src_coeff_exprs, + src_expansion.get_coefficient_identifiers()): + + kernel_deriv = taker.diff(add_mi(deriv, term)) / src_rscale**sum(deriv) + + local_result.append( + coeff * kernel_deriv * tgt_rscale**sum(deriv)) + result.append(sym.Add(*local_result)) + return result + + +def test_m2l_toeplitz(ctx_getter): + dim = 3 + knl = LaplaceKernel(dim) + local_expn_class = LaplaceConformingVolumeTaylorLocalExpansion + mpole_expn_class = LaplaceConformingVolumeTaylorMultipoleExpansion + + local_expn = local_expn_class(knl, order=5) + mpole_expn = mpole_expn_class(knl, order=5) + + dvec = sym.make_sym_vector("d", dim) + src_coeff_exprs = list(1 + np.random.randn(len(mpole_expn))) + src_rscale = 2.0 + tgt_rscale = 1.0 + + expected_output = _m2l_translate_simple(local_expn, mpole_expn, src_coeff_exprs, + src_rscale, dvec, tgt_rscale) + actual_output = local_expn.translate_from(mpole_expn, src_coeff_exprs, + src_rscale, dvec, tgt_rscale, sac=None) + + replace_dict = dict((d, np.random.rand(1)[0]) for d in dvec) + for sym_a, sym_b in zip(expected_output, actual_output): + num_a = sym_a.xreplace(replace_dict) + num_b = sym_b.xreplace(replace_dict) + assert abs(num_a - num_b)/abs(num_a) < 1e-10 + + +# }}} + @pytest.mark.parametrize("knl, local_expn_class, mpole_expn_class", [ (LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion), (LaplaceKernel(2), LaplaceConformingVolumeTaylorLocalExpansion, diff --git a/test/test_misc.py b/test/test_misc.py index a25b2e800ab6f9942395821cf0cb4345e80b6ccf..77f887c1896c3c0db8fc6a69209defa2e1a50591 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -68,6 +68,7 @@ class YukawaKernelInfo: BiharmonicKernelInfo(2), BiharmonicKernelInfo(3), YukawaKernelInfo(2, 5), + YukawaKernelInfo(3, 5), ]) def test_pde_check_kernels(ctx_factory, knl_info, order=5): dim = knl_info.kernel.dim diff --git a/test/test_tools.py b/test/test_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..743caa14f9d9e632bf32aa679c984da8a0556506 --- /dev/null +++ b/test/test_tools.py @@ -0,0 +1,56 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2020 Isuru Fernando" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +import logging +logger = logging.getLogger(__name__) + +import sumpy.symbolic as sym +from sumpy.tools import (fft_toeplitz_upper_triangular, + matvec_toeplitz_upper_triangular) +import numpy as np + + +def test_fft(): + k = 5 + v = np.random.rand(k) + x = np.random.rand(k) + + fft = fft_toeplitz_upper_triangular(v, x) + matvec = matvec_toeplitz_upper_triangular(v, x) + + for i in range(k): + assert abs(fft[i] - matvec[i]) < 1e-15 + + +def test_fft_small_floats(): + k = 5 + v = sym.make_sym_vector("v", k) + x = sym.make_sym_vector("x", k) + + fft = fft_toeplitz_upper_triangular(v, x) + for expr in fft: + for f in expr.atoms(sym.Float): + if f == 0: + continue + assert abs(f) > 1e-10
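For reference, the upper-triangular Toeplitz matvec exercised by test_fft above can be reproduced with plain NumPy, using the same reverse-and-zero-pad convolution trick as fft_toeplitz_upper_triangular. This is a standalone sketch with invented function names, independent of sumpy:

import numpy as np


def toeplitz_upper_matvec(first_row, x):
    # out[i] = sum_{j >= i} first_row[j - i] * x[j]
    n = len(first_row)
    return np.array([sum(first_row[j - i]*x[j] for j in range(i, n))
                     for i in range(n)])


def toeplitz_upper_matvec_fft(first_row, x):
    # Embed the matvec in a linear convolution of first_row with reversed(x):
    # out is the reversed first n entries of that convolution.
    n = len(first_row)
    m = 2*n - 1                      # np.fft.fft zero-pads to this length
    conv = np.fft.ifft(np.fft.fft(first_row, m)*np.fft.fft(x[::-1], m))
    return conv[:n][::-1].real


rng = np.random.default_rng(0)
v, x = rng.random(5), rng.random(5)
assert np.allclose(toeplitz_upper_matvec(v, x), toeplitz_upper_matvec_fft(v, x))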