From 529db343b0a4a1f0803e37a6bfa0ae0526c731b9 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Mon, 16 Mar 2020 23:25:53 -0500 Subject: [PATCH 01/48] Add prototype Github CI workflow --- .github/workflows/ci.yml | 64 ++++++++++++++++++++++++++++++++++++++++ README.rst | 6 ++-- 2 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..463b78d1 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,64 @@ +name: CI +on: + push: + branches: + - master + pull_request: + paths-ignore: + - 'doc/*.rst' + schedule: + - cron: '17 3 * * 0' + +jobs: + flake8: + name: Flake8 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - + uses: actions/setup-python@v1 + with: + python-version: '3.x' + - name: "Main Script" + run: | + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh + . ./prepare-and-run-flake8.sh sumpy test + + pytest2: + name: Conda Pytest Py2 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: "Main Script" + run: | + sed 's/python=3/python=2.7/' .test-conda-env-py3.yml > .test-conda-env-py2-pre.yml + grep -v symengine .test-conda-env-py2-pre.yml > .test-conda-env-py2.yml + cat .test-conda-env-py2.yml + CONDA_ENVIRONMENT=.test-conda-env-py2.yml + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + . ./build-and-test-py-project-within-miniconda.sh + + pytest3: + name: Conda Pytest Py3 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: "Main Script" + run: | + grep -v symengine .test-conda-env-py3.yml > .test-conda-env.yml + CONDA_ENVIRONMENT=.test-conda-env.yml + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + . ./build-and-test-py-project-within-miniconda.sh + + pytest3symengine: + name: Conda Pytest Py3 Symengine + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: "Main Script" + run: | + CONDA_ENVIRONMENT=.test-conda-env-py3.yml + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + . ./build-and-test-py-project-within-miniconda.sh + +# vim: sw=4 diff --git a/README.rst b/README.rst index 884bef17..3d54af6a 100644 --- a/README.rst +++ b/README.rst @@ -4,9 +4,9 @@ sumpy: n-body kernels and translation operators .. image:: https://gitlab.tiker.net/inducer/sumpy/badges/master/pipeline.svg :alt: Gitlab Build Status :target: https://gitlab.tiker.net/inducer/sumpy/commits/master -.. image:: https://dev.azure.com/ak-spam/inducer/_apis/build/status/inducer.sumpy?branchName=master - :alt: Azure Build Status - :target: https://dev.azure.com/ak-spam/inducer/_build/latest?definitionId=17&branchName=master +.. image:: https://github.com/inducer/sumpy/workflows/CI/badge.svg?branch=master + :alt: Github Build Status + :target: https://github.com/inducer/sumpy/actions?query=branch%3Amaster+workflow%3ACI .. image:: https://badge.fury.io/py/sumpy.png :alt: Python Package Index Release Page :target: https://pypi.org/project/sumpy/ -- GitLab From 8bf5e3fb87afea66e9a8ebdc02340d9aafc87d17 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Tue, 17 Mar 2020 08:18:55 -0500 Subject: [PATCH 02/48] Drop Azure CI --- azure-pipelines.yml | 96 --------------------------------------------- 1 file changed, 96 deletions(-) delete mode 100644 azure-pipelines.yml diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index 9f1f92a2..00000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,96 +0,0 @@ -jobs: -- - job: 'Python2' - pool: - vmImage: 'ubuntu-latest' - - steps: - - - script: | - set -e - sed 's/python=3/python=2.7/' .test-conda-env-py3.yml > .test-conda-env-py2-pre.yml - grep -v symengine .test-conda-env-py2-pre.yml > .test-conda-env-py2.yml - cat .test-conda-env-py2.yml - CONDA_ENVIRONMENT=.test-conda-env-py2.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . ./build-and-test-py-project-within-miniconda.sh - - displayName: 'Pytest Conda' - - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'JUnit' - testResultsFiles: 'test/pytest.xml' - -- - job: 'Python3' - pool: - vmImage: 'ubuntu-latest' - - steps: - - - script: | - set -e - grep -v symengine .test-conda-env-py3.yml > .test-conda-env.yml - CONDA_ENVIRONMENT=.test-conda-env.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . ./build-and-test-py-project-within-miniconda.sh - - displayName: 'Pytest Conda' - - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'JUnit' - testResultsFiles: 'test/pytest.xml' - -- - job: 'Python3Symengine' - pool: - vmImage: 'ubuntu-latest' - - steps: - - - script: | - set -e - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . ./build-and-test-py-project-within-miniconda.sh - - displayName: 'Pytest Conda' - - - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'JUnit' - testResultsFiles: 'test/pytest.xml' - -- - job: 'Flake8' - pool: - vmImage: 'ubuntu-latest' - strategy: - matrix: - Python37: - python.version: '3.7' - - steps: - - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - - - - script: | - set -e - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh - . ./prepare-and-run-flake8.sh sumpy test - - displayName: 'Flake8' - -schedules: -- - cron: "0 0 * * 0" - displayName: Weekly build - branches: - include: - - master -- GitLab From 2e9e72767d7d995614c3884fd17dd86ae04d0f22 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Thu, 19 Mar 2020 00:20:59 -0500 Subject: [PATCH 03/48] Add FFT --- sumpy/tools.py | 100 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/sumpy/tools.py b/sumpy/tools.py index 2719f43a..7cfe6138 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -3,6 +3,8 @@ from __future__ import division, absolute_import __copyright__ = """ Copyright (C) 2012 Andreas Kloeckner Copyright (C) 2018 Alexandru Fikl +Copyright (C) 2006-2019 SymPy Development Team +Copyright (C) 2020 Isuru Fernando """ __license__ = """ @@ -30,6 +32,8 @@ from six.moves import range, zip from pytools import memoize_method, memoize_in import numpy as np import sumpy.symbolic as sym +import numbers +import math import pyopencl as cl import pyopencl.array # noqa @@ -670,6 +674,8 @@ def my_syntactic_subs(expr, subst_dict): return expr +# {{{ matrices + def reduced_row_echelon_form(m): """Calculates a reduced row echelon form of a matrix `m`. @@ -760,4 +766,98 @@ def solve_symbolic(A, b): # noqa: N803 red = reduced_row_echelon_form(big)[0] return red[:, big.shape[0]:] +# }}} + + +# {{{ FFT + +def _fft_uneval_expr(expr): + """ + Creates a CSE node if the expr is not a numeric type + """ + if isinstance(expr, (numbers.Number, sym.Number, int, float, complex)): + return expr + # UnevaluatedExpr is not implemented in SymEngine, but that's fine + # as SymEngine doesn't distribute 2*(a+b) to 2*a + 2*b. + # SymPy distributes which destroys the complexity. + return sym.UnevaluatedExpr(expr) + + +def _complex_tuple_mul(a, b): + """ + Multiply the two complex numbers represented as a tuple + for real and imaginary parts + """ + return (_fft_uneval_expr((a[0]*b[0])-(a[1]*b[1])), + _fft_uneval_expr((a[0]*b[1])+(a[1]*b[0]))) + + +def _binary_reverse(n, bits): + # Returns the reverse of the number n in binary form with bits + # number of bits + b = bin(n)[2:].rjust(bits, "0") + return int(b[::-1], 2) + + +def fft(seq, inverse=False): + """ + Return the discrete fourier transform of the sequence seq. + seq should be a python iterable with tuples of length 2 + corresponding to the real part and imaginary part. + """ + + a = seq + n = len(a) + if n < 2: + return a + + b = n.bit_length() - 1 + if n & (n - 1): # not a power of 2 + b += 1 + n = 2**b + + a += [(0, 0)]*(n - len(a)) + for i in range(1, n): + j = _binary_reverse(i, b) + if i < j: + a[i], a[j] = a[j], a[i] + ang = -2*math.pi/n if inverse else 2*math.pi/n + + w = [(math.cos(ang*i), math.sin(ang*i)) for i in range(n // 2)] + for i in range(len(w)): + if abs(w[i][0]) == 1.0: + w[i] = (int(w[i][0]), 0) + if abs(w[i][1]) == 1.0: + w[i] = (0, int(w[i][1])) + + h = 2 + while h <= n: + hf, ut = h // 2, n // h + for i in range(0, n, h): + for j in range(hf): + u, v = a[i + j], _complex_tuple_mul(a[i + j + hf], w[ut * j]) + a[i + j] = (u[0] + v[0], u[1] + v[1]) + a[i + j + hf] = (u[0] - v[0], u[1] - v[1]) + h *= 2 + + if inverse: + a = [(x[0]/n, x[1]/n) for x in a] + + return a + + +def toeplitz(v, x): + """ + Returns the matvec of the Toeplitz matrix given by + the Toeplitz vector v and the vector x using a Fourier transform + """ + assert len(v) == len(x) + v_fft = fft([(a, 0) for a in v]) + x_fft = fft([(a, 0) for a in x]) + res_fft = [_complex_tuple_mul(a, b) for a, b in zip(v_fft, x_fft)] + res = fft(res_fft, inverse=True) + return [a for a, _ in res] + +# }}} + # vim: fdm=marker -- GitLab From ab662aaf4bf5dc88922ac7b84a96aff7a5679ea7 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sat, 21 Mar 2020 16:22:50 -0500 Subject: [PATCH 04/48] Generate Toeplitz matrix --- sumpy/expansion/local.py | 86 +++++++++++++++++++++++----------------- sumpy/symbolic.py | 2 +- sumpy/tools.py | 25 ++++++++++-- 3 files changed, 71 insertions(+), 42 deletions(-) diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index c32778ae..2b8c8b3c 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -141,6 +141,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): for coeff, mi in zip( evaluated_coeffs, self.get_full_coefficient_identifiers())) + def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale): logger.info("building translation operator: %s(%d) -> %s(%d): start" @@ -168,6 +169,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): taker = src_expansion.get_kernel_derivative_taker(dvec) from sumpy.tools import add_mi + from pytools import generate_nonnegative_integer_tuples_below as gnitb max_mi = [0]*self.dim for i in range(self.dim): @@ -176,54 +178,64 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): max_mi[i] += max(mi[i] for mi in self.get_coefficient_identifiers()) + toeplitz_matrix_coeffs = list(gnitb([m + 1 for m in max_mi])) + toeplitz_matrix_ident_to_index = dict((ident, i) for i, ident in + enumerate(toeplitz_matrix_coeffs)) + # Create a expansion terms wrangler for derivatives up to order # (tgt order)+(src order) including a corresponding reduction matrix + # For eg: 2D full Taylor Laplace, this is (n, m), + # n+m<=2*p, n<=2*p, m<=2*p tgtplusderiv_exp_terms_wrangler = \ src_expansion.expansion_terms_wrangler.copy( order=self.order + src_expansion.order, max_mi=tuple(max_mi)) - tgtplusderiv_coeff_ids = \ - tgtplusderiv_exp_terms_wrangler.get_coefficient_identifiers() tgtplusderiv_full_coeff_ids = \ tgtplusderiv_exp_terms_wrangler.get_full_coefficient_identifiers() - tgtplusderiv_ident_to_index = dict((ident, i) for i, ident in enumerate(tgtplusderiv_full_coeff_ids)) + # The vector has the kernel derivatives and depends only on the distance + # between the two centers + taker = src_expansion.get_kernel_derivative_taker(dvec) + vector_stored = [] + # Calculate the kernel derivatives for the compressed set + for term in tgtplusderiv_exp_terms_wrangler.get_coefficient_identifiers(): + kernel_deriv = taker.diff(term) + vector_stored.append(kernel_deriv) + # Calculate the kernel derivatives for the full set + vector_full = tgtplusderiv_exp_terms_wrangler.get_full_kernel_derivatives_from_stored( + vector_stored, src_rscale) + + from sumpy.tools import add_mi + needed_vector_terms = set([]) + # For eg: 2D full Taylor Laplace, we only need kernel derivatives + # (n1+n2, m1+m2), n1+m1<=p, n2+m2<=p + for tgt_deriv in self.get_coefficient_identifiers(): + for src_deriv in src_expansion.get_coefficient_identifiers(): + needed = add_mi(src_deriv, tgt_deriv) + needed_vector_terms.add(needed) + + vector = [0]*len(toeplitz_matrix_coeffs) + for i, term in enumerate(toeplitz_matrix_coeffs): + if term in tgtplusderiv_ident_to_index: + vector[i] = vector_full[tgtplusderiv_ident_to_index[term]] + + # Calculate the first row of the upper triangular Toeplitz matrix + toeplitz_first_row = [0] * len(toeplitz_matrix_coeffs) + for coeff, term in zip( + src_coeff_exprs, + src_expansion.get_coefficient_identifiers()): + toeplitz_first_row[toeplitz_matrix_ident_to_index[term]] = coeff * src_rscale**sum(term) + + # Do the matvec + output = matvec_toeplitz_upper_triangular(toeplitz_first_row, vector) + + # Filter out the dummy rows and scale them for target result = [] - for lexp_mi in self.get_coefficient_identifiers(): - lexp_mi_terms = [] - - # Embed the source coefficients in the coefficient array - # for the (tgt order)+(src order) wrangler, offset by lexp_mi. - embedded_coeffs = [0] * len(tgtplusderiv_full_coeff_ids) - for coeff, term in zip( - src_coeff_exprs, - src_expansion.get_coefficient_identifiers()): - embedded_coeffs[ - tgtplusderiv_ident_to_index[add_mi(lexp_mi, term)]] \ - = coeff - - # Compress the embedded coefficient set - stored_coeffs = tgtplusderiv_exp_terms_wrangler \ - .get_stored_mpole_coefficients_from_full( - embedded_coeffs, src_rscale) - - # Sum the embedded coefficient set - for i, coeff in enumerate(stored_coeffs): - if coeff == 0: - continue - nderivatives_for_scaling = \ - sum(tgtplusderiv_coeff_ids[i])-sum(lexp_mi) - kernel_deriv = ( - src_expansion.get_scaled_multipole( - taker.diff(tgtplusderiv_coeff_ids[i]), - dvec, src_rscale, - nderivatives=sum(tgtplusderiv_coeff_ids[i]), - nderivatives_for_scaling=nderivatives_for_scaling)) - - lexp_mi_terms.append( - coeff * kernel_deriv * tgt_rscale**sum(lexp_mi)) - result.append(sym.Add(*lexp_mi_terms)) + for term in self.get_coefficient_identifiers(): + index = toeplitz_matrix_ident_to_index[term] + result.append(output[index]*tgt_rscale**sum(term)) + logger.info("building translation operator: done") return result diff --git a/sumpy/symbolic.py b/sumpy/symbolic.py index 7a86958a..e0f22671 100644 --- a/sumpy/symbolic.py +++ b/sumpy/symbolic.py @@ -76,7 +76,7 @@ _find_symbolic_backend() # Before adding a function here, make sure it's present in both modules. SYMBOLIC_API = """ Add Basic Mul Pow exp sqrt log symbols sympify cos sin atan2 Function Symbol -Derivative Integer Matrix Subs I pi functions""".split() +Derivative Integer Matrix Subs I pi functions Number""".split() if USE_SYMENGINE: import symengine as sym diff --git a/sumpy/tools.py b/sumpy/tools.py index 7cfe6138..ddd9800e 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -846,17 +846,34 @@ def fft(seq, inverse=False): return a -def toeplitz(v, x): +def fft_toeplitz_upper_triangular(first_row, x): """ Returns the matvec of the Toeplitz matrix given by - the Toeplitz vector v and the vector x using a Fourier transform + the first row and the vector x using a Fourier transform """ - assert len(v) == len(x) + assert len(first_row) == len(x) + n = len(first_row) + v = list(first_row) + v += [0]*(n-1) + + x = list(reversed(x)) + x += [0]*(n-1) + v_fft = fft([(a, 0) for a in v]) x_fft = fft([(a, 0) for a in x]) res_fft = [_complex_tuple_mul(a, b) for a, b in zip(v_fft, x_fft)] res = fft(res_fft, inverse=True) - return [a for a, _ in res] + return [a for a, _ in reversed(res[:n])] + + +def matvec_toeplitz_upper_triangular(first_row, vector): + n = len(first_row) + assert len(vector) == n + output = [0]*n + for row in range(n): + for col in range(row, n): + output[row] += first_row[col-row]*vector[col] + return output # }}} -- GitLab From 83e921419dcb83d04d0a968e463b1b67dd1cc73e Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sat, 21 Mar 2020 19:18:20 -0500 Subject: [PATCH 05/48] Fix missing import and formatting --- sumpy/expansion/local.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 2b8c8b3c..1c24c7db 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -30,6 +30,9 @@ from sumpy.expansion import ( HelmholtzConformingVolumeTaylorExpansion, BiharmonicConformingVolumeTaylorExpansion) +from sumpy.tools import (matvec_toeplitz_upper_triangular, + fft_toeplitz_upper_triangular) + class LocalExpansionBase(ExpansionBase): pass @@ -141,9 +144,8 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): for coeff, mi in zip( evaluated_coeffs, self.get_full_coefficient_identifiers())) - def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale): + dvec, tgt_rscale, use_fft=False): logger.info("building translation operator: %s(%d) -> %s(%d): start" % (type(src_expansion).__name__, src_expansion.order, @@ -186,27 +188,28 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # (tgt order)+(src order) including a corresponding reduction matrix # For eg: 2D full Taylor Laplace, this is (n, m), # n+m<=2*p, n<=2*p, m<=2*p - tgtplusderiv_exp_terms_wrangler = \ + srcplusderiv_terms_wrangler = \ src_expansion.expansion_terms_wrangler.copy( order=self.order + src_expansion.order, max_mi=tuple(max_mi)) - tgtplusderiv_full_coeff_ids = \ - tgtplusderiv_exp_terms_wrangler.get_full_coefficient_identifiers() - tgtplusderiv_ident_to_index = dict((ident, i) for i, ident in - enumerate(tgtplusderiv_full_coeff_ids)) + srcplusderiv_full_coeff_ids = \ + srcplusderiv_terms_wrangler.get_full_coefficient_identifiers() + srcplusderiv_ident_to_index = dict((ident, i) for i, ident in + enumerate(srcplusderiv_full_coeff_ids)) # The vector has the kernel derivatives and depends only on the distance # between the two centers taker = src_expansion.get_kernel_derivative_taker(dvec) vector_stored = [] # Calculate the kernel derivatives for the compressed set - for term in tgtplusderiv_exp_terms_wrangler.get_coefficient_identifiers(): + for term in \ + srcplusderiv_terms_wrangler.get_coefficient_identifiers(): kernel_deriv = taker.diff(term) vector_stored.append(kernel_deriv) # Calculate the kernel derivatives for the full set - vector_full = tgtplusderiv_exp_terms_wrangler.get_full_kernel_derivatives_from_stored( + vector_full = \ + srcplusderiv_terms_wrangler.get_full_kernel_derivatives_from_stored( vector_stored, src_rscale) - from sumpy.tools import add_mi needed_vector_terms = set([]) # For eg: 2D full Taylor Laplace, we only need kernel derivatives # (n1+n2, m1+m2), n1+m1<=p, n2+m2<=p @@ -217,18 +220,22 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): vector = [0]*len(toeplitz_matrix_coeffs) for i, term in enumerate(toeplitz_matrix_coeffs): - if term in tgtplusderiv_ident_to_index: - vector[i] = vector_full[tgtplusderiv_ident_to_index[term]] + if term in srcplusderiv_ident_to_index: + vector[i] = vector_full[srcplusderiv_ident_to_index[term]] # Calculate the first row of the upper triangular Toeplitz matrix toeplitz_first_row = [0] * len(toeplitz_matrix_coeffs) for coeff, term in zip( src_coeff_exprs, src_expansion.get_coefficient_identifiers()): - toeplitz_first_row[toeplitz_matrix_ident_to_index[term]] = coeff * src_rscale**sum(term) + toeplitz_first_row[toeplitz_matrix_ident_to_index[term]] = \ + coeff * src_rscale**sum(term) # Do the matvec - output = matvec_toeplitz_upper_triangular(toeplitz_first_row, vector) + if use_fft: + output = fft_toeplitz_upper_triangular(toeplitz_first_row, vector) + else: + output = matvec_toeplitz_upper_triangular(toeplitz_first_row, vector) # Filter out the dummy rows and scale them for target result = [] -- GitLab From a17c60fb14f1185908be6205e1aadc4615ee9cee Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 22 Mar 2020 00:46:05 -0500 Subject: [PATCH 06/48] Fix rscaling --- sumpy/expansion/local.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 1c24c7db..52cce2fc 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -203,7 +203,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # Calculate the kernel derivatives for the compressed set for term in \ srcplusderiv_terms_wrangler.get_coefficient_identifiers(): - kernel_deriv = taker.diff(term) + kernel_deriv = taker.diff(term) * src_rscale**sum(term) vector_stored.append(kernel_deriv) # Calculate the kernel derivatives for the full set vector_full = \ @@ -228,8 +228,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): for coeff, term in zip( src_coeff_exprs, src_expansion.get_coefficient_identifiers()): - toeplitz_first_row[toeplitz_matrix_ident_to_index[term]] = \ - coeff * src_rscale**sum(term) + toeplitz_first_row[toeplitz_matrix_ident_to_index[term]] = coeff # Do the matvec if use_fft: @@ -241,7 +240,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): result = [] for term in self.get_coefficient_identifiers(): index = toeplitz_matrix_ident_to_index[term] - result.append(output[index]*tgt_rscale**sum(term)) + result.append(output[index]*(tgt_rscale/src_rscale)**sum(term)) logger.info("building translation operator: done") return result -- GitLab From 56682e04f8b620830fbff3b6c41a2f929df42c54 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 22 Mar 2020 01:35:57 -0500 Subject: [PATCH 07/48] use get_scaled_multipole --- sumpy/expansion/local.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 52cce2fc..0db59480 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -203,7 +203,12 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # Calculate the kernel derivatives for the compressed set for term in \ srcplusderiv_terms_wrangler.get_coefficient_identifiers(): - kernel_deriv = taker.diff(term) * src_rscale**sum(term) + kernel_deriv = src_expansion.get_scaled_multipole( + taker.diff(term), + dvec, src_rscale, + nderivatives=sum(term), + nderivatives_for_scaling=sum(term), + ) vector_stored.append(kernel_deriv) # Calculate the kernel derivatives for the full set vector_full = \ -- GitLab From 0978271dcd3123662ea09a1f09c67502c50790f9 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 22 Mar 2020 01:39:44 -0500 Subject: [PATCH 08/48] Use UnevaluatedExpr for the ratio of rscales --- sumpy/expansion/local.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 0db59480..025ce5fe 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -243,9 +243,10 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # Filter out the dummy rows and scale them for target result = [] + rscale_ratio = sym.UnevaluatedExpr(tgt_rscale/src_rscale) for term in self.get_coefficient_identifiers(): index = toeplitz_matrix_ident_to_index[term] - result.append(output[index]*(tgt_rscale/src_rscale)**sum(term)) + result.append(output[index]*rscale_ratio**sum(term)) logger.info("building translation operator: done") return result -- GitLab From 320cc8a4d6fc5dc97ee73eafdf1c1ba2b9b005c3 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 22 Mar 2020 11:23:02 -0500 Subject: [PATCH 09/48] Rewrite cosines and sines using cosines of angle in the first quadrant --- sumpy/tools.py | 46 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index ddd9800e..84b45195 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -25,6 +25,36 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +============================================================================= +Copyright (c) 2006-2019 SymPy Development Team + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of SymPy nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. """ import six @@ -823,12 +853,16 @@ def fft(seq, inverse=False): a[i], a[j] = a[j], a[i] ang = -2*math.pi/n if inverse else 2*math.pi/n - w = [(math.cos(ang*i), math.sin(ang*i)) for i in range(n // 2)] - for i in range(len(w)): - if abs(w[i][0]) == 1.0: - w[i] = (int(w[i][0]), 0) - if abs(w[i][1]) == 1.0: - w[i] = (0, int(w[i][1])) + ang = 2*math.pi/n + # Rewrite cosines and sines using cosines of angle in the first quadrant + # This is to reduce duplicate of floating point numbers with 1 ULP difference + # and also make sure quantities like cos(pi/2) - sin(pi/2) produces 0 exactly. + w = [(math.cos(ang*i), math.cos(ang*(n/4.0 - i))) for i in range((n + 3)//4)] + w[0] = (1, 0) + w += [(-math.cos(ang*(n/2 - i)), math.cos(ang*(i - n/4.0))) for + i in range((n + 3)//4, n//2)] + if inverse: + w = [(a[0], -a[1]) for a in w] h = 2 while h <= n: -- GitLab From ef2731f35c365489d34a9d38238b00221ff48967 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Mon, 23 Mar 2020 23:30:37 -0500 Subject: [PATCH 10/48] Add a test to check for FFT --- sumpy/symbolic.py | 2 +- sumpy/tools.py | 5 ++--- test/test_tools.py | 55 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 test/test_tools.py diff --git a/sumpy/symbolic.py b/sumpy/symbolic.py index e0f22671..0fed239d 100644 --- a/sumpy/symbolic.py +++ b/sumpy/symbolic.py @@ -76,7 +76,7 @@ _find_symbolic_backend() # Before adding a function here, make sure it's present in both modules. SYMBOLIC_API = """ Add Basic Mul Pow exp sqrt log symbols sympify cos sin atan2 Function Symbol -Derivative Integer Matrix Subs I pi functions Number""".split() +Derivative Integer Matrix Subs I pi functions Number Float""".split() if USE_SYMENGINE: import symengine as sym diff --git a/sumpy/tools.py b/sumpy/tools.py index 84b45195..eebb9efb 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -851,8 +851,6 @@ def fft(seq, inverse=False): j = _binary_reverse(i, b) if i < j: a[i], a[j] = a[j], a[i] - ang = -2*math.pi/n if inverse else 2*math.pi/n - ang = 2*math.pi/n # Rewrite cosines and sines using cosines of angle in the first quadrant # This is to reduce duplicate of floating point numbers with 1 ULP difference @@ -861,9 +859,10 @@ def fft(seq, inverse=False): w[0] = (1, 0) w += [(-math.cos(ang*(n/2 - i)), math.cos(ang*(i - n/4.0))) for i in range((n + 3)//4, n//2)] + if n%4 == 0: + w[n//4] = (0, 1) if inverse: w = [(a[0], -a[1]) for a in w] - h = 2 while h <= n: hf, ut = h // 2, n // h diff --git a/test/test_tools.py b/test/test_tools.py new file mode 100644 index 00000000..eed6d304 --- /dev/null +++ b/test/test_tools.py @@ -0,0 +1,55 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2020 Isuru Fernando" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import logging +logger = logging.getLogger(__name__) + +import sumpy.symbolic as sym +from sumpy.tools import (fft_toeplitz_upper_triangular, + matvec_toeplitz_upper_triangular) +import numpy as np + +def test_fft(): + k = 5 + v = np.random.rand(k) + x = np.random.rand(k) + + fft = fft_toeplitz_upper_triangular(v, x) + matvec = matvec_toeplitz_upper_triangular(v, x) + + for i in range(k): + assert abs(fft[i] - matvec[i]) < 1e-15 + + +def test_fft_small_floats(): + k = 5 + v = sym.make_sym_vector("v", k) + x = sym.make_sym_vector("x", k) + + fft = fft_toeplitz_upper_triangular(v, x) + for expr in fft: + for f in expr.atoms(sym.Float): + if f == 0: + continue + assert abs(f) > 1e-10 -- GitLab From 5001688caa7cf6ec15accf88cb61b7d72347fda2 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Tue, 24 Mar 2020 00:37:04 -0500 Subject: [PATCH 11/48] Fix formatting --- sumpy/tools.py | 2 +- test/test_tools.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index eebb9efb..7f979526 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -859,7 +859,7 @@ def fft(seq, inverse=False): w[0] = (1, 0) w += [(-math.cos(ang*(n/2 - i)), math.cos(ang*(i - n/4.0))) for i in range((n + 3)//4, n//2)] - if n%4 == 0: + if n % 4 == 0: w[n//4] = (0, 1) if inverse: w = [(a[0], -a[1]) for a in w] diff --git a/test/test_tools.py b/test/test_tools.py index eed6d304..743caa14 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -30,6 +30,7 @@ from sumpy.tools import (fft_toeplitz_upper_triangular, matvec_toeplitz_upper_triangular) import numpy as np + def test_fft(): k = 5 v = np.random.rand(k) -- GitLab From 76ac6fc7b0254c4417678bdf5e3d3fc04fb3c9e9 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Tue, 24 Mar 2020 00:54:55 -0500 Subject: [PATCH 12/48] Python2 has no notion of local variables --- sumpy/tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index 7f979526..d6daeb45 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -862,7 +862,7 @@ def fft(seq, inverse=False): if n % 4 == 0: w[n//4] = (0, 1) if inverse: - w = [(a[0], -a[1]) for a in w] + w = [(x[0], -x[1]) for x in w] h = 2 while h <= n: hf, ut = h // 2, n // h -- GitLab From 166a20dd476cb3f440dfa217c56fa310ed9b202a Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Tue, 24 Mar 2020 21:26:20 -0500 Subject: [PATCH 13/48] Add a numeric test that M2L is toeplitz using simple translation --- test/test_kernels.py | 70 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/test/test_kernels.py b/test/test_kernels.py index 8225723c..a8af5827 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -45,6 +45,7 @@ from sumpy.expansion.local import ( from sumpy.kernel import (LaplaceKernel, HelmholtzKernel, AxisTargetDerivative, DirectionalSourceDerivative, BiharmonicKernel, StokesletKernel) from pytools.convergence import PConvergenceVerifier +import sumpy.symbolic as sym import logging logger = logging.getLogger(__name__) @@ -345,6 +346,75 @@ def test_p2e2p(ctx_getter, base_knl, expn_class, order, with_source_derivative): assert eoc_rec_pot.order_estimate() > tgt_order - slack assert eoc_rec_grad_x.order_estimate() > tgt_order_grad - grad_slack +# {{{ test toeplitz + +def _m2l_translate_simple(tgt_expansion, src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale): + + if not tgt_expansion.use_rscale: + src_rscale = 1 + tgt_rscale = 1 + + from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansionBase + if not isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase): + return 1 + + # We know the general form of the multipole expansion is: + # + # coeff0 * diff(kernel, mi0) + coeff1 * diff(kernel, mi1) + ... + # + # To get the local expansion coefficients, we take derivatives of + # the multipole expansion. + taker = src_expansion.get_kernel_derivative_taker(dvec) + + from sumpy.tools import add_mi + + result = [] + for deriv in tgt_expansion.get_coefficient_identifiers(): + local_result = [] + for coeff, term in zip( + src_coeff_exprs, + src_expansion.get_coefficient_identifiers()): + + kernel_deriv = ( + src_expansion.get_scaled_multipole( + taker.diff(add_mi(deriv, term)), + dvec, src_rscale, + nderivatives=sum(deriv) + sum(term), + nderivatives_for_scaling=sum(term))) + + local_result.append( + coeff * kernel_deriv * tgt_rscale**sum(deriv)) + result.append(sym.Add(*local_result)) + return result + + +def test_m2l_toeplitz(ctx_getter): + dim = 3 + knl = LaplaceKernel(dim) + local_expn_class = LaplaceConformingVolumeTaylorLocalExpansion + mpole_expn_class = LaplaceConformingVolumeTaylorMultipoleExpansion + + local_expn = local_expn_class(knl, order=5) + mpole_expn = mpole_expn_class(knl, order=5) + + dvec = sym.make_sym_vector("d", dim) + src_coeff_exprs = list(1 + np.random.randn(len(mpole_expn))) + src_rscale = 2.0 + tgt_rscale = 1.0 + + expected_output = _m2l_translate_simple(local_expn, mpole_expn, src_coeff_exprs, + src_rscale, dvec, tgt_rscale) + actual_output = local_expn.translate_from(mpole_expn, src_coeff_exprs, + src_rscale, dvec, tgt_rscale) + + replace_dict = dict((d, np.random.rand(1)[0]) for d in dvec) + for sym_a, sym_b in zip(expected_output, actual_output): + num_a = sym_a.xreplace(replace_dict) + num_b = sym_b.xreplace(replace_dict) + assert abs(num_a - num_b)/abs(num_a) < 1e-13 + +# }}} @pytest.mark.parametrize("knl, local_expn_class, mpole_expn_class", [ (LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion), -- GitLab From 906abb36aeaaee96e936ff979299e8f893f9f57a Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Tue, 24 Mar 2020 22:43:35 -0500 Subject: [PATCH 14/48] Use 1e-10 for now --- test/test_kernels.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_kernels.py b/test/test_kernels.py index a8af5827..64c5b217 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -346,6 +346,7 @@ def test_p2e2p(ctx_getter, base_knl, expn_class, order, with_source_derivative): assert eoc_rec_pot.order_estimate() > tgt_order - slack assert eoc_rec_grad_x.order_estimate() > tgt_order_grad - grad_slack + # {{{ test toeplitz def _m2l_translate_simple(tgt_expansion, src_expansion, src_coeff_exprs, src_rscale, @@ -412,7 +413,8 @@ def test_m2l_toeplitz(ctx_getter): for sym_a, sym_b in zip(expected_output, actual_output): num_a = sym_a.xreplace(replace_dict) num_b = sym_b.xreplace(replace_dict) - assert abs(num_a - num_b)/abs(num_a) < 1e-13 + assert abs(num_a - num_b)/abs(num_a) < 1e-10 + # }}} -- GitLab From beec30c4212eac4812b2f99d4db2283ce2f0f98a Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Tue, 24 Mar 2020 23:45:18 -0500 Subject: [PATCH 15/48] Use _fft_uneval_expr --- sumpy/expansion/local.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 025ce5fe..5aa98c92 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -170,7 +170,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): taker = src_expansion.get_kernel_derivative_taker(dvec) - from sumpy.tools import add_mi + from sumpy.tools import add_mi, _fft_uneval_expr from pytools import generate_nonnegative_integer_tuples_below as gnitb max_mi = [0]*self.dim @@ -243,7 +243,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # Filter out the dummy rows and scale them for target result = [] - rscale_ratio = sym.UnevaluatedExpr(tgt_rscale/src_rscale) + rscale_ratio = _fft_uneval_expr(tgt_rscale/src_rscale) for term in self.get_coefficient_identifiers(): index = toeplitz_matrix_ident_to_index[term] result.append(output[index]*rscale_ratio**sum(term)) -- GitLab From 51dd80425d0ffc3106fda51f760a527f85ff0d74 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Wed, 1 Apr 2020 15:15:39 -0500 Subject: [PATCH 16/48] kernel: add 3d Yukawa kernel --- sumpy/kernel.py | 24 ++++++++++++++++++++---- test/test_misc.py | 1 + 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/sumpy/kernel.py b/sumpy/kernel.py index 6383d8d5..53b8a550 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -559,14 +559,30 @@ class YukawaKernel(ExpressionKernel): """ lam = var(yukawa_lambda_name) - if dim == 2: - r = pymbolic_real_norm_2(make_sym_vector("d", dim)) + # NOTE: The Yukawa kernel is given by [1] + # -1/(2 pi)**(n/2) * (lam/r)**(n/2-1) * K(n/2-1, lam r) + # where K is a modified Bessel function of the second kind. + # + # [1] https://en.wikipedia.org/wiki/Green%27s_function + # [2] http://dlmf.nist.gov/10.27#E8 + # [3] https://dlmf.nist.gov/10.47#E2 + # [4] https://dlmf.nist.gov/10.49 - # http://dlmf.nist.gov/10.27#E8 + r = pymbolic_real_norm_2(make_sym_vector("d", dim)) + if dim == 2: + # NOTE: transform K(0, lam r) into a Hankel function using [2] expr = var("hankel_1")(0, var("I")*lam*r) - scaling_for_K0 = 1/2*var("pi")*var("I") # noqa: N806 + scaling_for_K0 = var("pi")/2*var("I") # noqa: N806 scaling = -1/(2*var("pi")) * scaling_for_K0 + elif dim == 3: + # NOTE: to get the expression, we do the following and simplify + # 1. express K(1/2, lam r) as a modified spherical Bessel function + # k(0, lam r) using [3] and use expression for k(0, lam r) from [4] + # 2. or use (AS 10.2.17) directly + expr = var("exp")(-lam*r) / r + + scaling = -1/(4 * var("pi")**2) else: raise RuntimeError("unsupported dimensionality") diff --git a/test/test_misc.py b/test/test_misc.py index a25b2e80..77f887c1 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -68,6 +68,7 @@ class YukawaKernelInfo: BiharmonicKernelInfo(2), BiharmonicKernelInfo(3), YukawaKernelInfo(2, 5), + YukawaKernelInfo(3, 5), ]) def test_pde_check_kernels(ctx_factory, knl_info, order=5): dim = knl_info.kernel.dim -- GitLab From abddb9ee5c666499d616c4da65f5ce2e57a30fd0 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Thu, 9 Apr 2020 16:11:41 -0500 Subject: [PATCH 17/48] Fix Github CI badge in README.rst --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 3d54af6a..63c0a5e2 100644 --- a/README.rst +++ b/README.rst @@ -4,9 +4,9 @@ sumpy: n-body kernels and translation operators .. image:: https://gitlab.tiker.net/inducer/sumpy/badges/master/pipeline.svg :alt: Gitlab Build Status :target: https://gitlab.tiker.net/inducer/sumpy/commits/master -.. image:: https://github.com/inducer/sumpy/workflows/CI/badge.svg?branch=master +.. image:: https://github.com/inducer/sumpy/workflows/CI/badge.svg?branch=master&event=push :alt: Github Build Status - :target: https://github.com/inducer/sumpy/actions?query=branch%3Amaster+workflow%3ACI + :target: https://github.com/inducer/sumpy/actions?query=branch%3Amaster+workflow%3ACI+event%3Apush .. image:: https://badge.fury.io/py/sumpy.png :alt: Python Package Index Release Page :target: https://pypi.org/project/sumpy/ -- GitLab From 91d838f69fada42f27c0350be87668e841e620b3 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 3 May 2020 15:56:24 -0500 Subject: [PATCH 18/48] Add a new derivative taker for Laplace --- sumpy/kernel.py | 99 ++++++++++++++++++++++++++++++++++++------------- sumpy/tools.py | 49 ++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 26 deletions(-) diff --git a/sumpy/kernel.py b/sumpy/kernel.py index 6383d8d5..f80e4b96 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -23,6 +23,7 @@ THE SOFTWARE. """ from six.moves import range, zip +from collections import defaultdict import loopy as lp import numpy as np @@ -271,6 +272,11 @@ class Kernel(object): The typical use of this function is to apply source-variable derivatives to the kernel. """ + expr_dict = {(0,)*self.dim: 1} + expr_dict = self.get_derivative_transformation_at_source(expr_dict, bvec) + result = 0 + for mi, coeff in expr_dict.items(): + result += coeff * expr.diff(mi) return expr def postprocess_at_target(self, expr, bvec): @@ -281,8 +287,37 @@ class Kernel(object): The typical use of this function is to apply target-variable derivatives to the kernel. """ + expr_dict = {(0,)*self.dim: 1} + expr_dict = self.get_derivative_transformation_at_target(expr_dict, bvec) + result = 0 + for mi, coeff in expr_dict.items(): + result += coeff * expr.diff(mi) return expr + def get_derivative_transformation_at_source(self, expr_dict): + r"""Get the derivative transformation of the expression at source + represented by the dictionary expr_dict which is mapping from multi-index + `mi` to coefficient `coeff`. + Expression represented by the dictionary `expr_dict` is + :math:`\sum_{mi} \frac{\partial^mi}{x^mi}G * coeff`. Returns an + expression of the same type. + + This function is meant to be overridden by child classes where necessary. + """ + return expr_dict + + def get_derivative_transformation_at_target(self, expr_dict): + r"""Get the derivative transformation of the expression at target + represented by the dictionary expr_dict which is mapping from multi-index + `mi` to coefficient `coeff`. + Expression represented by the dictionary `expr_dict` is + :math:`\sum_{mi} \frac{\partial^mi}{x^mi}G * coeff`. Returns an + expression of the same type. + + This function is meant to be overridden by child classes where necessary. + """ + return expr_dict + def get_global_scaling_const(self): r"""Return a global scaling constant of the kernel. Typically, this ensures that the kernel is scaled so that @@ -773,11 +808,11 @@ class KernelWrapper(Kernel): return self.inner_kernel.adjust_for_kernel_scaling( expr, rscale, nderivatives) - def postprocess_at_source(self, expr, avec): - return self.inner_kernel.postprocess_at_source(expr, avec) + def get_derivative_transformation_at_source(self, expr_dict): + return self.inner_kernel.get_derivative_transformation_at_source(expr_dict) - def postprocess_at_target(self, expr, avec): - return self.inner_kernel.postprocess_at_target(expr, avec) + def get_derivative_transformation_at_target(self, expr_dict): + return self.inner_kernel.get_derivative_transformation_at_target(expr_dict) def get_global_scaling_const(self): return self.inner_kernel.get_global_scaling_const() @@ -816,9 +851,15 @@ class AxisTargetDerivative(DerivativeBase): def __repr__(self): return "AxisTargetDerivative(%d, %r)" % (self.axis, self.inner_kernel) - def postprocess_at_target(self, expr, bvec): - expr = self.inner_kernel.postprocess_at_target(expr, bvec) - return expr.diff(bvec[self.axis]) + def get_derivative_transformation_at_target(self, expr_dict): + expr_dict = self.inner_kernel.get_derivative_transformation_at_target( + expr_dict) + result = dict() + for mi, coeff in expr_dict.items(): + new_mi = list(mi) + new_mi[self.axis] += 1 + result[tuple(new_mi)] = coeff + return result def replace_inner_kernel(self, new_inner_kernel): return type(self)(self.axis, new_inner_kernel) @@ -890,18 +931,21 @@ class DirectionalTargetDerivative(DirectionalDerivative): return transform - def postprocess_at_target(self, expr, bvec): - expr = self.inner_kernel.postprocess_at_target(expr, bvec) - - dim = len(bvec) - assert dim == self.dim - + def get_derivative_transformation_at_target(self, expr_dict): from sumpy.symbolic import make_sym_vector as make_sympy_vector - dir_vec = make_sympy_vector(self.dir_vec_name, dim) + dir_vec = make_sympy_vector(self.dir_vec_name, self.dim) + + expr_dict = self.inner_kernel.get_derivative_transformation_at_target( + expr_dict) - # bvec = tgt-center - return sum(dir_vec[axis]*expr.diff(bvec[axis]) - for axis in range(dim)) + # bvec = tgt - center + result = defaultdict(lambda: 0) + for mi, coeff in expr_dict.items(): + for axis in range(dim): + new_mi = list(mi) + new_mi[self.axis] += 1 + result[tuple(new_mi)] += coeff * dir_vec[axis] + return result def get_source_args(self): return [ @@ -932,18 +976,21 @@ class DirectionalSourceDerivative(DirectionalDerivative): return transform - def postprocess_at_source(self, expr, avec): - expr = self.inner_kernel.postprocess_at_source(expr, avec) - - dimensions = len(avec) - assert dimensions == self.dim - + def get_derivative_transformation_at_source(self, expr_dict): from sumpy.symbolic import make_sym_vector as make_sympy_vector - dir_vec = make_sympy_vector(self.dir_vec_name, dimensions) + dir_vec = make_sympy_vector(self.dir_vec_name, self.dim) + + expr_dict = self.inner_kernel.get_derivative_transformation_at_source( + expr_dict) # avec = center-src -> minus sign from chain rule - return sum(-dir_vec[axis]*expr.diff(avec[axis]) - for axis in range(dimensions)) + result = defaultdict(lambda: 0) + for mi, coeff in expr_dict.items(): + for axis in range(dim): + new_mi = list(mi) + new_mi[self.axis] += 1 + result[tuple(new_mi)] += -coeff * dir_vec[axis] + return result def get_source_args(self): return [ diff --git a/sumpy/tools.py b/sumpy/tools.py index d6daeb45..cadd69b5 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -140,6 +140,55 @@ class MiDerivativeTaker(object): if (np.array(mi) >= np.array(other_mi)).all()), key=lambda other_mi: sum(self.mi_dist(mi, other_mi))) + +class Laplace3DDerivativeTaker(MiDerivativeTaker): + + def __init__(self, expr, var_list): + super(Laplace3DDerivativeTaker, self).__init__(expr, var_list) + self.r = sym.sqrt(sum(v**2 for v in var_list)) + + def diff(self, mi): + # Return zero for negative values. Makes the algorithm readable. + if min(mi) < 0: + return 0 + try: + expr = self.cache_by_mi[mi] + except KeyError: + order = sum(mi) + if max(mi) == 1: + return MiDerivativeTaker.diff(self, mi) + d = -1: + for i in range(3): + if mi[i] >= 2: + d = i + break + assert d >= 0 + expr = 0 + for i in range(3): + mi_minus_one = tuple(mi) + mi_minus_one[i] -= 1 + mi_minus_two = tuple(mi) + mi_minus_two[i] -= 2 + if i == d: + expr -= (2*mi[i]-1)*var_list[i]*self.diff(mi_minus_one) + expr -= (mi[i]-1)**2*self.diff(mi_minus_two) + else: + expr -= (2*mi[i])*var_list[i]*self.diff(mi_minus_one) + expr -= mi[i]*(mi[i]-1)*self.diff(mi_minus_two) + expr /= self.r**2 + expr = sym.UnevaluatedExpr(expr) + self.cache_by_mi[mi] = expr + return expr + + +class MiDerivativeTakerWrapper(object): + def __init__(self, taker, initial_mi): + self.taker = taker + self.initial_mi = initial_mi + + def diff(self, mi): + return self.taker.diff(add_mi(mi, self.initial_mi)) + # }}} -- GitLab From bb5a2b5b54340797e2f4139dbc4d06baf2d8595a Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 3 May 2020 22:35:37 -0500 Subject: [PATCH 19/48] Use the derivative taker everywhere --- sumpy/e2p.py | 3 +- sumpy/expansion/local.py | 25 ++++++++++------- sumpy/expansion/multipole.py | 53 +++++++----------------------------- sumpy/kernel.py | 42 ++++++++++++++++++++++++---- sumpy/tools.py | 12 +++----- 5 files changed, 66 insertions(+), 69 deletions(-) diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 7b0072ad..2dad9828 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -94,8 +94,7 @@ class E2PBase(KernelCacheWrapper): value = self.expansion.evaluate(coeff_exprs, bvec, rscale) result_names = [ - sac.assign_unique("result_%d_p" % i, - knl.postprocess_at_target(value, bvec)) + sac.assign_unique("result_%d_p" % i, value) for i, knl in enumerate(self.kernels) ] diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 5aa98c92..369307db 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -120,14 +120,17 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): """ def coefficients_from_source(self, avec, bvec, rscale): - from sumpy.tools import MiDerivativeTaker - ppkernel = self.kernel.postprocess_at_source( - self.kernel.get_expression(avec), avec) + from sumpy.tools import MiDerivativeTakerWrapper + + result = [] + taker = self.kernel.get_derivative_taker(avec) + for mi in self.get_coefficient_identifiers(): + wrapper = MiDerivativeTakerWrapper(taker, mi) + mi_expr = self.kernel.postprocess_at_source(wrapper, avec) + expr = mi_expr * rscale ** sum(mi) + result.append(expr) - taker = MiDerivativeTaker(ppkernel, avec) - return [ - taker.diff(mi) * rscale ** sum(mi) - for mi in self.get_coefficient_identifiers()] + return result def evaluate(self, coeffs, bvec, rscale): evaluated_coeffs = ( @@ -137,13 +140,15 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): bvec = [b*rscale**-1 for b in bvec] from sumpy.tools import mi_power, mi_factorial - return sum( + result = sum( coeff * mi_power(bvec, mi, evaluate=False) / mi_factorial(mi) for coeff, mi in zip( evaluated_coeffs, self.get_full_coefficient_identifiers())) + return self.kernel.postprocess_at_target(result, bvec) + def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale, use_fft=False): logger.info("building translation operator: %s(%d) -> %s(%d): start" @@ -168,7 +173,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # This code speeds up derivative taking by caching all kernel # derivatives. - taker = src_expansion.get_kernel_derivative_taker(dvec) + taker = src_expansion.kernel.get_derivative_taker(dvec) from sumpy.tools import add_mi, _fft_uneval_expr from pytools import generate_nonnegative_integer_tuples_below as gnitb @@ -198,7 +203,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # The vector has the kernel derivatives and depends only on the distance # between the two centers - taker = src_expansion.get_kernel_derivative_taker(dvec) + taker = src_expansion.kernel.get_derivative_taker(dvec) vector_stored = [] # Calculate the kernel derivatives for the compressed set for term in \ diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index b7e2769b..0ebfe475 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -66,44 +66,12 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): rscale = 1 if isinstance(kernel, DirectionalSourceDerivative): - from sumpy.symbolic import make_sym_vector - - dir_vecs = [] - tmp_kernel = kernel - while isinstance(tmp_kernel, DirectionalSourceDerivative): - dir_vecs.append(make_sym_vector(tmp_kernel.dir_vec_name, kernel.dim)) - tmp_kernel = tmp_kernel.inner_kernel - - if kernel.get_base_kernel() is not tmp_kernel: - raise NotImplementedError("Unknown kernel wrapper.") - - nderivs = len(dir_vecs) - - coeff_identifiers = self.get_full_coefficient_identifiers() result = [0] * len(coeff_identifiers) - - for i, mi in enumerate(coeff_identifiers): - # One source derivative is the dot product of the gradient and - # directional vector. - # For multiple derivatives, gradient of gradients is taken. - # For eg: in 3D, 2 source derivatives gives 9 terms and - # cartesian_product below enumerates these 9 terms. - for deriv_terms in cartesian_product(*[range(kernel.dim)]*nderivs): - prod = 1 - derivative_mi = list(mi) - for nderivative, deriv_dim in enumerate(deriv_terms): - prod *= -derivative_mi[deriv_dim] - prod *= dir_vecs[nderivative][deriv_dim] - derivative_mi[deriv_dim] -= 1 - if any(v < 0 for v in derivative_mi): - continue - result[i] += mi_power(avec, derivative_mi) * prod - for i, mi in enumerate(coeff_identifiers): + result[i] = self.kernel.postprocess_at_source(mi_power(avec, mi)) result[i] /= (mi_factorial(mi) * rscale ** sum(mi)) else: avec = [sym.UnevaluatedExpr(a * rscale**-1) for a in avec] - result = [ mi_power(avec, mi) / mi_factorial(mi) for mi in self.get_full_coefficient_identifiers()] @@ -128,21 +96,20 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): return (rscale**nderivatives_for_scaling * expr) def evaluate(self, coeffs, bvec, rscale): + from sumpy.tools import MiDerivativeTakerWrapper if not self.use_rscale: rscale = 1 - taker = self.get_kernel_derivative_taker(bvec) - - result = sym.Add(*tuple( - coeff - * self.get_scaled_multipole(taker.diff(mi), bvec, rscale, sum(mi)) - for coeff, mi in zip(coeffs, self.get_coefficient_identifiers()))) + taker = self.kernel.get_derivative_taker(bvec) - return result + result = [] + for coeff, mi in zip(coeffs, self.get_coefficient_identifiers()): + mi_expr = self.kernel.postprocess_at_target( + MiDerivativeTakerWrapper(taker, mi), bvec) + expr = coeff * self.get_scaled_multipole(mi_expr, bvec, rscale, sum(mi)) + result.append(expr) - def get_kernel_derivative_taker(self, bvec): - from sumpy.tools import MiDerivativeTaker - return MiDerivativeTaker(self.kernel.get_expression(bvec), bvec) + return sym.Add(*tuple(result)) def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale): diff --git a/sumpy/kernel.py b/sumpy/kernel.py index f80e4b96..7d436308 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -127,6 +127,7 @@ class Kernel(object): .. automethod:: adjust_for_kernel_scaling .. automethod:: postprocess_at_source .. automethod:: postprocess_at_target + .. automethod:: get_derivative_taker .. automethod:: get_global_scaling_const .. automethod:: get_args .. automethod:: get_source_args @@ -264,6 +265,21 @@ class Kernel(object): raise NotImplementedError + def _diff(self, expr, vec, mi): + """Take the derivative of an expression or a MiDerivativeTakerWrapper + """ + from sumpy.tools import MiDerivativeTakerWrapper, add_mi + if isinstance(expr, MiDerivativeTakerWrapper): + taker, init_mi = expr + return taker.diff(add_mi(mi, init_mi)) + else: + for i in range(self.dim): + if mi[i] == 0: + continue + expr = expr.diff(vec[i], mi[i]) + return expr + + def postprocess_at_source(self, expr, avec): """Transform a kernel evaluation or expansion expression in a place where the vector a (something - source) is known. ("something" may be @@ -273,11 +289,11 @@ class Kernel(object): derivatives to the kernel. """ expr_dict = {(0,)*self.dim: 1} - expr_dict = self.get_derivative_transformation_at_source(expr_dict, bvec) + expr_dict = self.get_derivative_transformation_at_source(expr_dict) result = 0 for mi, coeff in expr_dict.items(): - result += coeff * expr.diff(mi) - return expr + result += coeff * self._diff(expr, avec, mi) + return result def postprocess_at_target(self, expr, bvec): """Transform a kernel evaluation or expansion expression in a place @@ -288,11 +304,11 @@ class Kernel(object): derivatives to the kernel. """ expr_dict = {(0,)*self.dim: 1} - expr_dict = self.get_derivative_transformation_at_target(expr_dict, bvec) + expr_dict = self.get_derivative_transformation_at_target(expr_dict) result = 0 for mi, coeff in expr_dict.items(): - result += coeff * expr.diff(mi) - return expr + result += coeff * self._diff(expr, bvec, mi) + return result def get_derivative_transformation_at_source(self, expr_dict): r"""Get the derivative transformation of the expression at source @@ -341,6 +357,13 @@ class Kernel(object): """ return [] + def get_derivative_taker(self, dvec): + """Return a MiDerivativeTaker instance that supports taking the + derivatives of this kernel + """ + from sumpy.tools import MiDerivativeTaker + return MiDerivativeTaker(self.get_expression(dvec), dvec) + # }}} @@ -475,6 +498,13 @@ class LaplaceKernel(ExpressionKernel): def __repr__(self): return "LapKnl%dD" % self.dim + def get_derivative_taker(self, dvec): + from sumpy.tools import Laplace3DDerivativeTaker + if self.dim == 3: + return Laplace3DDerivativeTaker(self.get_expression(dvec), dvec) + else: + return MiDerivativeTaker(self.get_expression(dvec), dvec) + mapper_method = "map_laplace_kernel" diff --git a/sumpy/tools.py b/sumpy/tools.py index cadd69b5..6bb4ff2e 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -64,6 +64,7 @@ import numpy as np import sumpy.symbolic as sym import numbers import math +from collections import namedtuple import pyopencl as cl import pyopencl.array # noqa @@ -157,7 +158,7 @@ class Laplace3DDerivativeTaker(MiDerivativeTaker): order = sum(mi) if max(mi) == 1: return MiDerivativeTaker.diff(self, mi) - d = -1: + d = -1 for i in range(3): if mi[i] >= 2: d = i @@ -181,13 +182,8 @@ class Laplace3DDerivativeTaker(MiDerivativeTaker): return expr -class MiDerivativeTakerWrapper(object): - def __init__(self, taker, initial_mi): - self.taker = taker - self.initial_mi = initial_mi - - def diff(self, mi): - return self.taker.diff(add_mi(mi, self.initial_mi)) +MiDerivativeTakerWrapper = namedtuple('MiDerivativeTakerWrapper', + ['taker', 'initial_mi']) # }}} -- GitLab From 26cba1dd4b7afc3209d605a121a35bb1fd5dd8ff Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 3 May 2020 22:57:29 -0500 Subject: [PATCH 20/48] Get tests to pass --- sumpy/e2p.py | 10 +++++----- sumpy/expansion/local.py | 18 +++++++++++------- sumpy/expansion/multipole.py | 34 +++++++++++++++++++++++----------- sumpy/kernel.py | 11 +++++------ sumpy/symbolic.py | 4 ++-- sumpy/tools.py | 16 +++++++--------- test/test_kernels.py | 2 +- 7 files changed, 54 insertions(+), 41 deletions(-) diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 2dad9828..fb5d323c 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -91,12 +91,12 @@ class E2PBase(KernelCacheWrapper): coeff_exprs = [sym.Symbol("coeff%d" % i) for i in range(len(self.expansion.get_coefficient_identifiers()))] - value = self.expansion.evaluate(coeff_exprs, bvec, rscale) - result_names = [ - sac.assign_unique("result_%d_p" % i, value) - for i, knl in enumerate(self.kernels) - ] + result_names = [] + for i, knl in enumerate(self.kernels): + value = self.expansion.evaluate(coeff_exprs, bvec, rscale, knl=knl) + name = sac.assign_unique("result_%d_p" % i, value) + result_names.append(name) sac.run_global_cse() diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 369307db..5db686dc 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -102,7 +102,7 @@ class LineTaylorLocalExpansion(LocalExpansionBase): .subs("tau", 0) for i in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale): + def evaluate(self, coeffs, bvec, rscale, knl=None): # no point in heeding rscale here--just ignore it from pytools import factorial return sym.Add(*( @@ -132,22 +132,24 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): return result - def evaluate(self, coeffs, bvec, rscale): + def evaluate(self, coeffs, bvec, rscale, knl=None): evaluated_coeffs = ( self.expansion_terms_wrangler.get_full_kernel_derivatives_from_stored( coeffs, rscale)) - bvec = [b*rscale**-1 for b in bvec] + bvec_scaled = [b*rscale**-1 for b in bvec] from sumpy.tools import mi_power, mi_factorial result = sum( coeff - * mi_power(bvec, mi, evaluate=False) + * mi_power(bvec_scaled, mi, evaluate=False) / mi_factorial(mi) for coeff, mi in zip( evaluated_coeffs, self.get_full_coefficient_identifiers())) - return self.kernel.postprocess_at_target(result, bvec) + if knl is None: + knl = self.kernel + return knl.postprocess_at_target(result, bvec) def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale, use_fft=False): @@ -404,9 +406,11 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): * sym.exp(sym.I * l * source_angle_rel_center), avec) for l in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale): + def evaluate(self, coeffs, bvec, rscale, knl=None): if not self.use_rscale: rscale = 1 + if knl is None: + knl = self.kernel from sumpy.symbolic import sym_real_norm_2 bessel_j = sym.Function("bessel_j") @@ -416,7 +420,7 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): arg_scale = self.get_bessel_arg_scaling() return sum(coeffs[self.get_storage_index(l)] - * self.kernel.postprocess_at_target( + * knl.postprocess_at_target( bessel_j(l, arg_scale * bvec_len) / rscale ** abs(l) * sym.exp(sym.I * l * -target_angle_rel_center), bvec) diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index 0ebfe475..8186e460 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -30,7 +30,6 @@ from sumpy.expansion import ( ExpansionBase, VolumeTaylorExpansion, LaplaceConformingVolumeTaylorExpansion, HelmholtzConformingVolumeTaylorExpansion, BiharmonicConformingVolumeTaylorExpansion) -from pytools import cartesian_product import logging logger = logging.getLogger(__name__) @@ -65,16 +64,18 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): if not self.use_rscale: rscale = 1 + coeff_identifiers = self.get_full_coefficient_identifiers() if isinstance(kernel, DirectionalSourceDerivative): result = [0] * len(coeff_identifiers) for i, mi in enumerate(coeff_identifiers): - result[i] = self.kernel.postprocess_at_source(mi_power(avec, mi)) + result[i] = self.kernel.postprocess_at_source( + mi_power(avec, mi), avec) result[i] /= (mi_factorial(mi) * rscale ** sum(mi)) else: avec = [sym.UnevaluatedExpr(a * rscale**-1) for a in avec] result = [ mi_power(avec, mi) / mi_factorial(mi) - for mi in self.get_full_coefficient_identifiers()] + for mi in coeff_identifiers] return ( self.expansion_terms_wrangler.get_stored_mpole_coefficients_from_full( result, rscale)) @@ -95,21 +96,30 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): else: return (rscale**nderivatives_for_scaling * expr) - def evaluate(self, coeffs, bvec, rscale): + def evaluate(self, coeffs, bvec, rscale, knl=None): from sumpy.tools import MiDerivativeTakerWrapper + from pytools import single_valued if not self.use_rscale: rscale = 1 + if knl is None: + knl = self.kernel - taker = self.kernel.get_derivative_taker(bvec) + taker = knl.get_derivative_taker(bvec) + expr_dict = {(0,)*self.dim: 1} + expr_dict = knl.get_derivative_transformation_at_target(expr_dict) + pp_nderivatives = single_valued(sum(mi) for mi in expr_dict.keys()) result = [] for coeff, mi in zip(coeffs, self.get_coefficient_identifiers()): - mi_expr = self.kernel.postprocess_at_target( - MiDerivativeTakerWrapper(taker, mi), bvec) - expr = coeff * self.get_scaled_multipole(mi_expr, bvec, rscale, sum(mi)) + wrapper = MiDerivativeTakerWrapper(taker, mi) + mi_expr = knl.postprocess_at_target(wrapper, bvec) + expr = coeff * self.get_scaled_multipole(mi_expr, bvec, rscale, + sum(mi) + pp_nderivatives, sum(mi)) result.append(expr) - return sym.Add(*tuple(result)) + result = sym.Add(*tuple(result)) + #return knl.postprocess_at_target(result, bvec) + return result def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale): @@ -334,9 +344,11 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): avec) for l in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale): + def evaluate(self, coeffs, bvec, rscale, knl=None): if not self.use_rscale: rscale = 1 + if knl is None: + knl = self.kernel from sumpy.symbolic import sym_real_norm_2 hankel_1 = sym.Function("hankel_1") @@ -346,7 +358,7 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): arg_scale = self.get_bessel_arg_scaling() return sum(coeffs[self.get_storage_index(l)] - * self.kernel.postprocess_at_target( + * knl.postprocess_at_target( hankel_1(l, arg_scale * bvec_len) * rscale ** abs(l) * sym.exp(sym.I * l * target_angle_rel_center), bvec) diff --git a/sumpy/kernel.py b/sumpy/kernel.py index 7d436308..e020841d 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -279,7 +279,6 @@ class Kernel(object): expr = expr.diff(vec[i], mi[i]) return expr - def postprocess_at_source(self, expr, avec): """Transform a kernel evaluation or expansion expression in a place where the vector a (something - source) is known. ("something" may be @@ -499,7 +498,7 @@ class LaplaceKernel(ExpressionKernel): return "LapKnl%dD" % self.dim def get_derivative_taker(self, dvec): - from sumpy.tools import Laplace3DDerivativeTaker + from sumpy.tools import Laplace3DDerivativeTaker, MiDerivativeTaker if self.dim == 3: return Laplace3DDerivativeTaker(self.get_expression(dvec), dvec) else: @@ -971,9 +970,9 @@ class DirectionalTargetDerivative(DirectionalDerivative): # bvec = tgt - center result = defaultdict(lambda: 0) for mi, coeff in expr_dict.items(): - for axis in range(dim): + for axis in range(self.dim): new_mi = list(mi) - new_mi[self.axis] += 1 + new_mi[axis] += 1 result[tuple(new_mi)] += coeff * dir_vec[axis] return result @@ -1016,9 +1015,9 @@ class DirectionalSourceDerivative(DirectionalDerivative): # avec = center-src -> minus sign from chain rule result = defaultdict(lambda: 0) for mi, coeff in expr_dict.items(): - for axis in range(dim): + for axis in range(self.dim): new_mi = list(mi) - new_mi[self.axis] += 1 + new_mi[axis] += 1 result[tuple(new_mi)] += -coeff * dir_vec[axis] return result diff --git a/sumpy/symbolic.py b/sumpy/symbolic.py index 0fed239d..b63fca68 100644 --- a/sumpy/symbolic.py +++ b/sumpy/symbolic.py @@ -82,11 +82,11 @@ if USE_SYMENGINE: import symengine as sym from pymbolic.interop.symengine import ( PymbolicToSymEngineMapper as PymbolicToSympyMapper, - SymEngineToPymbolicMapper as SympyToPymbolicMapper) + SymEngineToPymbolicMapper as SympyToPymbolicMapper, make_cse) else: import sympy as sym from pymbolic.interop.sympy import ( - PymbolicToSympyMapper, SympyToPymbolicMapper) + PymbolicToSympyMapper, SympyToPymbolicMapper, make_cse) for _apifunc in SYMBOLIC_API: globals()[_apifunc] = getattr(sym, _apifunc) diff --git a/sumpy/tools.py b/sumpy/tools.py index 6bb4ff2e..34936601 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -155,7 +155,6 @@ class Laplace3DDerivativeTaker(MiDerivativeTaker): try: expr = self.cache_by_mi[mi] except KeyError: - order = sum(mi) if max(mi) == 1: return MiDerivativeTaker.diff(self, mi) d = -1 @@ -166,20 +165,19 @@ class Laplace3DDerivativeTaker(MiDerivativeTaker): assert d >= 0 expr = 0 for i in range(3): - mi_minus_one = tuple(mi) + mi_minus_one = list(mi) mi_minus_one[i] -= 1 - mi_minus_two = tuple(mi) + mi_minus_two = list(mi) mi_minus_two[i] -= 2 if i == d: - expr -= (2*mi[i]-1)*var_list[i]*self.diff(mi_minus_one) - expr -= (mi[i]-1)**2*self.diff(mi_minus_two) + expr -= (2*mi[i]-1)*self.var_list[i]*self.diff(tuple(mi_minus_one)) + expr -= (mi[i]-1)**2*self.diff(tuple(mi_minus_two)) else: - expr -= (2*mi[i])*var_list[i]*self.diff(mi_minus_one) - expr -= mi[i]*(mi[i]-1)*self.diff(mi_minus_two) + expr -= (2*mi[i])*self.var_list[i]*self.diff(tuple(mi_minus_one)) + expr -= mi[i]*(mi[i]-1)*self.diff(tuple(mi_minus_two)) expr /= self.r**2 - expr = sym.UnevaluatedExpr(expr) self.cache_by_mi[mi] = expr - return expr + return expr MiDerivativeTakerWrapper = namedtuple('MiDerivativeTakerWrapper', diff --git a/test/test_kernels.py b/test/test_kernels.py index 64c5b217..a1d5fec4 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -366,7 +366,7 @@ def _m2l_translate_simple(tgt_expansion, src_expansion, src_coeff_exprs, src_rsc # # To get the local expansion coefficients, we take derivatives of # the multipole expansion. - taker = src_expansion.get_kernel_derivative_taker(dvec) + taker = src_expansion.kernel.get_derivative_taker(dvec) from sumpy.tools import add_mi -- GitLab From f3d294981f770bd34a25e3c3f6f80ea8443ae60c Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Mon, 4 May 2020 12:04:31 -0500 Subject: [PATCH 21/48] Make get_derivative_taker part of expansion and not kernel --- sumpy/expansion/__init__.py | 8 ++++++++ sumpy/expansion/local.py | 6 +++--- sumpy/expansion/multipole.py | 2 +- sumpy/kernel.py | 15 --------------- sumpy/symbolic.py | 4 ++-- sumpy/tools.py | 12 +++++++----- test/test_kernels.py | 2 +- 7 files changed, 22 insertions(+), 27 deletions(-) diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index 9d3a917a..9c6a84ae 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -57,6 +57,7 @@ class ExpansionBase(object): .. automethod:: translate_from .. automethod:: __eq__ .. automethod:: __ne__ + .. automethod:: get_kernel_derivative_taker """ def __init__(self, kernel, order, use_rscale=None): @@ -155,6 +156,13 @@ class ExpansionBase(object): def __ne__(self, other): return not self.__eq__(other) + def get_kernel_derivative_taker(self, dvec): + """Return a MiDerivativeTaker instance that supports taking + derivatives of the kernel with respect to dvec + """ + from sumpy.tools import MiDerivativeTaker + return MiDerivativeTaker(self.kernel.get_expression(dvec), dvec) + # }}} diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 5db686dc..3879f50d 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -123,7 +123,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): from sumpy.tools import MiDerivativeTakerWrapper result = [] - taker = self.kernel.get_derivative_taker(avec) + taker = self.get_kernel_derivative_taker(avec) for mi in self.get_coefficient_identifiers(): wrapper = MiDerivativeTakerWrapper(taker, mi) mi_expr = self.kernel.postprocess_at_source(wrapper, avec) @@ -175,7 +175,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # This code speeds up derivative taking by caching all kernel # derivatives. - taker = src_expansion.kernel.get_derivative_taker(dvec) + taker = src_expansion.get_kernel_derivative_taker(dvec) from sumpy.tools import add_mi, _fft_uneval_expr from pytools import generate_nonnegative_integer_tuples_below as gnitb @@ -205,7 +205,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # The vector has the kernel derivatives and depends only on the distance # between the two centers - taker = src_expansion.kernel.get_derivative_taker(dvec) + taker = src_expansion.get_kernel_derivative_taker(dvec) vector_stored = [] # Calculate the kernel derivatives for the compressed set for term in \ diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index 8186e460..c4b7f098 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -104,7 +104,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): if knl is None: knl = self.kernel - taker = knl.get_derivative_taker(bvec) + taker = self.get_kernel_derivative_taker(bvec) expr_dict = {(0,)*self.dim: 1} expr_dict = knl.get_derivative_transformation_at_target(expr_dict) pp_nderivatives = single_valued(sum(mi) for mi in expr_dict.keys()) diff --git a/sumpy/kernel.py b/sumpy/kernel.py index e020841d..e470e48d 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -127,7 +127,6 @@ class Kernel(object): .. automethod:: adjust_for_kernel_scaling .. automethod:: postprocess_at_source .. automethod:: postprocess_at_target - .. automethod:: get_derivative_taker .. automethod:: get_global_scaling_const .. automethod:: get_args .. automethod:: get_source_args @@ -356,13 +355,6 @@ class Kernel(object): """ return [] - def get_derivative_taker(self, dvec): - """Return a MiDerivativeTaker instance that supports taking the - derivatives of this kernel - """ - from sumpy.tools import MiDerivativeTaker - return MiDerivativeTaker(self.get_expression(dvec), dvec) - # }}} @@ -497,13 +489,6 @@ class LaplaceKernel(ExpressionKernel): def __repr__(self): return "LapKnl%dD" % self.dim - def get_derivative_taker(self, dvec): - from sumpy.tools import Laplace3DDerivativeTaker, MiDerivativeTaker - if self.dim == 3: - return Laplace3DDerivativeTaker(self.get_expression(dvec), dvec) - else: - return MiDerivativeTaker(self.get_expression(dvec), dvec) - mapper_method = "map_laplace_kernel" diff --git a/sumpy/symbolic.py b/sumpy/symbolic.py index b63fca68..0fed239d 100644 --- a/sumpy/symbolic.py +++ b/sumpy/symbolic.py @@ -82,11 +82,11 @@ if USE_SYMENGINE: import symengine as sym from pymbolic.interop.symengine import ( PymbolicToSymEngineMapper as PymbolicToSympyMapper, - SymEngineToPymbolicMapper as SympyToPymbolicMapper, make_cse) + SymEngineToPymbolicMapper as SympyToPymbolicMapper) else: import sympy as sym from pymbolic.interop.sympy import ( - PymbolicToSympyMapper, SympyToPymbolicMapper, make_cse) + PymbolicToSympyMapper, SympyToPymbolicMapper) for _apifunc in SYMBOLIC_API: globals()[_apifunc] = getattr(sym, _apifunc) diff --git a/sumpy/tools.py b/sumpy/tools.py index 34936601..c2b9b132 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -142,10 +142,10 @@ class MiDerivativeTaker(object): key=lambda other_mi: sum(self.mi_dist(mi, other_mi))) -class Laplace3DDerivativeTaker(MiDerivativeTaker): +class LaplaceDerivativeTaker(MiDerivativeTaker): def __init__(self, expr, var_list): - super(Laplace3DDerivativeTaker, self).__init__(expr, var_list) + super(LaplaceDerivativeTaker, self).__init__(expr, var_list) self.r = sym.sqrt(sum(v**2 for v in var_list)) def diff(self, mi): @@ -155,7 +155,7 @@ class Laplace3DDerivativeTaker(MiDerivativeTaker): try: expr = self.cache_by_mi[mi] except KeyError: - if max(mi) == 1: + if max(mi) == 1 or len(self.var_list) == 2: return MiDerivativeTaker.diff(self, mi) d = -1 for i in range(3): @@ -170,10 +170,12 @@ class Laplace3DDerivativeTaker(MiDerivativeTaker): mi_minus_two = list(mi) mi_minus_two[i] -= 2 if i == d: - expr -= (2*mi[i]-1)*self.var_list[i]*self.diff(tuple(mi_minus_one)) + expr -= (2*mi[i]-1)*self.var_list[i]*self.diff( + tuple(mi_minus_one)) expr -= (mi[i]-1)**2*self.diff(tuple(mi_minus_two)) else: - expr -= (2*mi[i])*self.var_list[i]*self.diff(tuple(mi_minus_one)) + expr -= (2*mi[i])*self.var_list[i]*self.diff( + tuple(mi_minus_one)) expr -= mi[i]*(mi[i]-1)*self.diff(tuple(mi_minus_two)) expr /= self.r**2 self.cache_by_mi[mi] = expr diff --git a/test/test_kernels.py b/test/test_kernels.py index a1d5fec4..64c5b217 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -366,7 +366,7 @@ def _m2l_translate_simple(tgt_expansion, src_expansion, src_coeff_exprs, src_rsc # # To get the local expansion coefficients, we take derivatives of # the multipole expansion. - taker = src_expansion.kernel.get_derivative_taker(dvec) + taker = src_expansion.get_kernel_derivative_taker(dvec) from sumpy.tools import add_mi -- GitLab From e96bc8707f52320d4127c2d65e55b8a0dfdb9c24 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Mon, 4 May 2020 16:52:43 -0500 Subject: [PATCH 22/48] Add support Laplace 2D --- sumpy/tools.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index c2b9b132..c2845ef0 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -155,28 +155,33 @@ class LaplaceDerivativeTaker(MiDerivativeTaker): try: expr = self.cache_by_mi[mi] except KeyError: - if max(mi) == 1 or len(self.var_list) == 2: + if max(mi) == 1: return MiDerivativeTaker.diff(self, mi) d = -1 - for i in range(3): + dim = len(self.var_list) + for i in range(dim): if mi[i] >= 2: d = i break assert d >= 0 expr = 0 - for i in range(3): + for i in range(dim): mi_minus_one = list(mi) mi_minus_one[i] -= 1 mi_minus_two = list(mi) mi_minus_two[i] -= 2 + x = self.var_list[i] + n = mi[i] if i == d: - expr -= (2*mi[i]-1)*self.var_list[i]*self.diff( - tuple(mi_minus_one)) - expr -= (mi[i]-1)**2*self.diff(tuple(mi_minus_two)) + if dim == 3: + expr -= (2*n-1)*x*self.diff(tuple(mi_minus_one)) + expr -= (n-1)**2*self.diff(tuple(mi_minus_two)) + else: + expr -= 2*x*(n-1)*self.diff(tuple(mi_minus_one)) + expr -= (n-1)*(n-2)*self.diff(tuple(mi_minus_two)) else: - expr -= (2*mi[i])*self.var_list[i]*self.diff( - tuple(mi_minus_one)) - expr -= mi[i]*(mi[i]-1)*self.diff(tuple(mi_minus_two)) + expr -= 2*n*x*self.diff(tuple(mi_minus_one)) + expr -= n*(n-1)*self.diff(tuple(mi_minus_two)) expr /= self.r**2 self.cache_by_mi[mi] = expr return expr -- GitLab From 3fdaa5f8adff932f0c60c4a3b374b7d59a5f181d Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Tue, 5 May 2020 23:04:06 -0500 Subject: [PATCH 23/48] Use new derivativetaker with Laplace --- sumpy/expansion/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index 9c6a84ae..a826baca 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -643,6 +643,10 @@ class LaplaceConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): def __init__(self, kernel, order, use_rscale): self.expansion_terms_wrangler_key = (order, kernel.dim) + def get_kernel_derivative_taker(self, dvec): + from sumpy.tools import LaplaceDerivativeTaker + return LaplaceDerivativeTaker(self.kernel.get_expression(dvec), dvec) + class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): -- GitLab From e4f6379774d942418eee03926c035b9c8d1bf2a8 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Wed, 6 May 2020 00:00:18 -0500 Subject: [PATCH 24/48] Pass around a sac --- benchmarks/bench_translations.py | 4 ++-- sumpy/cse.py | 1 - sumpy/e2e.py | 2 +- sumpy/e2p.py | 2 +- sumpy/expansion/__init__.py | 11 ++++++++--- sumpy/expansion/local.py | 18 +++++++++--------- sumpy/expansion/multipole.py | 10 +++++----- sumpy/p2e.py | 2 +- sumpy/qbx.py | 4 ++-- test/test_codegen.py | 2 +- test/test_kernels.py | 2 +- 11 files changed, 31 insertions(+), 27 deletions(-) diff --git a/benchmarks/bench_translations.py b/benchmarks/bench_translations.py index 621ac275..6e87370f 100644 --- a/benchmarks/bench_translations.py +++ b/benchmarks/bench_translations.py @@ -65,9 +65,9 @@ class TranslationBenchmarkSuite: dvec = sym.make_sym_vector("d", knl.dim) src_rscale = sym.Symbol("src_rscale") tgt_rscale = sym.Symbol("tgt_rscale") - result = l_expn.translate_from(m_expn, src_coeff_exprs, src_rscale, - dvec, tgt_rscale) sac = SymbolicAssignmentCollection() + result = l_expn.translate_from(m_expn, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac) for i, expr in enumerate(result): sac.assign_unique("coeff%d" % i, expr) sac.run_global_cse() diff --git a/sumpy/cse.py b/sumpy/cse.py index 823ef546..476eb147 100644 --- a/sumpy/cse.py +++ b/sumpy/cse.py @@ -145,7 +145,6 @@ class FuncArgTracker(object): for func_i, func in enumerate(funcs): func_argset = set() - for func_arg in func.args: arg_number = self.get_or_add_value_number(func_arg) func_argset.add(arg_number) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 12e80b5f..99f75e81 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -111,7 +111,7 @@ class E2EBase(KernelCacheWrapper): for i, coeff_i in enumerate( self.tgt_expansion.translate_from( self.src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale))] + dvec, tgt_rscale, sac))] sac.run_global_cse() diff --git a/sumpy/e2p.py b/sumpy/e2p.py index fb5d323c..41e6ea3e 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -94,7 +94,7 @@ class E2PBase(KernelCacheWrapper): result_names = [] for i, knl in enumerate(self.kernels): - value = self.expansion.evaluate(coeff_exprs, bvec, rscale, knl=knl) + value = self.expansion.evaluate(coeff_exprs, bvec, rscale, sac, knl=knl) name = sac.assign_unique("result_%d_p" % i, value) result_names.append(name) diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index a826baca..a8f78cfe 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -115,20 +115,25 @@ class ExpansionBase(object): """ raise NotImplementedError - def coefficients_from_source(self, avec, bvec, rscale): + def coefficients_from_source(self, avec, bvec, rscale, sac): """Form an expansion from a source point. :arg avec: vector from source to center. :arg bvec: vector from center to target. Not usually necessary, except for line-Taylor expansion. + :arg sac: A SymbolicAssignmentCollction used for storing + temporary expressions. :returns: a list of :mod:`sympy` expressions representing the coefficients of the expansion. """ raise NotImplementedError - def evaluate(self, coeffs, bvec, rscale): + def evaluate(self, coeffs, bvec, rscale, sac): """ + :arg sac: A SymbolicAssignmentCollction used for storing + temporary expressions. + :return: a :mod:`sympy` expression corresponding to the evaluated expansion with the coefficients in *coeffs*. @@ -137,7 +142,7 @@ class ExpansionBase(object): raise NotImplementedError def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale): + dvec, tgt_rscale, sac): raise NotImplementedError def update_persistent_hash(self, key_hash, key_builder): diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 3879f50d..c52d0016 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -61,7 +61,7 @@ class LineTaylorLocalExpansion(LocalExpansionBase): def get_coefficient_identifiers(self): return list(range(self.order+1)) - def coefficients_from_source(self, avec, bvec, rscale): + def coefficients_from_source(self, avec, bvec, rscale, sac): # no point in heeding rscale here--just ignore it if bvec is None: raise RuntimeError("cannot use line-Taylor expansions in a setting " @@ -102,7 +102,7 @@ class LineTaylorLocalExpansion(LocalExpansionBase): .subs("tau", 0) for i in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale, knl=None): + def evaluate(self, coeffs, bvec, rscale, sac, knl=None): # no point in heeding rscale here--just ignore it from pytools import factorial return sym.Add(*( @@ -119,7 +119,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): Coefficients represent derivative values of the kernel. """ - def coefficients_from_source(self, avec, bvec, rscale): + def coefficients_from_source(self, avec, bvec, rscale, sac): from sumpy.tools import MiDerivativeTakerWrapper result = [] @@ -132,7 +132,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): return result - def evaluate(self, coeffs, bvec, rscale, knl=None): + def evaluate(self, coeffs, bvec, rscale, sac, knl=None): evaluated_coeffs = ( self.expansion_terms_wrangler.get_full_kernel_derivatives_from_stored( coeffs, rscale)) @@ -152,7 +152,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): return knl.postprocess_at_target(result, bvec) def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale, use_fft=False): + dvec, tgt_rscale, sac, use_fft=False): logger.info("building translation operator: %s(%d) -> %s(%d): start" % (type(src_expansion).__name__, src_expansion.order, @@ -326,7 +326,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # the end in the hope of helping rscale magnitude. dvec_scaled = [d*src_rscale for d in dvec] expr = src_expansion.evaluate(src_coeff_exprs, dvec_scaled, - rscale=src_rscale) + rscale=src_rscale, sac=sac) replace_dict = dict((d, d/src_rscale) for d in dvec) taker = MiDerivativeTaker(expr, dvec) rscale_ratio = sym.UnevaluatedExpr(tgt_rscale/src_rscale) @@ -388,7 +388,7 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): def get_coefficient_identifiers(self): return list(range(-self.order, self.order+1)) - def coefficients_from_source(self, avec, bvec, rscale): + def coefficients_from_source(self, avec, bvec, rscale, sac): if not self.use_rscale: rscale = 1 @@ -406,7 +406,7 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): * sym.exp(sym.I * l * source_angle_rel_center), avec) for l in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale, knl=None): + def evaluate(self, coeffs, bvec, rscale, sac, knl=None): if not self.use_rscale: rscale = 1 if knl is None: @@ -427,7 +427,7 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): for l in self.get_coefficient_identifiers()) def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale): + dvec, tgt_rscale, sac): from sumpy.symbolic import sym_real_norm_2 if not self.use_rscale: diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index c4b7f098..c1514357 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -55,7 +55,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): Coefficients represent the terms in front of the kernel derivatives. """ - def coefficients_from_source(self, avec, bvec, rscale): + def coefficients_from_source(self, avec, bvec, rscale, sac): from sumpy.kernel import DirectionalSourceDerivative kernel = self.kernel @@ -96,7 +96,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): else: return (rscale**nderivatives_for_scaling * expr) - def evaluate(self, coeffs, bvec, rscale, knl=None): + def evaluate(self, coeffs, bvec, rscale, sac, knl=None): from sumpy.tools import MiDerivativeTakerWrapper from pytools import single_valued if not self.use_rscale: @@ -122,7 +122,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): return result def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale): + dvec, tgt_rscale, sac): if not isinstance(src_expansion, type(self)): raise RuntimeError("do not know how to translate %s to " "Taylor multipole expansion" @@ -324,7 +324,7 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): def get_coefficient_identifiers(self): return list(range(-self.order, self.order+1)) - def coefficients_from_source(self, avec, bvec, rscale): + def coefficients_from_source(self, avec, bvec, rscale, sac): if not self.use_rscale: rscale = 1 @@ -344,7 +344,7 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): avec) for l in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale, knl=None): + def evaluate(self, coeffs, bvec, rscale, sac, knl=None): if not self.use_rscale: rscale = 1 if knl is None: diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 2aa64804..f5472fc4 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -88,7 +88,7 @@ class P2EBase(KernelCacheWrapper): coeff_names = [ sac.assign_unique("coeff%d" % i, coeff_i) for i, coeff_i in enumerate( - self.expansion.coefficients_from_source(avec, None, rscale))] + self.expansion.coefficients_from_source(avec, None, rscale, sac))] sac.run_global_cse() diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 9708764c..e6704ba8 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -68,7 +68,7 @@ def stringify_expn_index(i): def expand(expansion_nr, sac, expansion, avec, bvec): rscale = sym.Symbol("rscale") - coefficients = expansion.coefficients_from_source(avec, bvec, rscale) + coefficients = expansion.coefficients_from_source(avec, bvec, rscale, sac) assigned_coeffs = [ sym.Symbol( @@ -78,7 +78,7 @@ def expand(expansion_nr, sac, expansion, avec, bvec): for i in expansion.get_coefficient_identifiers()] return sac.assign_unique("expn%d_result" % expansion_nr, - expansion.evaluate(assigned_coeffs, bvec, rscale)) + expansion.evaluate(assigned_coeffs, bvec, rscale, sac)) # {{{ layer potential computation diff --git a/test/test_codegen.py b/test/test_codegen.py index 7e3c25e0..02884eba 100644 --- a/test/test_codegen.py +++ b/test/test_codegen.py @@ -91,7 +91,7 @@ def test_line_taylor_coeff_growth(): expn = LineTaylorLocalExpansion(LaplaceKernel(2), order) avec = make_sym_vector("a", 2) bvec = make_sym_vector("b", 2) - coeffs = expn.coefficients_from_source(avec, bvec, rscale=1) + coeffs = expn.coefficients_from_source(avec, bvec, rscale=1, None) sym2pymbolic = SympyToPymbolicMapper() coeffs_pymbolic = [sym2pymbolic(c) for c in coeffs] diff --git a/test/test_kernels.py b/test/test_kernels.py index 64c5b217..c52427c4 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -407,7 +407,7 @@ def test_m2l_toeplitz(ctx_getter): expected_output = _m2l_translate_simple(local_expn, mpole_expn, src_coeff_exprs, src_rscale, dvec, tgt_rscale) actual_output = local_expn.translate_from(mpole_expn, src_coeff_exprs, - src_rscale, dvec, tgt_rscale) + src_rscale, dvec, tgt_rscale, sac=None) replace_dict = dict((d, np.random.rand(1)[0]) for d in dvec) for sym_a, sym_b in zip(expected_output, actual_output): -- GitLab From 0c3b5cf045a0df459ab26101ef47530630e6cc23 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Wed, 6 May 2020 00:34:03 -0500 Subject: [PATCH 25/48] Fix Laplace 2D mi=(2,0) case --- sumpy/tools.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index c2845ef0..88c5aea2 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -155,10 +155,10 @@ class LaplaceDerivativeTaker(MiDerivativeTaker): try: expr = self.cache_by_mi[mi] except KeyError: + dim = len(self.var_list) if max(mi) == 1: return MiDerivativeTaker.diff(self, mi) d = -1 - dim = len(self.var_list) for i in range(dim): if mi[i] >= 2: d = i @@ -179,6 +179,8 @@ class LaplaceDerivativeTaker(MiDerivativeTaker): else: expr -= 2*x*(n-1)*self.diff(tuple(mi_minus_one)) expr -= (n-1)*(n-2)*self.diff(tuple(mi_minus_two)) + if n == 2 and sum(mi) == 2: + expr += 1 else: expr -= 2*n*x*self.diff(tuple(mi_minus_one)) expr -= n*(n-1)*self.diff(tuple(mi_minus_two)) -- GitLab From cc684398eb16190d46d6045ed7b949a2f5851368 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Wed, 6 May 2020 16:42:15 -0500 Subject: [PATCH 26/48] Move efficient scaling to the deriv taker --- sumpy/expansion/__init__.py | 8 +-- sumpy/expansion/local.py | 27 ++++++---- sumpy/expansion/multipole.py | 25 ++-------- sumpy/kernel.py | 97 ------------------------------------ sumpy/tools.py | 83 +++++++++++++++++++++++++++--- test/test_codegen.py | 2 +- test/test_kernels.py | 9 +--- 7 files changed, 103 insertions(+), 148 deletions(-) diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index a8f78cfe..5551537a 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -161,12 +161,12 @@ class ExpansionBase(object): def __ne__(self, other): return not self.__eq__(other) - def get_kernel_derivative_taker(self, dvec): + def get_kernel_derivative_taker(self, dvec, rscale=1): """Return a MiDerivativeTaker instance that supports taking derivatives of the kernel with respect to dvec """ from sumpy.tools import MiDerivativeTaker - return MiDerivativeTaker(self.kernel.get_expression(dvec), dvec) + return MiDerivativeTaker(self.kernel.get_expression(dvec), dvec, rscale) # }}} @@ -648,9 +648,9 @@ class LaplaceConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): def __init__(self, kernel, order, use_rscale): self.expansion_terms_wrangler_key = (order, kernel.dim) - def get_kernel_derivative_taker(self, dvec): + def get_kernel_derivative_taker(self, dvec, rscale=1): from sumpy.tools import LaplaceDerivativeTaker - return LaplaceDerivativeTaker(self.kernel.get_expression(dvec), dvec) + return LaplaceDerivativeTaker(self.kernel.get_expression(dvec), dvec, rscale) class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index c52d0016..032516ef 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -24,6 +24,7 @@ THE SOFTWARE. from six.moves import range, zip import sumpy.symbolic as sym +from pytools import single_valued from sumpy.expansion import ( ExpansionBase, VolumeTaylorExpansion, LaplaceConformingVolumeTaylorExpansion, @@ -123,11 +124,22 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): from sumpy.tools import MiDerivativeTakerWrapper result = [] - taker = self.get_kernel_derivative_taker(avec) + taker = self.get_kernel_derivative_taker(avec, rscale) + expr_dict = {(0,)*self.dim: 1} + expr_dict = self.kernel.get_derivative_transformation_at_source(expr_dict) + pp_nderivatives = single_valued(sum(mi) for mi in expr_dict.keys()) + for mi in self.get_coefficient_identifiers(): wrapper = MiDerivativeTakerWrapper(taker, mi) mi_expr = self.kernel.postprocess_at_source(wrapper, avec) - expr = mi_expr * rscale ** sum(mi) + # By passing `rscale` to the derivative taker we are taking a scaled + # version of the derivative which is `expr.diff(mi)*rscale**sum(mi)` + # which might be implemented efficiently for kernels like Laplace. + # One caveat is that `postprocess_at_source` might take more + # derivatives which would multiply the expression by more `rscale`s + # than necessary as the derivative taker does not know about + # `postprocess_at_source`. This is corrected by dividing by `rscale`. + expr = mi_expr / rscale ** pp_nderivatives result.append(expr) return result @@ -175,8 +187,6 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # This code speeds up derivative taking by caching all kernel # derivatives. - taker = src_expansion.get_kernel_derivative_taker(dvec) - from sumpy.tools import add_mi, _fft_uneval_expr from pytools import generate_nonnegative_integer_tuples_below as gnitb @@ -205,17 +215,12 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # The vector has the kernel derivatives and depends only on the distance # between the two centers - taker = src_expansion.get_kernel_derivative_taker(dvec) + taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale) vector_stored = [] # Calculate the kernel derivatives for the compressed set for term in \ srcplusderiv_terms_wrangler.get_coefficient_identifiers(): - kernel_deriv = src_expansion.get_scaled_multipole( - taker.diff(term), - dvec, src_rscale, - nderivatives=sum(term), - nderivatives_for_scaling=sum(term), - ) + kernel_deriv = taker.diff(term) vector_stored.append(kernel_deriv) # Calculate the kernel derivatives for the full set vector_full = \ diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index c1514357..de9aa500 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -80,22 +80,6 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): self.expansion_terms_wrangler.get_stored_mpole_coefficients_from_full( result, rscale)) - def get_scaled_multipole(self, expr, bvec, rscale, nderivatives, - nderivatives_for_scaling=None): - if nderivatives_for_scaling is None: - nderivatives_for_scaling = nderivatives - - if self.kernel.has_efficient_scale_adjustment: - return ( - self.kernel.adjust_for_kernel_scaling( - vector_xreplace( - expr, - bvec, bvec * rscale**-1), - rscale, nderivatives) - / rscale ** (nderivatives - nderivatives_for_scaling)) - else: - return (rscale**nderivatives_for_scaling * expr) - def evaluate(self, coeffs, bvec, rscale, sac, knl=None): from sumpy.tools import MiDerivativeTakerWrapper from pytools import single_valued @@ -104,7 +88,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): if knl is None: knl = self.kernel - taker = self.get_kernel_derivative_taker(bvec) + taker = self.get_kernel_derivative_taker(bvec, rscale) expr_dict = {(0,)*self.dim: 1} expr_dict = knl.get_derivative_transformation_at_target(expr_dict) pp_nderivatives = single_valued(sum(mi) for mi in expr_dict.keys()) @@ -113,8 +97,9 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): for coeff, mi in zip(coeffs, self.get_coefficient_identifiers()): wrapper = MiDerivativeTakerWrapper(taker, mi) mi_expr = knl.postprocess_at_target(wrapper, bvec) - expr = coeff * self.get_scaled_multipole(mi_expr, bvec, rscale, - sum(mi) + pp_nderivatives, sum(mi)) + # For details about this correction, see the explanation at + # VolumeTaylorLocalExpansionBase.coefficients_from_source + expr = coeff * mi_expr / rscale**pp_nderivatives result.append(expr) result = sym.Add(*tuple(result)) @@ -365,7 +350,7 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): for l in self.get_coefficient_identifiers()) def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale): + dvec, tgt_rscale, sac): if not isinstance(src_expansion, type(self)): raise RuntimeError("do not know how to translate %s to %s" % (type(src_expansion).__name__, diff --git a/sumpy/kernel.py b/sumpy/kernel.py index e470e48d..092cbb49 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -123,8 +123,6 @@ class Kernel(object): .. automethod:: prepare_loopy_kernel .. automethod:: get_code_transformer .. automethod:: get_expression - .. attribute:: has_efficient_scale_adjustment - .. automethod:: adjust_for_kernel_scaling .. automethod:: postprocess_at_source .. automethod:: postprocess_at_target .. automethod:: get_global_scaling_const @@ -193,77 +191,6 @@ class Kernel(object): r"""Return a :mod:`sympy` expression for the kernel.""" raise NotImplementedError - has_efficient_scale_adjustment = False - - def adjust_for_kernel_scaling(self, expr, rscale, nderivatives): - r""" - Consider a Taylor multipole expansion: - - .. math:: - - f (x - y) = \sum_{i = 0}^{\infty} (\partial_y^i f) (x - y) \big|_{y = c} - \frac{(y - c)^i}{i!} . - - Now suppose we would like to use a scaled version :math:`g` of the - kernel :math:`f`: - - .. math:: - - \begin{eqnarray*} - f (x) & = & g (x / \alpha),\\ - f^{(i)} (x) & = & \frac{1}{\alpha^i} g^{(i)} (x / \alpha) . - \end{eqnarray*} - - where :math:`\alpha` is chosen to be on a length scale similar to - :math:`x` (for example by choosing :math:`\alpha` proporitional to the - size of the box for which the expansion is intended) so that :math:`x / - \alpha` is roughly of unit magnitude, to avoid arithmetic issues with - small arguments. This yields - - .. math:: - - f (x - y) = \sum_{i = 0}^{\infty} (\partial_y^i g) - \left( \frac{x - y}{\alpha} \right) \Bigg|_{y = c} - \cdot - \frac{(y - c)^i}{\alpha^i \cdot i!}. - - Observe that the :math:`(y - c)` term is now scaled to unit magnitude, - as is the argument of :math:`g`. - - With :math:`\xi = x / \alpha`, we find - - .. math:: - - \begin{eqnarray*} - g (\xi) & = & f (\alpha \xi),\\ - g^{(i)} (\xi) & = & \alpha^i f^{(i)} (\alpha \xi) . - \end{eqnarray*} - - Generically for all kernels, :math:`f^{(i)} (\alpha \xi)` is computable - by taking a sufficient number of symbolic derivatives of :math:`f` and - providing :math:`\alpha \xi = x` as the argument. - - Now, for some kernels, like :math:`f (x) = C \log x`, the powers of - :math:`\alpha^i` from the chain rule cancel with the ones from the - argument substituted into the kernel derivative: - - .. math:: - - g^{(i)} (\xi) = \alpha^i f^{(i)} (\alpha \xi) = C' \cdot \alpha^i \cdot - \frac{1}{(\alpha x)^i} \quad (i > 0), - - making them what you might call *scale-invariant*. In this case, one - may set :attr:`has_efficient_scale_adjustment`. For these kernels only, - :meth:`adjust_for_kernel_scaling` provides a shortcut for scaled kernel - derivative computation. Given :math:`f^{(i)}` as *expr*, it directly - returns an expression for :math:`g^{(i)}`, where :math:`i` is given - as *nderivatives*. - - :arg rscale: The scaling parameter :math:`\alpha` above. - """ - - raise NotImplementedError - def _diff(self, expr, vec, mi): """Take the derivative of an expression or a MiDerivativeTakerWrapper """ @@ -467,22 +394,6 @@ class LaplaceKernel(ExpressionKernel): global_scaling_const=scaling, is_complex_valued=False) - has_efficient_scale_adjustment = True - - def adjust_for_kernel_scaling(self, expr, rscale, nderivatives): - if self.dim == 2: - if nderivatives == 0: - import sumpy.symbolic as sp - return (expr + sp.log(rscale)) - else: - return expr - - elif self.dim == 3: - return expr / rscale - - else: - raise NotImplementedError("unsupported dimensionality") - def __getinitargs__(self): return (self.dim,) @@ -814,14 +725,6 @@ class KernelWrapper(Kernel): def get_expression(self, scaled_dist_vec): return self.inner_kernel.get_expression(scaled_dist_vec) - @property - def has_efficient_scale_adjustment(self): - return self.inner_kernel.has_efficient_scale_adjustment - - def adjust_for_kernel_scaling(self, expr, rscale, nderivatives): - return self.inner_kernel.adjust_for_kernel_scaling( - expr, rscale, nderivatives) - def get_derivative_transformation_at_source(self, expr_dict): return self.inner_kernel.get_derivative_transformation_at_source(expr_dict) diff --git a/sumpy/tools.py b/sumpy/tools.py index 88c5aea2..4cc529ed 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -104,11 +104,77 @@ def mi_power(vector, mi, evaluate=True): class MiDerivativeTaker(object): - def __init__(self, expr, var_list): + def __init__(self, expr, var_list, rscale=1): + r""" + A class to take scaled derivatives of the symbolic expression + expr w.r.t. variables var_list and the scaling parameter rscale. + + Consider a Taylor multipole expansion: + + .. math:: + + f (x - y) = \sum_{i = 0}^{\infty} (\partial_y^i f) (x - y) \big|_{y = c} + \frac{(y - c)^i}{i!} . + + Now suppose we would like to use a scaled version :math:`g` of the + kernel :math:`f`: + + .. math:: + + \begin{eqnarray*} + f (x) & = & g (x / \alpha),\\ + f^{(i)} (x) & = & \frac{1}{\alpha^i} g^{(i)} (x / \alpha) . + \end{eqnarray*} + + where :math:`\alpha` is chosen to be on a length scale similar to + :math:`x` (for example by choosing :math:`\alpha` proporitional to the + size of the box for which the expansion is intended) so that :math:`x / + \alpha` is roughly of unit magnitude, to avoid arithmetic issues with + small arguments. This yields + + .. math:: + + f (x - y) = \sum_{i = 0}^{\infty} (\partial_y^i g) + \left( \frac{x - y}{\alpha} \right) \Bigg|_{y = c} + \cdot + \frac{(y - c)^i}{\alpha^i \cdot i!}. + + Observe that the :math:`(y - c)` term is now scaled to unit magnitude, + as is the argument of :math:`g`. + + With :math:`\xi = x / \alpha`, we find + + .. math:: + + \begin{eqnarray*} + g (\xi) & = & f (\alpha \xi),\\ + g^{(i)} (\xi) & = & \alpha^i f^{(i)} (\alpha \xi) . + \end{eqnarray*} + + Generically for all kernels, :math:`f^{(i)} (\alpha \xi)` is computable + by taking a sufficient number of symbolic derivatives of :math:`f` and + providing :math:`\alpha \xi = x` as the argument. + + Now, for some kernels, like :math:`f (x) = C \log x`, the powers of + :math:`\alpha^i` from the chain rule cancel with the ones from the + argument substituted into the kernel derivatives: + + .. math:: + + g^{(i)} (\xi) = \alpha^i f^{(i)} (\alpha \xi) = C' \cdot \alpha^i \cdot + \frac{1}{(\alpha x)^i} \quad (i > 0), + + making them what you might call *scale-invariant*. + + This derivative taker returns :math:`g^{(i)}(\xi) = \alpha^i f^{(i)}` + given :math:`f^{(0)}` as *expr* and :math:`\alpha` as *rscale*. + """ + assert isinstance(expr, sym.Basic) self.var_list = var_list empty_mi = (0,) * len(var_list) self.cache_by_mi = {empty_mi: expr} + self.rscale = rscale def mi_dist(self, a, b): return np.array(a, dtype=int) - np.array(b, dtype=int) @@ -122,7 +188,7 @@ class MiDerivativeTaker(object): for next_deriv, next_mi in self.get_derivative_taking_sequence( current_mi, mi): - expr = expr.diff(next_deriv) + expr = expr.diff(next_deriv) * self.rscale self.cache_by_mi[next_mi] = expr return expr @@ -144,9 +210,10 @@ class MiDerivativeTaker(object): class LaplaceDerivativeTaker(MiDerivativeTaker): - def __init__(self, expr, var_list): - super(LaplaceDerivativeTaker, self).__init__(expr, var_list) + def __init__(self, expr, var_list, rscale=1): + super(LaplaceDerivativeTaker, self).__init__(expr, var_list, rscale) self.r = sym.sqrt(sum(v**2 for v in var_list)) + self.scaled_r = sym.sqrt(sum((v/rscale)**2 for v in var_list)) def diff(self, mi): # Return zero for negative values. Makes the algorithm readable. @@ -174,17 +241,17 @@ class LaplaceDerivativeTaker(MiDerivativeTaker): n = mi[i] if i == d: if dim == 3: - expr -= (2*n-1)*x*self.diff(tuple(mi_minus_one)) + expr -= (2*n-1)*(x/self.rscale)*self.diff(tuple(mi_minus_one)) expr -= (n-1)**2*self.diff(tuple(mi_minus_two)) else: - expr -= 2*x*(n-1)*self.diff(tuple(mi_minus_one)) + expr -= 2*(x/self.rscale)*(n-1)*self.diff(tuple(mi_minus_one)) expr -= (n-1)*(n-2)*self.diff(tuple(mi_minus_two)) if n == 2 and sum(mi) == 2: expr += 1 else: - expr -= 2*n*x*self.diff(tuple(mi_minus_one)) + expr -= 2*n*(x/self.rscale)*self.diff(tuple(mi_minus_one)) expr -= n*(n-1)*self.diff(tuple(mi_minus_two)) - expr /= self.r**2 + expr /= self.scaled_r**2 self.cache_by_mi[mi] = expr return expr diff --git a/test/test_codegen.py b/test/test_codegen.py index 02884eba..fa155927 100644 --- a/test/test_codegen.py +++ b/test/test_codegen.py @@ -91,7 +91,7 @@ def test_line_taylor_coeff_growth(): expn = LineTaylorLocalExpansion(LaplaceKernel(2), order) avec = make_sym_vector("a", 2) bvec = make_sym_vector("b", 2) - coeffs = expn.coefficients_from_source(avec, bvec, rscale=1, None) + coeffs = expn.coefficients_from_source(avec, bvec, rscale=1, sac=None) sym2pymbolic = SympyToPymbolicMapper() coeffs_pymbolic = [sym2pymbolic(c) for c in coeffs] diff --git a/test/test_kernels.py b/test/test_kernels.py index c52427c4..f37e51c6 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -366,7 +366,7 @@ def _m2l_translate_simple(tgt_expansion, src_expansion, src_coeff_exprs, src_rsc # # To get the local expansion coefficients, we take derivatives of # the multipole expansion. - taker = src_expansion.get_kernel_derivative_taker(dvec) + taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale) from sumpy.tools import add_mi @@ -377,12 +377,7 @@ def _m2l_translate_simple(tgt_expansion, src_expansion, src_coeff_exprs, src_rsc src_coeff_exprs, src_expansion.get_coefficient_identifiers()): - kernel_deriv = ( - src_expansion.get_scaled_multipole( - taker.diff(add_mi(deriv, term)), - dvec, src_rscale, - nderivatives=sum(deriv) + sum(term), - nderivatives_for_scaling=sum(term))) + kernel_deriv = taker.diff(add_mi(deriv, term)) / src_rscale**sum(deriv) local_result.append( coeff * kernel_deriv * tgt_rscale**sum(deriv)) -- GitLab From 6189de9d76e779b0590a14d8c038652cfef0ffe8 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Thu, 7 May 2020 18:24:57 -0500 Subject: [PATCH 27/48] Use the sac --- sumpy/expansion/__init__.py | 10 ++++++---- sumpy/expansion/local.py | 4 ++-- sumpy/expansion/multipole.py | 3 +-- sumpy/tools.py | 35 +++++++++++++++++++++++------------ test/test_kernels.py | 2 +- 5 files changed, 33 insertions(+), 21 deletions(-) diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index 5551537a..71af7eba 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -161,12 +161,13 @@ class ExpansionBase(object): def __ne__(self, other): return not self.__eq__(other) - def get_kernel_derivative_taker(self, dvec, rscale=1): + def get_kernel_derivative_taker(self, dvec, rscale, sac): """Return a MiDerivativeTaker instance that supports taking derivatives of the kernel with respect to dvec """ from sumpy.tools import MiDerivativeTaker - return MiDerivativeTaker(self.kernel.get_expression(dvec), dvec, rscale) + return MiDerivativeTaker(self.kernel.get_expression(dvec), dvec, rscale, + sac) # }}} @@ -648,9 +649,10 @@ class LaplaceConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): def __init__(self, kernel, order, use_rscale): self.expansion_terms_wrangler_key = (order, kernel.dim) - def get_kernel_derivative_taker(self, dvec, rscale=1): + def get_kernel_derivative_taker(self, dvec, rscale, sac): from sumpy.tools import LaplaceDerivativeTaker - return LaplaceDerivativeTaker(self.kernel.get_expression(dvec), dvec, rscale) + return LaplaceDerivativeTaker(self.kernel.get_expression(dvec), dvec, + rscale, sac) class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 032516ef..c2947c69 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -124,7 +124,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): from sumpy.tools import MiDerivativeTakerWrapper result = [] - taker = self.get_kernel_derivative_taker(avec, rscale) + taker = self.get_kernel_derivative_taker(avec, rscale, sac) expr_dict = {(0,)*self.dim: 1} expr_dict = self.kernel.get_derivative_transformation_at_source(expr_dict) pp_nderivatives = single_valued(sum(mi) for mi in expr_dict.keys()) @@ -215,7 +215,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # The vector has the kernel derivatives and depends only on the distance # between the two centers - taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale) + taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale, sac) vector_stored = [] # Calculate the kernel derivatives for the compressed set for term in \ diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index de9aa500..fe8d30a1 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -25,7 +25,6 @@ THE SOFTWARE. from six.moves import range, zip import sumpy.symbolic as sym # noqa -from sumpy.symbolic import vector_xreplace from sumpy.expansion import ( ExpansionBase, VolumeTaylorExpansion, LaplaceConformingVolumeTaylorExpansion, HelmholtzConformingVolumeTaylorExpansion, @@ -88,7 +87,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): if knl is None: knl = self.kernel - taker = self.get_kernel_derivative_taker(bvec, rscale) + taker = self.get_kernel_derivative_taker(bvec, rscale, sac) expr_dict = {(0,)*self.dim: 1} expr_dict = knl.get_derivative_transformation_at_target(expr_dict) pp_nderivatives = single_valued(sum(mi) for mi in expr_dict.keys()) diff --git a/sumpy/tools.py b/sumpy/tools.py index 4cc529ed..993ae91e 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -104,7 +104,7 @@ def mi_power(vector, mi, evaluate=True): class MiDerivativeTaker(object): - def __init__(self, expr, var_list, rscale=1): + def __init__(self, expr, var_list, rscale=1, sac=None): r""" A class to take scaled derivatives of the symbolic expression expr w.r.t. variables var_list and the scaling parameter rscale. @@ -175,6 +175,7 @@ class MiDerivativeTaker(object): empty_mi = (0,) * len(var_list) self.cache_by_mi = {empty_mi: expr} self.rscale = rscale + self.sac = sac def mi_dist(self, a, b): return np.array(a, dtype=int) - np.array(b, dtype=int) @@ -207,13 +208,21 @@ class MiDerivativeTaker(object): if (np.array(mi) >= np.array(other_mi)).all()), key=lambda other_mi: sum(self.mi_dist(mi, other_mi))) + def add_to_sac(self, expr): + import sumpy.symbolic as sym + if self.sac is not None: + return sym.Symbol(self.sac.assign_unique("temp", expr)) + else: + return expr + class LaplaceDerivativeTaker(MiDerivativeTaker): - def __init__(self, expr, var_list, rscale=1): - super(LaplaceDerivativeTaker, self).__init__(expr, var_list, rscale) - self.r = sym.sqrt(sum(v**2 for v in var_list)) - self.scaled_r = sym.sqrt(sum((v/rscale)**2 for v in var_list)) + def __init__(self, expr, var_list, rscale=1, sac=None): + super(LaplaceDerivativeTaker, self).__init__(expr, var_list, rscale, sac) + self.scaled_var_list = [self.add_to_sac(v/rscale) for v in var_list] + self.scaled_r = self.add_to_sac( + sym.sqrt(sum(v**2 for v in self.scaled_var_list))) def diff(self, mi): # Return zero for negative values. Makes the algorithm readable. @@ -235,22 +244,24 @@ class LaplaceDerivativeTaker(MiDerivativeTaker): for i in range(dim): mi_minus_one = list(mi) mi_minus_one[i] -= 1 + mi_minus_one = tuple(mi_minus_one) mi_minus_two = list(mi) mi_minus_two[i] -= 2 - x = self.var_list[i] + mi_minus_two = tuple(mi_minus_two) + x = self.scaled_var_list[i] n = mi[i] if i == d: if dim == 3: - expr -= (2*n-1)*(x/self.rscale)*self.diff(tuple(mi_minus_one)) - expr -= (n-1)**2*self.diff(tuple(mi_minus_two)) + expr -= (2*n - 1) * x * self.diff(mi_minus_one) + expr -= (n - 1)**2 * self.diff(mi_minus_two) else: - expr -= 2*(x/self.rscale)*(n-1)*self.diff(tuple(mi_minus_one)) - expr -= (n-1)*(n-2)*self.diff(tuple(mi_minus_two)) + expr -= 2 * x * (n - 1) * self.diff(mi_minus_one) + expr -= (n - 1) * (n - 2) * self.diff(mi_minus_two) if n == 2 and sum(mi) == 2: expr += 1 else: - expr -= 2*n*(x/self.rscale)*self.diff(tuple(mi_minus_one)) - expr -= n*(n-1)*self.diff(tuple(mi_minus_two)) + expr -= 2 * n * x * self.diff(mi_minus_one) + expr -= n * (n - 1) * self.diff(mi_minus_two) expr /= self.scaled_r**2 self.cache_by_mi[mi] = expr return expr diff --git a/test/test_kernels.py b/test/test_kernels.py index f37e51c6..4e445888 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -366,7 +366,7 @@ def _m2l_translate_simple(tgt_expansion, src_expansion, src_coeff_exprs, src_rsc # # To get the local expansion coefficients, we take derivatives of # the multipole expansion. - taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale) + taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale, sac=None) from sumpy.tools import add_mi -- GitLab From bcf5310de4c96d80e57263439eaa6fe11890a5db Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Thu, 7 May 2020 22:17:27 -0500 Subject: [PATCH 28/48] Use the sac more --- sumpy/expansion/local.py | 12 ++++++++---- sumpy/tools.py | 25 +++++++++++++------------ 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index c2947c69..f6cce8fe 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -32,7 +32,7 @@ from sumpy.expansion import ( BiharmonicConformingVolumeTaylorExpansion) from sumpy.tools import (matvec_toeplitz_upper_triangular, - fft_toeplitz_upper_triangular) + fft_toeplitz_upper_triangular, add_to_sac) class LocalExpansionBase(ExpansionBase): @@ -238,14 +238,16 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): vector = [0]*len(toeplitz_matrix_coeffs) for i, term in enumerate(toeplitz_matrix_coeffs): if term in srcplusderiv_ident_to_index: - vector[i] = vector_full[srcplusderiv_ident_to_index[term]] + vector[i] = add_to_sac(sac, + vector_full[srcplusderiv_ident_to_index[term]]) # Calculate the first row of the upper triangular Toeplitz matrix toeplitz_first_row = [0] * len(toeplitz_matrix_coeffs) for coeff, term in zip( src_coeff_exprs, src_expansion.get_coefficient_identifiers()): - toeplitz_first_row[toeplitz_matrix_ident_to_index[term]] = coeff + toeplitz_first_row[toeplitz_matrix_ident_to_index[term]] = \ + add_to_sac(sac, coeff) # Do the matvec if use_fft: @@ -263,7 +265,9 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): logger.info("building translation operator: done") return result - rscale_ratio = sym.UnevaluatedExpr(tgt_rscale/src_rscale) + rscale_ratio = tgt_rscale/src_rscale + if sac is not None: + rscale_ratio = sym.Symbol(sac.assign_unique("temp"), rscale_ratio) from sumpy.tools import MiDerivativeTaker from math import factorial diff --git a/sumpy/tools.py b/sumpy/tools.py index 993ae91e..26b3f62e 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -102,6 +102,14 @@ def mi_power(vector, mi, evaluate=True): return result +def add_to_sac(sac, expr): + import sumpy.symbolic as sym + if sac is not None: + return sym.Symbol(sac.assign_unique("temp", expr)) + else: + return expr + + class MiDerivativeTaker(object): def __init__(self, expr, var_list, rscale=1, sac=None): @@ -208,20 +216,13 @@ class MiDerivativeTaker(object): if (np.array(mi) >= np.array(other_mi)).all()), key=lambda other_mi: sum(self.mi_dist(mi, other_mi))) - def add_to_sac(self, expr): - import sumpy.symbolic as sym - if self.sac is not None: - return sym.Symbol(self.sac.assign_unique("temp", expr)) - else: - return expr - class LaplaceDerivativeTaker(MiDerivativeTaker): def __init__(self, expr, var_list, rscale=1, sac=None): super(LaplaceDerivativeTaker, self).__init__(expr, var_list, rscale, sac) - self.scaled_var_list = [self.add_to_sac(v/rscale) for v in var_list] - self.scaled_r = self.add_to_sac( + self.scaled_var_list = [add_to_sac(self.sac, v/rscale) for v in var_list] + self.scaled_r = add_to_sac(self.sac, sym.sqrt(sum(v**2 for v in self.scaled_var_list))) def diff(self, mi): @@ -263,7 +264,7 @@ class LaplaceDerivativeTaker(MiDerivativeTaker): expr -= 2 * n * x * self.diff(mi_minus_one) expr -= n * (n - 1) * self.diff(mi_minus_two) expr /= self.scaled_r**2 - self.cache_by_mi[mi] = expr + self.cache_by_mi[mi] = add_to_sac(self.sac, expr) return expr @@ -1034,8 +1035,8 @@ def matvec_toeplitz_upper_triangular(first_row, vector): assert len(vector) == n output = [0]*n for row in range(n): - for col in range(row, n): - output[row] += first_row[col-row]*vector[col] + terms = tuple(first_row[col-row]*vector[col] for col in range(row, n)) + output[row] = sym.Add(*terms) return output # }}} -- GitLab From 44376fa2e02389c2663f151f09a7968d7fd0a7a1 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Thu, 7 May 2020 22:39:17 -0500 Subject: [PATCH 29/48] Fix typo --- sumpy/expansion/local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index f6cce8fe..21743f45 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -267,7 +267,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): rscale_ratio = tgt_rscale/src_rscale if sac is not None: - rscale_ratio = sym.Symbol(sac.assign_unique("temp"), rscale_ratio) + rscale_ratio = sym.Symbol(sac.assign_unique("temp", rscale_ratio)) from sumpy.tools import MiDerivativeTaker from math import factorial -- GitLab From 4e0fed62453ccb15cf1b40eaeade2970c00d0358 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Thu, 7 May 2020 23:27:14 -0500 Subject: [PATCH 30/48] Fix benchmark --- benchmarks/bench_translations.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/benchmarks/bench_translations.py b/benchmarks/bench_translations.py index 6e87370f..8a9ef0da 100644 --- a/benchmarks/bench_translations.py +++ b/benchmarks/bench_translations.py @@ -66,8 +66,14 @@ class TranslationBenchmarkSuite: src_rscale = sym.Symbol("src_rscale") tgt_rscale = sym.Symbol("tgt_rscale") sac = SymbolicAssignmentCollection() - result = l_expn.translate_from(m_expn, src_coeff_exprs, src_rscale, + try: + result = l_expn.translate_from(m_expn, src_coeff_exprs, src_rscale, dvec, tgt_rscale, sac) + except TypeError: + # Support older interface to make it possible to compare + # in CI run + result = l_expn.translate_from(m_expn, src_coeff_exprs, src_rscale, + dvec, tgt_rscale) for i, expr in enumerate(result): sac.assign_unique("coeff%d" % i, expr) sac.run_global_cse() -- GitLab From e1bef318de1fe320d58ccab74aa19e3d5471d555 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 8 May 2020 08:12:09 -0500 Subject: [PATCH 31/48] Increase the timeout a bit --- benchmarks/bench_translations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/bench_translations.py b/benchmarks/bench_translations.py index 8a9ef0da..1ebc0d88 100644 --- a/benchmarks/bench_translations.py +++ b/benchmarks/bench_translations.py @@ -83,7 +83,7 @@ class TranslationBenchmarkSuite: return sum([counter.rec(insn.expression)+1 for insn in insns]) track_m2l_op_count.unit = "ops" - track_m2l_op_count.timeout = 200.0 + track_m2l_op_count.timeout = 300.0 class LaplaceVolumeTaylorTranslation(TranslationBenchmarkSuite): -- GitLab From a0907c0f39dd18ed726fed62bb4ad078959820f3 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Thu, 14 May 2020 22:58:32 -0500 Subject: [PATCH 32/48] Add RadialDerivativeTaker --- sumpy/tools.py | 138 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 103 insertions(+), 35 deletions(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index 26b3f62e..1dd7c24e 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -184,21 +184,24 @@ class MiDerivativeTaker(object): self.cache_by_mi = {empty_mi: expr} self.rscale = rscale self.sac = sac + self.dim = len(self.var_list) def mi_dist(self, a, b): return np.array(a, dtype=int) - np.array(b, dtype=int) def diff(self, mi): try: - expr = self.cache_by_mi[mi] + return self.cache_by_mi[mi] except KeyError: - current_mi = self.get_closest_cached_mi(mi) - expr = self.cache_by_mi[current_mi] + pass - for next_deriv, next_mi in self.get_derivative_taking_sequence( - current_mi, mi): - expr = expr.diff(next_deriv) * self.rscale - self.cache_by_mi[next_mi] = expr + current_mi = self.get_closest_cached_mi(mi) + expr = self.cache_by_mi[current_mi] + + for next_deriv, next_mi in self.get_derivative_taking_sequence( + current_mi, mi): + expr = expr.diff(next_deriv) * self.rscale + self.cache_by_mi[next_mi] = expr return expr @@ -230,41 +233,106 @@ class LaplaceDerivativeTaker(MiDerivativeTaker): if min(mi) < 0: return 0 try: - expr = self.cache_by_mi[mi] + return self.cache_by_mi[mi] except KeyError: - dim = len(self.var_list) - if max(mi) == 1: - return MiDerivativeTaker.diff(self, mi) - d = -1 - for i in range(dim): - if mi[i] >= 2: - d = i - break - assert d >= 0 - expr = 0 - for i in range(dim): + pass + + dim = self.dim + if max(mi) == 1: + return MiDerivativeTaker.diff(self, mi) + d = -1 + for i in range(dim): + if mi[i] >= 2: + d = i + break + assert d >= 0 + expr = 0 + for i in range(dim): + mi_minus_one = list(mi) + mi_minus_one[i] -= 1 + mi_minus_one = tuple(mi_minus_one) + mi_minus_two = list(mi) + mi_minus_two[i] -= 2 + mi_minus_two = tuple(mi_minus_two) + x = self.scaled_var_list[i] + n = mi[i] + if i == d: + if dim == 3: + expr -= (2*n - 1) * x * self.diff(mi_minus_one) + expr -= (n - 1)**2 * self.diff(mi_minus_two) + else: + expr -= 2 * x * (n - 1) * self.diff(mi_minus_one) + expr -= (n - 1) * (n - 2) * self.diff(mi_minus_two) + if n == 2 and sum(mi) == 2: + expr += 1 + else: + expr -= 2 * n * x * self.diff(mi_minus_one) + expr -= n * (n - 1) * self.diff(mi_minus_two) + expr /= self.scaled_r**2 + expr = add_to_sac(self.sac, expr) + self.cache_by_mi[mi] = expr + return expr + + +class RadialDerivativeTaker(MiDerivativeTaker): + + def __init__(self, expr, var_list, rscale=1, sac=None): + """ + Takes the derivatives of a radial function. + """ + import sumpy.symbolic as sym + super(RadialDerivativeTaker, self).__init__(expr, var_list, rscale, sac) + empty_mi = (0,) * len(var_list) + self.cache_by_mi_q = {(empty_mi, 0): expr} + self.r = sym.sqrt(sum(v**2 for v in var_list)) + + def diff(self, mi, q=0): + """ + See [1] for the algorithm + + [1]: Tausch, J., 2003. The fast multipole method for arbitrary Green's + functions. Contemporary Mathematics, 329, pp.307-314. + """ + try: + return self.cache_by_mi_q[(mi, q)] + except KeyError: + pass + + for i in range(self.dim): + if mi[i] == 1: + mi_minus_one = list(mi) + mi_minus_one[i] = 0 + mi_minus_one = tuple(mi_minus_one) + expr = self.var_list[i] * self.diff(mi_minus_one, q=q+1) + expr *= self.rscale + self.cache_by_mi_q[(mi, q)] = expr + return expr + + for i in range(self.dim): + if mi[i] >= 2: mi_minus_one = list(mi) mi_minus_one[i] -= 1 mi_minus_one = tuple(mi_minus_one) mi_minus_two = list(mi) mi_minus_two[i] -= 2 mi_minus_two = tuple(mi_minus_two) - x = self.scaled_var_list[i] - n = mi[i] - if i == d: - if dim == 3: - expr -= (2*n - 1) * x * self.diff(mi_minus_one) - expr -= (n - 1)**2 * self.diff(mi_minus_two) - else: - expr -= 2 * x * (n - 1) * self.diff(mi_minus_one) - expr -= (n - 1) * (n - 2) * self.diff(mi_minus_two) - if n == 2 and sum(mi) == 2: - expr += 1 - else: - expr -= 2 * n * x * self.diff(mi_minus_one) - expr -= n * (n - 1) * self.diff(mi_minus_two) - expr /= self.scaled_r**2 - self.cache_by_mi[mi] = add_to_sac(self.sac, expr) + expr = (mi[i]-1)*self.diff(mi_minus_two, q=q+1) * self.rscale + expr += self.var_list[i] * self.diff(mi_minus_one, q=q+1) + expr *= self.rscale + expr = add_to_sac(self.sac, expr) + self.cache_by_mi_q[(mi, q)] = expr + return expr + + assert mi == (0,)*self.dim + assert q > 0 + + prev_expr = self.diff(mi, q=q-1) + # Need to get expr.diff(r)/r, but we can only do expr.diff(x) + # Use expr.diff(x) = expr.diff(r) * x / r + expr = prev_expr.diff(self.var_list[0])/self.var_list[0] + # We need to distribute the division above + expr = expr.expand(deep=False) + self.cache_by_mi_q[(mi, q)] = expr return expr -- GitLab From 688655b872e4170264aa89c3282135101d81cfa3 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Thu, 14 May 2020 23:13:40 -0500 Subject: [PATCH 33/48] Switch over Helmholtz and Biharmonic --- sumpy/expansion/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index 71af7eba..b598facc 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -665,6 +665,11 @@ class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): helmholtz_k_name = kernel.get_base_kernel().helmholtz_k_name self.expansion_terms_wrangler_key = (order, kernel.dim, helmholtz_k_name) + def get_kernel_derivative_taker(self, dvec, rscale, sac): + from sumpy.tools import RadialDerivativeTaker + return RadialDerivativeTaker(self.kernel.get_expression(dvec), dvec, + rscale, sac) + class BiharmonicConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): @@ -675,6 +680,11 @@ class BiharmonicConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): def __init__(self, kernel, order, use_rscale): self.expansion_terms_wrangler_key = (order, kernel.dim) + def get_kernel_derivative_taker(self, dvec, rscale, sac): + from sumpy.tools import RadialDerivativeTaker + return RadialDerivativeTaker(self.kernel.get_expression(dvec), dvec, + rscale, sac) + # }}} -- GitLab From f08e60622fec0b65a7b3f83b57173560c2ace120 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 15 May 2020 09:42:56 -0500 Subject: [PATCH 34/48] Check for radial function --- sumpy/tools.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sumpy/tools.py b/sumpy/tools.py index 1dd7c24e..90540d7a 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -285,6 +285,8 @@ class RadialDerivativeTaker(MiDerivativeTaker): empty_mi = (0,) * len(var_list) self.cache_by_mi_q = {(empty_mi, 0): expr} self.r = sym.sqrt(sum(v**2 for v in var_list)) + rsym = sym.Symbol("_r") + self.is_radial = expr.xreplace({self.r**2: rsym**2}).free_symbols == {rsym} def diff(self, mi, q=0): """ @@ -293,6 +295,10 @@ class RadialDerivativeTaker(MiDerivativeTaker): [1]: Tausch, J., 2003. The fast multipole method for arbitrary Green's functions. Contemporary Mathematics, 329, pp.307-314. """ + if not self.is_radial: + assert q == 0 + return MiDerivativeTaker.diff(self, mi) + try: return self.cache_by_mi_q[(mi, q)] except KeyError: -- GitLab From 2d95aaa38625f0a173a6753199a21e4e2e7c81a5 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 15 May 2020 09:51:19 -0500 Subject: [PATCH 35/48] Fix new flake8 failures --- sumpy/expansion/local.py | 36 ++++++++++++++++++------------------ sumpy/expansion/multipole.py | 26 +++++++++++++------------- sumpy/point_calculus.py | 4 ++-- test/test_cse.py | 8 ++++---- 4 files changed, 37 insertions(+), 37 deletions(-) diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index b4b435de..e7e0403e 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -332,10 +332,10 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): source_angle_rel_center = sym.atan2(-avec[1], -avec[0]) avec_len = sym_real_norm_2(avec) return [self.kernel.postprocess_at_source( - hankel_1(l, arg_scale * avec_len) - * rscale ** abs(l) - * sym.exp(sym.I * l * source_angle_rel_center), avec) - for l in self.get_coefficient_identifiers()] + hankel_1(c, arg_scale * avec_len) + * rscale ** abs(c) + * sym.exp(sym.I * c * source_angle_rel_center), avec) + for c in self.get_coefficient_identifiers()] def evaluate(self, coeffs, bvec, rscale): if not self.use_rscale: @@ -348,12 +348,12 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): arg_scale = self.get_bessel_arg_scaling() - return sum(coeffs[self.get_storage_index(l)] + return sum(coeffs[self.get_storage_index(c)] * self.kernel.postprocess_at_target( - bessel_j(l, arg_scale * bvec_len) - / rscale ** abs(l) - * sym.exp(sym.I * l * -target_angle_rel_center), bvec) - for l in self.get_coefficient_identifiers()) + bessel_j(c, arg_scale * bvec_len) + / rscale ** abs(c) + * sym.exp(sym.I * c * -target_angle_rel_center), bvec) + for c in self.get_coefficient_identifiers()) def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale): @@ -370,13 +370,13 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): bessel_j = sym.Function("bessel_j") new_center_angle_rel_old_center = sym.atan2(dvec[1], dvec[0]) translated_coeffs = [] - for l in self.get_coefficient_identifiers(): + for j in self.get_coefficient_identifiers(): translated_coeffs.append( sum(src_coeff_exprs[src_expansion.get_storage_index(m)] - * bessel_j(m - l, arg_scale * dvec_len) + * bessel_j(m - j, arg_scale * dvec_len) / src_rscale ** abs(m) - * tgt_rscale ** abs(l) - * sym.exp(sym.I * (m - l) * -new_center_angle_rel_old_center) + * tgt_rscale ** abs(j) + * sym.exp(sym.I * (m - j) * -new_center_angle_rel_old_center) for m in src_expansion.get_coefficient_identifiers())) return translated_coeffs @@ -385,14 +385,14 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): hankel_1 = sym.Function("hankel_1") new_center_angle_rel_old_center = sym.atan2(dvec[1], dvec[0]) translated_coeffs = [] - for l in self.get_coefficient_identifiers(): + for j in self.get_coefficient_identifiers(): translated_coeffs.append( sum( - (-1) ** l - * hankel_1(m + l, arg_scale * dvec_len) + (-1) ** j + * hankel_1(m + j, arg_scale * dvec_len) * src_rscale ** abs(m) - * tgt_rscale ** abs(l) - * sym.exp(sym.I * (m + l) * new_center_angle_rel_old_center) + * tgt_rscale ** abs(j) + * sym.exp(sym.I * (m + j) * new_center_angle_rel_old_center) * src_coeff_exprs[src_expansion.get_storage_index(m)] for m in src_expansion.get_coefficient_identifiers())) return translated_coeffs diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index e3759d43..25148e2b 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -290,11 +290,11 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): source_angle_rel_center = sym.atan2(-avec[1], -avec[0]) return [ self.kernel.postprocess_at_source( - bessel_j(l, arg_scale * avec_len) - / rscale ** abs(l) - * sym.exp(sym.I * l * -source_angle_rel_center), + bessel_j(c, arg_scale * avec_len) + / rscale ** abs(c) + * sym.exp(sym.I * c * -source_angle_rel_center), avec) - for l in self.get_coefficient_identifiers()] + for c in self.get_coefficient_identifiers()] def evaluate(self, coeffs, bvec, rscale): if not self.use_rscale: @@ -307,12 +307,12 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): arg_scale = self.get_bessel_arg_scaling() - return sum(coeffs[self.get_storage_index(l)] + return sum(coeffs[self.get_storage_index(c)] * self.kernel.postprocess_at_target( - hankel_1(l, arg_scale * bvec_len) - * rscale ** abs(l) - * sym.exp(sym.I * l * target_angle_rel_center), bvec) - for l in self.get_coefficient_identifiers()) + hankel_1(c, arg_scale * bvec_len) + * rscale ** abs(c) + * sym.exp(sym.I * c * target_angle_rel_center), bvec) + for c in self.get_coefficient_identifiers()) def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale): @@ -333,13 +333,13 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): arg_scale = self.get_bessel_arg_scaling() translated_coeffs = [] - for l in self.get_coefficient_identifiers(): + for j in self.get_coefficient_identifiers(): translated_coeffs.append( sum(src_coeff_exprs[src_expansion.get_storage_index(m)] - * bessel_j(m - l, arg_scale * dvec_len) + * bessel_j(m - j, arg_scale * dvec_len) * src_rscale ** abs(m) - / tgt_rscale ** abs(l) - * sym.exp(sym.I * (m - l) * new_center_angle_rel_old_center) + / tgt_rscale ** abs(j) + * sym.exp(sym.I * (m - j) * new_center_angle_rel_old_center) for m in src_expansion.get_coefficient_identifiers())) return translated_coeffs diff --git a/sumpy/point_calculus.py b/sumpy/point_calculus.py index f8b4f5bd..fc164d81 100644 --- a/sumpy/point_calculus.py +++ b/sumpy/point_calculus.py @@ -231,9 +231,9 @@ class CalculusPatch(object): from pytools.obj_array import make_obj_array return make_obj_array([ sum( - levi_civita((l, m, n)) * self.diff(m, arg[n]) + levi_civita((k, m, n)) * self.diff(m, arg[n]) for m in range(3) for n in range(3)) - for l in range(3)]) + for k in range(3)]) def eval_at_center(self, f_values): """Interpolate *f_values* to the center point. diff --git a/test/test_cse.py b/test/test_cse.py index 703e8c9f..2f54f34b 100644 --- a/test/test_cse.py +++ b/test/test_cse.py @@ -253,12 +253,12 @@ def test_issue_4499(): G = Function('G') # noqa t = Tuple( *(a, a + S(1)/2, 2*a, b, 2*a - b + 1, (sqrt(z)/2)**(-2*a + 1)*B(2*a - - b, sqrt(z))*B(b - 1, sqrt(z))*G(b)*G(2*a - b + 1), - sqrt(z)*(sqrt(z)/2)**(-2*a + 1)*B(b, sqrt(z))*B(2*a - b, + - b, sqrt(z))*B(b - 1, sqrt(z))*G(b)*G(2*a - b + 1), # noqa + sqrt(z)*(sqrt(z)/2)**(-2*a + 1)*B(b, sqrt(z))*B(2*a - b, # noqa sqrt(z))*G(b)*G(2*a - b + 1), sqrt(z)*(sqrt(z)/2)**(-2*a + 1)*B(b - 1, sqrt(z))*B(2*a - b + 1, sqrt(z))*G(b)*G(2*a - b + 1), - (sqrt(z)/2)**(-2*a + 1)*B(b, sqrt(z))*B(2*a - b + 1, - sqrt(z))*G(b)*G(2*a - b + 1), 1, 0, S(1)/2, z/2, -b + 1, -2*a + b, + (sqrt(z)/2)**(-2*a + 1)*B(b, sqrt(z))*B(2*a - b + 1, # noqa + sqrt(z))*G(b)*G(2*a - b + 1), 1, 0, S(1)/2, z/2, -b + 1, -2*a + b, # noqa -2*a)) # noqa c = cse(t) assert len(c[0]) == 11 -- GitLab From c53e078c9ca9b8281c10bc917bf0fc37124cf178 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 15 May 2020 11:22:43 -0500 Subject: [PATCH 36/48] Fix is_radial --- sumpy/tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index 90540d7a..29f89b1d 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -286,7 +286,8 @@ class RadialDerivativeTaker(MiDerivativeTaker): self.cache_by_mi_q = {(empty_mi, 0): expr} self.r = sym.sqrt(sum(v**2 for v in var_list)) rsym = sym.Symbol("_r") - self.is_radial = expr.xreplace({self.r**2: rsym**2}).free_symbols == {rsym} + r_expr = expr.xreplace({self.r**2: rsym**2}) + self.is_radial = not any(r_expr.has(v) for v in var_list) def diff(self, mi, q=0): """ -- GitLab From ec45a4e46136be2f6cfdda01dff5f175e55e3fcb Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 15 May 2020 16:34:53 -0500 Subject: [PATCH 37/48] Specialize Radial for Helmholtz --- sumpy/expansion/__init__.py | 4 ++-- sumpy/tools.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index b598facc..023553a8 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -666,8 +666,8 @@ class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): self.expansion_terms_wrangler_key = (order, kernel.dim, helmholtz_k_name) def get_kernel_derivative_taker(self, dvec, rscale, sac): - from sumpy.tools import RadialDerivativeTaker - return RadialDerivativeTaker(self.kernel.get_expression(dvec), dvec, + from sumpy.tools import HelmholtzDerivativeTaker, RadialDerivativeTaker + return HelmholtzDerivativeTaker(self.kernel.get_expression(dvec), dvec, rscale, sac) diff --git a/sumpy/tools.py b/sumpy/tools.py index 29f89b1d..fe35fb17 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -185,6 +185,7 @@ class MiDerivativeTaker(object): self.rscale = rscale self.sac = sac self.dim = len(self.var_list) + self.orig_expr = expr def mi_dist(self, a, b): return np.array(a, dtype=int) - np.array(b, dtype=int) @@ -343,6 +344,34 @@ class RadialDerivativeTaker(MiDerivativeTaker): return expr +class HelmholtzDerivativeTaker(RadialDerivativeTaker): + + def diff(self, mi, q=0): + import sumpy.symbolic as sym + if q < 2 or mi != (0,)*self.dim: + return RadialDerivativeTaker.diff(self, mi, q) + try: + return self.cache_by_mi_q[(mi, q)] + except KeyError: + pass + + if self.dim == 2: + # See https://dlmf.nist.gov/10.6.E6 + # and https://dlmf.nist.gov/10.6#E1 + k = self.orig_expr.args[1] / self.r + print("k", k) + expr = - 2 * (q - 1) * self.diff(mi, q - 1) + expr += - k**2 * self.diff(mi, q - 2) + expr /= self.r**2 + else: + # See reference [1] in RadialDerivativeTaker.diff + k = (self.orig_expr * self.r).args[-1] / sym.I / self.r + expr = -(2*q - 1)/self.r**2 * self.diff(mi, q - 1) + expr += -k**2 / self.r * self.diff(mi, q - 2) + self.cache_by_mi_q[(mi, q)] = expr + return expr + + MiDerivativeTakerWrapper = namedtuple('MiDerivativeTakerWrapper', ['taker', 'initial_mi']) -- GitLab From eec015c76ef1c5d28a2610e351e426749cbd1a9e Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Tue, 19 May 2020 21:08:03 -0500 Subject: [PATCH 38/48] Use the sac in more places --- sumpy/expansion/__init__.py | 4 ++-- sumpy/expansion/local.py | 10 ++++----- sumpy/tools.py | 43 ++++++++++++++----------------------- test/test_fmm.py | 6 +++--- 4 files changed, 26 insertions(+), 37 deletions(-) diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index 023553a8..e01ec5e1 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -666,8 +666,8 @@ class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): self.expansion_terms_wrangler_key = (order, kernel.dim, helmholtz_k_name) def get_kernel_derivative_taker(self, dvec, rscale, sac): - from sumpy.tools import HelmholtzDerivativeTaker, RadialDerivativeTaker - return HelmholtzDerivativeTaker(self.kernel.get_expression(dvec), dvec, + from sumpy.tools import HelmholtzYukawaDerivativeTaker + return HelmholtzYukawaDerivativeTaker(self.kernel.get_expression(dvec), dvec, rscale, sac) diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 21743f45..4af1a313 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -187,7 +187,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # This code speeds up derivative taking by caching all kernel # derivatives. - from sumpy.tools import add_mi, _fft_uneval_expr + from sumpy.tools import add_mi from pytools import generate_nonnegative_integer_tuples_below as gnitb max_mi = [0]*self.dim @@ -251,13 +251,14 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # Do the matvec if use_fft: - output = fft_toeplitz_upper_triangular(toeplitz_first_row, vector) + output = fft_toeplitz_upper_triangular(toeplitz_first_row, vector, + sac=sac) else: output = matvec_toeplitz_upper_triangular(toeplitz_first_row, vector) # Filter out the dummy rows and scale them for target result = [] - rscale_ratio = _fft_uneval_expr(tgt_rscale/src_rscale) + rscale_ratio = add_to_sac(sac, tgt_rscale/src_rscale) for term in self.get_coefficient_identifiers(): index = toeplitz_matrix_ident_to_index[term] result.append(output[index]*rscale_ratio**sum(term)) @@ -266,8 +267,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): return result rscale_ratio = tgt_rscale/src_rscale - if sac is not None: - rscale_ratio = sym.Symbol(sac.assign_unique("temp", rscale_ratio)) + rscale_ratio = add_to_sac(sac, rscale_ratio) from sumpy.tools import MiDerivativeTaker from math import factorial diff --git a/sumpy/tools.py b/sumpy/tools.py index fe35fb17..207e42e5 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -104,10 +104,11 @@ def mi_power(vector, mi, evaluate=True): def add_to_sac(sac, expr): import sumpy.symbolic as sym - if sac is not None: - return sym.Symbol(sac.assign_unique("temp", expr)) - else: + numeric_types = (numbers.Number, sym.Number, int, float, complex) + if sac is None or isinstance(expr, numeric_types): return expr + else: + return sym.Symbol(sac.assign_unique("temp", expr)) class MiDerivativeTaker(object): @@ -344,7 +345,7 @@ class RadialDerivativeTaker(MiDerivativeTaker): return expr -class HelmholtzDerivativeTaker(RadialDerivativeTaker): +class HelmholtzYukawaDerivativeTaker(RadialDerivativeTaker): def diff(self, mi, q=0): import sumpy.symbolic as sym @@ -359,7 +360,6 @@ class HelmholtzDerivativeTaker(RadialDerivativeTaker): # See https://dlmf.nist.gov/10.6.E6 # and https://dlmf.nist.gov/10.6#E1 k = self.orig_expr.args[1] / self.r - print("k", k) expr = - 2 * (q - 1) * self.diff(mi, q - 1) expr += - k**2 * self.diff(mi, q - 2) expr /= self.r**2 @@ -1036,25 +1036,13 @@ def solve_symbolic(A, b): # noqa: N803 # {{{ FFT -def _fft_uneval_expr(expr): - """ - Creates a CSE node if the expr is not a numeric type - """ - if isinstance(expr, (numbers.Number, sym.Number, int, float, complex)): - return expr - # UnevaluatedExpr is not implemented in SymEngine, but that's fine - # as SymEngine doesn't distribute 2*(a+b) to 2*a + 2*b. - # SymPy distributes which destroys the complexity. - return sym.UnevaluatedExpr(expr) - - -def _complex_tuple_mul(a, b): +def _complex_tuple_mul(a, b, sac): """ Multiply the two complex numbers represented as a tuple for real and imaginary parts """ - return (_fft_uneval_expr((a[0]*b[0])-(a[1]*b[1])), - _fft_uneval_expr((a[0]*b[1])+(a[1]*b[0]))) + return (add_to_sac(sac, (a[0]*b[0])-(a[1]*b[1])), + add_to_sac(sac, (a[0]*b[1])+(a[1]*b[0]))) def _binary_reverse(n, bits): @@ -1064,7 +1052,7 @@ def _binary_reverse(n, bits): return int(b[::-1], 2) -def fft(seq, inverse=False): +def fft(seq, inverse=False, sac=None): """ Return the discrete fourier transform of the sequence seq. seq should be a python iterable with tuples of length 2 @@ -1103,7 +1091,8 @@ def fft(seq, inverse=False): hf, ut = h // 2, n // h for i in range(0, n, h): for j in range(hf): - u, v = a[i + j], _complex_tuple_mul(a[i + j + hf], w[ut * j]) + u, v = a[i + j], _complex_tuple_mul(a[i + j + hf], w[ut * j], + sac=sac) a[i + j] = (u[0] + v[0], u[1] + v[1]) a[i + j + hf] = (u[0] - v[0], u[1] - v[1]) h *= 2 @@ -1114,7 +1103,7 @@ def fft(seq, inverse=False): return a -def fft_toeplitz_upper_triangular(first_row, x): +def fft_toeplitz_upper_triangular(first_row, x, sac=None): """ Returns the matvec of the Toeplitz matrix given by the first row and the vector x using a Fourier transform @@ -1127,10 +1116,10 @@ def fft_toeplitz_upper_triangular(first_row, x): x = list(reversed(x)) x += [0]*(n-1) - v_fft = fft([(a, 0) for a in v]) - x_fft = fft([(a, 0) for a in x]) - res_fft = [_complex_tuple_mul(a, b) for a, b in zip(v_fft, x_fft)] - res = fft(res_fft, inverse=True) + v_fft = fft([(a, 0) for a in v], sac) + x_fft = fft([(a, 0) for a in x], sac) + res_fft = [_complex_tuple_mul(a, b, sac) for a, b in zip(v_fft, x_fft)] + res = fft(res_fft, inverse=True, sac) return [a for a, _ in reversed(res[:n])] diff --git a/test/test_fmm.py b/test/test_fmm.py index 2bc6d69d..9a4a4e91 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -115,10 +115,10 @@ def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class): # {{{ plot tree if 0: - host_tree = tree.get() - host_trav = trav.get() + host_tree = tree.get(queue) + host_trav = trav.get(queue) - if 1: + if 0: print("src_box", host_tree.find_box_nr_for_source(403)) print("tgt_box", host_tree.find_box_nr_for_target(28)) print(list(host_trav.target_or_target_parent_boxes).index(37)) -- GitLab From a3b14c8ca2dff657b0578be2de06291faa4ff6ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Sun, 24 May 2020 03:26:36 +0200 Subject: [PATCH 39/48] Use "nodefaults" in CI conda env file, remove "inducer" conda channel --- .test-conda-env-py3.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index 2264e926..c9e29e12 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -1,13 +1,12 @@ name: test-conda-env-py3 channels: -- inducer - conda-forge -- defaults +- nodefaults dependencies: - git -- conda-forge::numpy -- conda-forge::sympy +- numpy +- sympy - pocl - islpy - pyopencl -- GitLab From 288ca4175fe23b0e34f637a4186f10690364a711 Mon Sep 17 00:00:00 2001 From: "[6~" Date: Thu, 28 May 2020 17:59:49 -0500 Subject: [PATCH 40/48] Gitlab CI: be specific about which pocl device to use --- .gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d74930eb..7025cb63 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -20,7 +20,7 @@ Python 2.7 POCL: script: - export PY_EXE=python2.7 - - export PYOPENCL_TEST=portable + - export PYOPENCL_TEST=portable:pthread - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" @@ -36,7 +36,7 @@ Python 2.7 POCL: Python 3 POCL: script: - export PY_EXE=python3 - - export PYOPENCL_TEST=portable + - export PYOPENCL_TEST=portable:pthread - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" @@ -106,7 +106,7 @@ Benchmarks: - sed 's/python=3/python=3.6/' .test-conda-env-py3.yml > .test-conda-env.yml - CONDA_ENVIRONMENT=.test-conda-env.yml - PROJECT=sumpy - - PYOPENCL_TEST=portable + - PYOPENCL_TEST=portable:pthread - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-benchmark-py-project.sh - ". ./build-and-benchmark-py-project.sh" tags: -- GitLab From 6d87fb60b608bec4947ffb06a23ab8d0064c84e1 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 24 May 2020 14:00:49 -0500 Subject: [PATCH 41/48] Test if pocl-cuda works --- .gitlab-ci.yml | 20 +++++++++++++++++++- .test-conda-env-py3.yml | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7025cb63..069eeae7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -71,11 +71,29 @@ Python 3.6 Conda: # Disable caching to ensure SymEngine code generation is exercised. - export SUMPY_NO_CACHE=1 - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine + - PYOPENCL_TEST=portable:pthread - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" tags: - - linux + - porter + except: + - tags + artifacts: + reports: + junit: test/pytest.xml + +Python 3.6 Pocl Titan X: + script: + # Disable caching to ensure SymEngine code generation is exercised. + - export SUMPY_NO_CACHE=1 + - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine + - PYOPENCL_TEST=portable:titan + - CONDA_ENVIRONMENT=.test-conda-env-py3.yml + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh + - ". ./build-and-test-py-project-within-miniconda.sh" + tags: + - nvidia-titan-x except: - tags artifacts: diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index c9e29e12..f9ede8fc 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -8,6 +8,7 @@ dependencies: - numpy - sympy - pocl +- pocl-cuda - islpy - pyopencl - python=3 -- GitLab From 22f7d11b67079cb1ef1bf3c574439db4492faf3e Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 29 May 2020 05:19:02 +0200 Subject: [PATCH 42/48] Add export --- .gitlab-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 069eeae7..cdcc0e80 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -71,7 +71,7 @@ Python 3.6 Conda: # Disable caching to ensure SymEngine code generation is exercised. - export SUMPY_NO_CACHE=1 - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine - - PYOPENCL_TEST=portable:pthread + - export PYOPENCL_TEST=portable:pthread - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" @@ -88,7 +88,7 @@ Python 3.6 Pocl Titan X: # Disable caching to ensure SymEngine code generation is exercised. - export SUMPY_NO_CACHE=1 - export SUMPY_FORCE_SYMBOLIC_BACKEND=symengine - - PYOPENCL_TEST=portable:titan + - export PYOPENCL_TEST=portable:titan - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - ". ./build-and-test-py-project-within-miniconda.sh" -- GitLab From 4eb31382128aaab0f5cf2114b7c5dc14387d1394 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Sun, 31 May 2020 02:58:58 +0200 Subject: [PATCH 43/48] Apply suggestion to .gitlab-ci.yml --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cdcc0e80..0ff1485f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -83,7 +83,7 @@ Python 3.6 Conda: reports: junit: test/pytest.xml -Python 3.6 Pocl Titan X: +Python 3.6 POCL Titan X: script: # Disable caching to ensure SymEngine code generation is exercised. - export SUMPY_NO_CACHE=1 -- GitLab From 6c089fa3ecae4ecab7826161461ba9ba0dc06e0c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 4 Jun 2020 01:12:45 -0500 Subject: [PATCH 44/48] Drop Py2 support; bump version --- .github/workflows/ci.yml | 14 -------------- .gitlab-ci.yml | 16 ---------------- sumpy/version.py | 2 +- 3 files changed, 1 insertion(+), 31 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 463b78d1..149f0473 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,20 +24,6 @@ jobs: curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh . ./prepare-and-run-flake8.sh sumpy test - pytest2: - name: Conda Pytest Py2 - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: "Main Script" - run: | - sed 's/python=3/python=2.7/' .test-conda-env-py3.yml > .test-conda-env-py2-pre.yml - grep -v symengine .test-conda-env-py2-pre.yml > .test-conda-env-py2.yml - cat .test-conda-env-py2.yml - CONDA_ENVIRONMENT=.test-conda-env-py2.yml - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh - . ./build-and-test-py-project-within-miniconda.sh - pytest3: name: Conda Pytest Py3 runs-on: ubuntu-latest diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7025cb63..216048c2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,22 +17,6 @@ # reports: # junit: test/pytest.xml -Python 2.7 POCL: - script: - - export PY_EXE=python2.7 - - export PYOPENCL_TEST=portable:pthread - - export EXTRA_INSTALL="pybind11 numpy mako" - - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - - ". ./build-and-test-py-project.sh" - tags: - - python2.7 - - pocl - except: - - tags - artifacts: - reports: - junit: test/pytest.xml - Python 3 POCL: script: - export PY_EXE=python3 diff --git a/sumpy/version.py b/sumpy/version.py index f4233592..f44cfe20 100644 --- a/sumpy/version.py +++ b/sumpy/version.py @@ -43,7 +43,7 @@ else: # }}} -VERSION = (2016, 1) +VERSION = (2020, 1) VERSION_STATUS = "beta1" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS -- GitLab From 8ce9a3aca14072d00a5fa18520f2efa11803f568 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 7 Jun 2020 15:21:51 -0500 Subject: [PATCH 45/48] Initial work for m2l precompute --- sumpy/e2e.py | 276 +++++++++++++++++++++++++++++++++++++++ sumpy/expansion/local.py | 163 ++++++++++++++--------- sumpy/fmm.py | 192 +++++++++++++++++++++++++-- sumpy/kernel.py | 7 + sumpy/tools.py | 20 ++- 5 files changed, 576 insertions(+), 82 deletions(-) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 99f75e81..8a37428f 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -255,6 +255,282 @@ class E2EFromCSR(E2EBase): # }}} +# {{{ + +class E2EFromCSRTranslationInvariant(E2EFromCSR): + """Implements translation from a "compressed sparse row"-like source box + list for translation invariant Kernels. + """ + default_name = "e2e_from_csr_translation_invariant" + + def get_translation_loopy_insns(self): + from sumpy.symbolic import make_sym_vector + dvec = make_sym_vector("d", self.dim) + + src_coeff_exprs = [sym.Symbol("src_coeff%d" % i) + for i in range(len(self.src_expansion))] + src_rscale = sym.Symbol("src_rscale") + + tgt_rscale = sym.Symbol("tgt_rscale") + + nprecomputed_exprs = len(self.m2l_global_precompute_mis(self.src_expansion)) + + precomputed_exprs = [sym.Symbol("precomputed_expr%d" % i) + for i in range(nprecomputed_exprs)] + + from sumpy.assignment_collection import SymbolicAssignmentCollection + sac = SymbolicAssignmentCollection() + tgt_coeff_names = [ + sac.assign_unique("coeff%d" % i, coeff_i) + for i, coeff_i in enumerate( + self.tgt_expansion.translate_from( + self.src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac, + precomputed_exprs=precomputed_exprs))] + + sac.run_global_cse() + + from sumpy.codegen import to_loopy_insns + return to_loopy_insns( + six.iteritems(sac.assignments), + vector_names=set(["d"]), + pymbolic_expr_maps=[self.tgt_expansion.get_code_transformer()], + retain_names=tgt_coeff_names, + complex_dtype=np.complex128 # FIXME + ) + + def get_kernel(self): + ncoeff_src = len(self.src_expansion) + ncoeff_tgt = len(self.tgt_expansion) + nprecomputed_exprs = len(self.m2l_global_precompute_mis(self.src_expansion)) + + # To clarify terminology: + # + # isrc_box -> The index in a list of (in this case, source) boxes + # src_ibox -> The (global) box number for the (in this case, source) box + # + # (same for itgt_box, tgt_ibox) + + from sumpy.tools import gather_loopy_arguments + loopy_knl = lp.make_kernel( + [ + "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] + + <> tgt_center[idim] = centers[idim, tgt_ibox] \ + + <> isrc_start = src_box_starts[itgt_box] + <> isrc_stop = src_box_starts[itgt_box+1] + + for isrc_box + <> src_ibox = src_box_lists[isrc_box] \ + {id=read_src_ibox} + + <> src_center[idim] = centers[idim, src_ibox] {dup=idim} + <> d[idim] = tgt_center[idim] - src_center[idim] \ + {dup=idim} + <> translation_class = translation_classes_lists[isrc_box] + <> translation_class_idx_start = translation_class * \ + nprecomputed_exprs + """] + [""" + <> precomputed_expr{idx} = \ + m2l_precomputed_exprs[translation_class_idx_start + {idx}] + """.format(idx=idx) for idx in range(nprecomputed_exprs) + ] + [""" + <> src_coeff{coeffidx} = \ + src_expansions[src_ibox - src_base_ibox, {coeffidx}] \ + {{dep=read_src_ibox}} + """.format(coeffidx=i) for i in range(ncoeff_src)] + [ + + ] + self.get_translation_loopy_insns() + [""" + end + + """] + [""" + tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \ + simul_reduce(sum, isrc_box, coeff{coeffidx}) \ + {{id_prefix=write_expn}} + """.format(coeffidx=i) for i in range(ncoeff_tgt)] + [""" + end + """], + [ + lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), + lp.ValueArg("src_rscale,tgt_rscale", None), + lp.GlobalArg("src_box_starts, src_box_lists", + None, shape=None, strides=(1,), offset=lp.auto), + lp.ValueArg("aligned_nboxes,tgt_base_ibox,src_base_ibox", + np.int32), + lp.ValueArg("nsrc_level_boxes,ntgt_level_boxes", + np.int32), + lp.GlobalArg("src_expansions", None, + shape=("nsrc_level_boxes", ncoeff_src), offset=lp.auto), + lp.GlobalArg("tgt_expansions", None, + shape=("ntgt_level_boxes", ncoeff_tgt), offset=lp.auto), + lp.GlobalArg("translation_classes_lists", + None, shape=None, strides=(1,), offset=np.int32), + lp.GlobalArg("m2l_precomputed_exprs", None, + shape=(nprecomputed_exprs*ntranslation_classes,), offset=lp.auto), + "..." + ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), + name=self.name, + assumptions="ntgt_boxes>=1", + silenced_warnings="write_race(write_expn*)", + default_offset=lp.auto, + fixed_parameters=dict(dim=self.dim, + nprecomputed_exprs=nprecomputed_exprs), + lang_version=MOST_RECENT_LANGUAGE_VERSION + ) + + for expn in [self.src_expansion, self.tgt_expansion]: + loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + + return loopy_knl + + +class E2EFromCSRTranslationClassesPrecompute(E2EFromCSR): + """Implements precomputing the translation classes dependent + derivatives. + """ + default_name = "e2e_from_csr_translation_classes_precompute" + + def get_translation_loopy_insns(self): + from sumpy.symbolic import make_sym_vector + dvec = make_sym_vector("d", self.dim) + + src_coeff_exprs = [sym.Symbol("src_coeff%d" % i) + for i in range(len(self.src_expansion))] + src_rscale = sym.Symbol("src_rscale") + + tgt_rscale = sym.Symbol("tgt_rscale") + + nprecomputed_exprs = len(self.m2l_global_precompute_exprs(self.src_expansion)) + + precomputed_exprs = [sym.Symbol("precomputed_expr%d" % i) + for i in range(nprecomputed_exprs)] + + from sumpy.assignment_collection import SymbolicAssignmentCollection + sac = SymbolicAssignmentCollection() + tgt_coeff_names = [ + sac.assign_unique("coeff%d" % i, coeff_i) + for i, coeff_i in enumerate( + self.tgt_expansion.translate_from( + self.src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac, + precomputed_exprs=precomputed_exprs))] + + sac.run_global_cse() + + from sumpy.codegen import to_loopy_insns + return to_loopy_insns( + six.iteritems(sac.assignments), + vector_names=set(["d"]), + pymbolic_expr_maps=[self.tgt_expansion.get_code_transformer()], + retain_names=tgt_coeff_names, + complex_dtype=np.complex128 # FIXME + ) + + def get_kernel(self): + ncoeff_src = len(self.src_expansion) + ncoeff_tgt = len(self.tgt_expansion) + nprecomputed_exprs = len(self.m2l_global_precompute_mis(self.src_expansion)) + + # To clarify terminology: + # + # isrc_box -> The index in a list of (in this case, source) boxes + # src_ibox -> The (global) box number for the (in this case, source) box + # + # (same for itgt_box, tgt_ibox) + + from sumpy.tools import gather_loopy_arguments + loopy_knl = lp.make_kernel( + [ + "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] + + <> tgt_center[idim] = centers[idim, tgt_ibox] \ + + <> isrc_start = src_box_starts[itgt_box] + <> isrc_stop = src_box_starts[itgt_box+1] + + for isrc_box + <> src_ibox = src_box_lists[isrc_box] \ + {id=read_src_ibox} + + <> src_center[idim] = centers[idim, src_ibox] {dup=idim} + <> d[idim] = tgt_center[idim] - src_center[idim] \ + {dup=idim} + <> translation_class = translation_classes_lists[isrc_box] + <> translation_class_idx_start = translation_class * \ + nprecomputed_exprs + """] + [""" + <> precomputed_expr{idx} = \ + m2l_precomputed_exprs[translation_class_idx_start + {idx}] + """.format(idx=idx) for idx in range(nprecomputed_exprs) + ] + [""" + <> src_coeff{coeffidx} = \ + src_expansions[src_ibox - src_base_ibox, {coeffidx}] \ + {{dep=read_src_ibox}} + """.format(coeffidx=i) for i in range(ncoeff_src)] + [ + + ] + self.get_translation_loopy_insns() + [""" + end + + """] + [""" + tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \ + simul_reduce(sum, isrc_box, coeff{coeffidx}) \ + {{id_prefix=write_expn}} + """.format(coeffidx=i) for i in range(ncoeff_tgt)] + [""" + end + """], + [ + lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), + lp.ValueArg("src_rscale,tgt_rscale", None), + lp.GlobalArg("src_box_starts, src_box_lists", + None, shape=None, strides=(1,), offset=lp.auto), + lp.ValueArg("aligned_nboxes,tgt_base_ibox,src_base_ibox", + np.int32), + lp.ValueArg("nsrc_level_boxes,ntgt_level_boxes", + np.int32), + lp.GlobalArg("src_expansions", None, + shape=("nsrc_level_boxes", ncoeff_src), offset=lp.auto), + lp.GlobalArg("tgt_expansions", None, + shape=("ntgt_level_boxes", ncoeff_tgt), offset=lp.auto), + lp.GlobalArg("translation_classes_lists", + None, shape=None, strides=(1,), offset=np.int32), + lp.GlobalArg("m2l_precomputed_exprs", None, + shape=(nprecomputed_exprs*ntranslation_classes,), offset=lp.auto), + "..." + ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), + name=self.name, + assumptions="ntgt_boxes>=1", + silenced_warnings="write_race(write_expn*)", + default_offset=lp.auto, + fixed_parameters=dict(dim=self.dim, + nprecomputed_exprs=nprecomputed_exprs), + lang_version=MOST_RECENT_LANGUAGE_VERSION + ) + + for expn in [self.src_expansion, self.tgt_expansion]: + loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + + return loopy_knl + +# }}} # {{{ translation from a box's children diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 4af1a313..4249e947 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -24,7 +24,7 @@ THE SOFTWARE. from six.moves import range, zip import sumpy.symbolic as sym -from pytools import single_valued +from pytools import single_valued, memoize_method from sumpy.expansion import ( ExpansionBase, VolumeTaylorExpansion, LaplaceConformingVolumeTaylorExpansion, @@ -112,7 +112,6 @@ class LineTaylorLocalExpansion(LocalExpansionBase): # }}} - # {{{ volume taylor class VolumeTaylorLocalExpansionBase(LocalExpansionBase): @@ -163,8 +162,94 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): knl = self.kernel return knl.postprocess_at_target(result, bvec) - def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, + + @memoize_method + def m2l_global_precompute_mis(self, src_expansion): + from pytools import generate_nonnegative_integer_tuples_below as gnitb + + max_mi = [0]*self.dim + for i in range(self.dim): + max_mi[i] = max(mi[i] for mi in + src_expansion.get_coefficient_identifiers()) + max_mi[i] += max(mi[i] for mi in + self.get_coefficient_identifiers()) + + toeplitz_matrix_coeffs = list(gnitb([m + 1 for m in max_mi])) + return toeplitz_matrix_coeffs, max_mi + + @memoize_method + def m2l_global_precompute_exprs(self, src_expansion, src_rscale, dvec, tgt_rscale, sac, use_fft=False): + # We know the general form of the multipole expansion is: + # + # coeff0 * diff(kernel(src - c1), mi0) + + # coeff1 * diff(kernel(src - c1, mi1) + ... + # + # To get the local expansion coefficients, we take derivatives of + # the multipole expansion. For eg: the coefficient w.r.t mir is + # + # coeff0 * diff(kernel(c2 - c1), mi0 + mir) + + # coeff1 * diff(kernel(c2 - c1, mi1 + mir) + ... + # + # The derivatives above depends only on `c2 - c1` and can be precomputed + # globally as there are only a finite number of values for `c2 - c1` for + # m2l. + + if not self.use_rscale: + src_rscale = 1 + tgt_rscale = 1 + + from sumpy.tools import add_mi + + toeplitz_matrix_coeffs, max_mi = self.m2l_global_precompute_mis(self, src_expansion) + toeplitz_matrix_ident_to_index = dict((ident, i) for i, ident in + enumerate(toeplitz_matrix_coeffs)) + + # Create a expansion terms wrangler for derivatives up to order + # (tgt order)+(src order) including a corresponding reduction matrix + # For eg: 2D full Taylor Laplace, this is (n, m), + # n+m<=2*p, n<=2*p, m<=2*p + srcplusderiv_terms_wrangler = \ + src_expansion.expansion_terms_wrangler.copy( + order=self.order + src_expansion.order, max_mi=tuple(max_mi)) + srcplusderiv_full_coeff_ids = \ + srcplusderiv_terms_wrangler.get_full_coefficient_identifiers() + srcplusderiv_ident_to_index = dict((ident, i) for i, ident in + enumerate(srcplusderiv_full_coeff_ids)) + + # The vector has the kernel derivatives and depends only on the distance + # between the two centers + taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale, sac) + vector_stored = [] + # Calculate the kernel derivatives for the compressed set + for term in \ + srcplusderiv_terms_wrangler.get_coefficient_identifiers(): + kernel_deriv = taker.diff(term) + vector_stored.append(kernel_deriv) + # Calculate the kernel derivatives for the full set + vector_full = \ + srcplusderiv_terms_wrangler.get_full_kernel_derivatives_from_stored( + vector_stored, src_rscale) + + needed_vector_terms = set([]) + # For eg: 2D full Taylor Laplace, we only need kernel derivatives + # (n1+n2, m1+m2), n1+m1<=p, n2+m2<=p + for tgt_deriv in self.get_coefficient_identifiers(): + for src_deriv in src_expansion.get_coefficient_identifiers(): + needed = add_mi(src_deriv, tgt_deriv) + needed_vector_terms.add(needed) + + vector = [0]*len(toeplitz_matrix_coeffs) + for i, term in enumerate(toeplitz_matrix_coeffs): + if term in srcplusderiv_ident_to_index: + vector[i] = add_to_sac(sac, + vector_full[srcplusderiv_ident_to_index[term]]) + + return vector + + + def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac, precomputed_exprs=None, use_fft=False): logger.info("building translation operator: %s(%d) -> %s(%d): start" % (type(src_expansion).__name__, src_expansion.order, @@ -176,70 +261,18 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): tgt_rscale = 1 from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansionBase - if isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase): - # We know the general form of the multipole expansion is: - # - # coeff0 * diff(kernel, mi0) + coeff1 * diff(kernel, mi1) + ... - # - # To get the local expansion coefficients, we take derivatives of - # the multipole expansion. - # - # This code speeds up derivative taking by caching all kernel - # derivatives. - - from sumpy.tools import add_mi - from pytools import generate_nonnegative_integer_tuples_below as gnitb - - max_mi = [0]*self.dim - for i in range(self.dim): - max_mi[i] = max(mi[i] for mi in - src_expansion.get_coefficient_identifiers()) - max_mi[i] += max(mi[i] for mi in - self.get_coefficient_identifiers()) - toeplitz_matrix_coeffs = list(gnitb([m + 1 for m in max_mi])) + if isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase): + toeplitz_matrix_coeffs, max_mi = \ + self.m2l_global_precompute_mis(src_expansion) toeplitz_matrix_ident_to_index = dict((ident, i) for i, ident in enumerate(toeplitz_matrix_coeffs)) - # Create a expansion terms wrangler for derivatives up to order - # (tgt order)+(src order) including a corresponding reduction matrix - # For eg: 2D full Taylor Laplace, this is (n, m), - # n+m<=2*p, n<=2*p, m<=2*p - srcplusderiv_terms_wrangler = \ - src_expansion.expansion_terms_wrangler.copy( - order=self.order + src_expansion.order, max_mi=tuple(max_mi)) - srcplusderiv_full_coeff_ids = \ - srcplusderiv_terms_wrangler.get_full_coefficient_identifiers() - srcplusderiv_ident_to_index = dict((ident, i) for i, ident in - enumerate(srcplusderiv_full_coeff_ids)) - - # The vector has the kernel derivatives and depends only on the distance - # between the two centers - taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale, sac) - vector_stored = [] - # Calculate the kernel derivatives for the compressed set - for term in \ - srcplusderiv_terms_wrangler.get_coefficient_identifiers(): - kernel_deriv = taker.diff(term) - vector_stored.append(kernel_deriv) - # Calculate the kernel derivatives for the full set - vector_full = \ - srcplusderiv_terms_wrangler.get_full_kernel_derivatives_from_stored( - vector_stored, src_rscale) - - needed_vector_terms = set([]) - # For eg: 2D full Taylor Laplace, we only need kernel derivatives - # (n1+n2, m1+m2), n1+m1<=p, n2+m2<=p - for tgt_deriv in self.get_coefficient_identifiers(): - for src_deriv in src_expansion.get_coefficient_identifiers(): - needed = add_mi(src_deriv, tgt_deriv) - needed_vector_terms.add(needed) - - vector = [0]*len(toeplitz_matrix_coeffs) - for i, term in enumerate(toeplitz_matrix_coeffs): - if term in srcplusderiv_ident_to_index: - vector[i] = add_to_sac(sac, - vector_full[srcplusderiv_ident_to_index[term]]) + if precomputed_exprs is None: + derivatives = self.m2l_global_precompute_exprs(src_expansion, src_rscale, + dvec, tgt_rscale, sac, use_fft=use_fft) + else: + derivatives = precomputed_exprs # Calculate the first row of the upper triangular Toeplitz matrix toeplitz_first_row = [0] * len(toeplitz_matrix_coeffs) @@ -251,10 +284,10 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # Do the matvec if use_fft: - output = fft_toeplitz_upper_triangular(toeplitz_first_row, vector, + output = fft_toeplitz_upper_triangular(toeplitz_first_row, derivatives, sac=sac) else: - output = matvec_toeplitz_upper_triangular(toeplitz_first_row, vector) + output = matvec_toeplitz_upper_triangular(toeplitz_first_row, derivatives) # Filter out the dummy rows and scale them for target result = [] diff --git a/sumpy/fmm.py b/sumpy/fmm.py index a2331756..9611a9cf 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -116,6 +116,18 @@ class SumpyExpansionWranglerCodeContainer(object): self.multipole_expansion(src_order), self.local_expansion(tgt_order)) + @memoize_method + def m2l_optimized(self, src_order, tgt_order): + return E2EFromCSRTranslationInvariant(self.cl_context, + self.multipole_expansion(src_order), + self.local_expansion(tgt_order)) + + @memoize_method + def m2l_optimized_precompute_kernel(self, src_order, tgt_order): + return E2EFromCSRTranslationClassesPrecompute(self.cl_context, + self.multipole_expansion(src_order), + self.local_expansion(tgt_order)) + @memoize_method def l2l(self, src_order, tgt_order): return E2EFromParent(self.cl_context, @@ -196,6 +208,51 @@ class SumpyTimingFuture(object): # }}} +# {{{ translation classes data + +class SumpyTranslationClassesData: + """A class for building and storing additional, optional data for + precomputation of translation classes passed to the expansion wrangler.""" + + def __init__(self, queue, trav, is_translation_per_level=True): + self.queue = queue + self.trav = trav + self.tree = trav.tree + self.is_translation_per_level = is_translation_per_level + + @property + @memoize_method + def translation_classes_builder(self): + from boxtree.translation_classes import TranslationClassesBuilder + return TranslationClassesBuilder(self.queue.context) + + @memoize_method + def build_translation_classes_lists(self): + trav = self.trav.to_device(self.queue) + tree = self.tree.to_device(self.queue) + return self.rotation_classes_builder(self.queue, trav, tree, + is_translation_per_level)[0] + + @memoize_method + def m2l_translation_classes_lists(self): + return (self + .build_rotation_classes_lists() + .from_sep_siblings_translation_classes + .get(self.queue)) + + @memoize_method + def m2l_translation_vectors(self): + return (self + .build_rotation_classes_lists() + .from_sep_siblings_translation_class_to_distance_vector + .get(self.queue)) + + +class SumpyTranslationClassesDataNotSuppliedWarning(UserWarning): + pass + +# }}} + # {{{ expansion wrangler @@ -223,7 +280,8 @@ class SumpyExpansionWrangler(object): def __init__(self, code_container, queue, tree, dtype, fmm_level_to_order, source_extra_kwargs, kernel_extra_kwargs=None, - self_extra_kwargs=None): + self_extra_kwargs=None, + translation_classes_data=None): self.code = code_container self.queue = queue self.tree = tree @@ -253,14 +311,31 @@ class SumpyExpansionWrangler(object): self.extra_kwargs = source_extra_kwargs.copy() self.extra_kwargs.update(self.kernel_extra_kwargs) + if base_kernel.is_translation_invariant: + if translation_classes_data is None: + from warnings import warn + warn( + "List 2 (multipole-to-local) translations will be " + "unoptimized. Supply a translation_classes_data argument to " + "the wrangler for optimized List 2.", + SumpyTranslationClassesDataNotSuppliedWarning, + + stacklevel=2) + self.supports_optimized_m2l = False + else: + self.supports_optimized_m2l = True + else: + self.supports_optimized_m2l = False + + self.translation_classes_data = translation_classes_data + + # {{{ data vector utilities - def _expansions_level_starts(self, order_to_size): + def _expansions_level_starts(self, order_to_size, level_starts): result = [0] for lev in range(self.tree.nlevels): - lev_nboxes = ( - self.tree.level_start_box_nrs[lev+1] - - self.tree.level_start_box_nrs[lev]) + lev_nboxes = level_starts[lev+1] - level_starts[lev] expn_size = order_to_size(self.level_orders[lev]) result.append( @@ -272,12 +347,29 @@ class SumpyExpansionWrangler(object): @memoize_method def multipole_expansions_level_starts(self): return self._expansions_level_starts( - lambda order: len(self.code.multipole_expansion_factory(order))) + lambda order: len(self.code.multipole_expansion(order)), + level_starts=self.tree.level_start_box_nrs) @memoize_method def local_expansions_level_starts(self): return self._expansions_level_starts( - lambda order: len(self.code.local_expansion_factory(order))) + lambda order: len(self.code.local_expansion_factory(order)), + level_starts=self.tree.level_start_box_nrs) + + @memoize_method + def m2l_translation_class_level_start_box_nrs(self): + data = self.translation_classes_data + return data.from_sep_siblings_translation_class_level_starts.get() + + @memoize_method + def m2l_precomputed_exprs_level_starts(self): + def order_to_size(order): + mpole_expn = self.code.multipole_expansion_factory(order) + local_expn = self.code.local_expansion_factory(order) + return len(local_expn.m2l_global_precompute_mis(mpole_expn)[0]) + + return self._expansions_level_starts(order_to_size, + level_starts=self.m2l_translation_class_level_start_box_nrs()) def multipole_expansion_zeros(self): return cl.array.zeros( @@ -291,6 +383,12 @@ class SumpyExpansionWrangler(object): self.local_expansions_level_starts()[-1], dtype=self.dtype) + def m2l_precomputed_exprs_zeros(self): + return cl.array.zeros( + self.queue, + self.m2l_precomputed_exprs_level_starts()[-1], + dtype=self.dtype) + def multipole_expansions_view(self, mpole_exps, level): expn_start, expn_stop = \ self.multipole_expansions_level_starts()[level:level+2] @@ -307,6 +405,16 @@ class SumpyExpansionWrangler(object): return (box_start, local_exps[expn_start:expn_stop].reshape(box_stop-box_start, -1)) + def m2l_precomputed_exprs_view(self, m2l_precomputed_exprs, level): + expn_start, expn_stop = \ + self.m2l_precomputed_exprs_level_starts()[level:level+2] + translation_class_start, translation_class_stop = \ + self.m2l_translation_class_level_start_box_nrs()[level:level+2] + + exprs_level = m2l_precomputed_exprs[expn_start:expn_stop] + return (translation_class_start, exprs_level.reshape( + translation_class_stop - translation_class_start, -1)) + def output_zeros(self): from pytools.obj_array import make_obj_array return make_obj_array([ @@ -470,6 +578,50 @@ class SumpyExpansionWrangler(object): return (pot, SumpyTimingFuture(self.queue, events)) + @memoize_method + def _multipole_to_local_precompute(self, src_rscale): + m2l_precomputed_exprs = self.m2l_precomputed_exprs_zeros() + events = [] + for lev in range(self.tree.nlevels): + start, stop = level_start_target_box_nrs[lev:lev+2] + if start == stop: + events.append([]) + continue + + order = self.level_orders[lev] + precompute_kernel = self.code.m2l_precompute(order) + + translation_classes_level_start, precomputed_exprs_view = \ + self.m2l_precomputed_exprs_view(m2l_precomputed_exprs, lev) + + evt, _ = precompute_kernel( + self.queue, + src_rscale=src_rscale, + translation_classes_level_start=translation_classes_level_start, + precomputed_exprs = precomputed_exprs_view, + **self.kernel_extra_kwargs + ) + + events.append([evt]) + + return m2l_precomputed_exprs, events + + def multipole_to_local_precompute(self, info, lev): + if not self.supports_optimized_m2l: + return + src_rscale = info["src_rscale"] + m2l_precomputed_exprs, events = self._multipole_to_local_precompute(self, src_rscale) + _, precomputed_exprs_view = self.m2l_precomputed_exprs_view(m2l_precomputed_exprs, lev) + info["m2l_precomputed_exprs"] = precomputed_exprs_view + info["wait_for"] = events[lev] + + @property + def m2l_class(self): + if self.supports_optimized_m2l: + return self.code.m2l_optimized + else: + return self.code.m2l + def multipole_to_local(self, level_start_target_box_nrs, target_boxes, src_box_starts, src_box_lists, @@ -477,23 +629,20 @@ class SumpyExpansionWrangler(object): local_exps = self.local_expansion_zeros() events = [] - for lev in range(self.tree.nlevels): start, stop = level_start_target_box_nrs[lev:lev+2] if start == stop: continue order = self.level_orders[lev] - m2l = self.code.m2l(order, order) + m2l = self.m2l_class(order, order) source_level_start_ibox, source_mpoles_view = \ self.multipole_expansions_view(mpole_exps, lev) target_level_start_ibox, target_local_exps_view = \ self.local_expansions_view(local_exps, lev) - evt, (local_exps_res,) = m2l( - self.queue, - + kwargs = dict( src_expansions=source_mpoles_view, src_base_ibox=source_level_start_ibox, tgt_expansions=target_local_exps_view, @@ -507,7 +656,10 @@ class SumpyExpansionWrangler(object): src_rscale=level_to_rscale(self.tree, lev), tgt_rscale=level_to_rscale(self.tree, lev), - **self.kernel_extra_kwargs) + **self.kernel_extra_kwargs + ) + self.multipole_to_local_precompute(info, lev) + evt, _ = m2l(self.queue, **kwargs) events.append(evt) return (local_exps, SumpyTimingFuture(self.queue, events)) @@ -523,6 +675,20 @@ class SumpyExpansionWrangler(object): wait_for = mpole_exps.events + for isrc_level, ssn in enumerate(source_boxes_by_level): + if len(target_boxes_by_source_level[isrc_level]) == 0: + continue + + m2p = self.code.m2p(self.level_orders[isrc_level]) + + source_level_start_ibox, source_mpoles_view = \ + self.multipole_expansions_view(mpole_exps, isrc_level) + kwargs.update(self.box_target_list_kwargs()) + + events = [] + + wait_for = mpole_exps.events + for isrc_level, ssn in enumerate(source_boxes_by_level): if len(target_boxes_by_source_level[isrc_level]) == 0: continue diff --git a/sumpy/kernel.py b/sumpy/kernel.py index 092cbb49..e8d046b9 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -356,6 +356,8 @@ class ExpressionKernel(Kernel): key_builder.rec(key_hash, value) mapper_method = "map_expression_kernel" + # TODO: Allow kernels that are not translation invariant + is_translation_invariant = True one_kernel_2d = ExpressionKernel( @@ -401,6 +403,7 @@ class LaplaceKernel(ExpressionKernel): return "LapKnl%dD" % self.dim mapper_method = "map_laplace_kernel" + is_translation_invariant = True class BiharmonicKernel(ExpressionKernel): @@ -437,6 +440,7 @@ class BiharmonicKernel(ExpressionKernel): return "BiharmKnl%dD" % self.dim mapper_method = "map_biharmonic_kernel" + is_translation_invariant = True class HelmholtzKernel(ExpressionKernel): @@ -507,6 +511,7 @@ class HelmholtzKernel(ExpressionKernel): )] mapper_method = "map_helmholtz_kernel" + is_translation_invariant = True class YukawaKernel(ExpressionKernel): @@ -565,6 +570,7 @@ class YukawaKernel(ExpressionKernel): )] mapper_method = "map_yukawa_kernel" + is_translation_invariant = True class StokesletKernel(ExpressionKernel): @@ -632,6 +638,7 @@ class StokesletKernel(ExpressionKernel): )] mapper_method = "map_stokeslet_kernel" + is_translation_invariant = True class StressletKernel(ExpressionKernel): diff --git a/sumpy/tools.py b/sumpy/tools.py index 207e42e5..63c61058 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -1052,6 +1052,16 @@ def _binary_reverse(n, bits): return int(b[::-1], 2) +def _padded_fft_size(n): + if n < 2: + return n + b = n.bit_length() - 1 + if n & (n - 1): # not a power of 2 + b += 1 + n = 2**b + return n + + def fft(seq, inverse=False, sac=None): """ Return the discrete fourier transform of the sequence seq. @@ -1064,10 +1074,8 @@ def fft(seq, inverse=False, sac=None): if n < 2: return a + n = _padded_fft_size(n) b = n.bit_length() - 1 - if n & (n - 1): # not a power of 2 - b += 1 - n = 2**b a += [(0, 0)]*(n - len(a)) for i in range(1, n): @@ -1119,10 +1127,14 @@ def fft_toeplitz_upper_triangular(first_row, x, sac=None): v_fft = fft([(a, 0) for a in v], sac) x_fft = fft([(a, 0) for a in x], sac) res_fft = [_complex_tuple_mul(a, b, sac) for a, b in zip(v_fft, x_fft)] - res = fft(res_fft, inverse=True, sac) + res = fft(res_fft, inverse=True, sac=sac) return [a for a, _ in reversed(res[:n])] +def fft_toeplitz_upper_triangular_lwork(n): + return _padded_fft_size(2*n - 1) + + def matvec_toeplitz_upper_triangular(first_row, vector): n = len(first_row) assert len(vector) == n -- GitLab From 76353a83b60c77279f4dded98ba00e16bd5473dc Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 7 Jun 2020 21:46:25 -0500 Subject: [PATCH 46/48] More m2l precompute --- sumpy/__init__.py | 3 ++- sumpy/expansion/local.py | 5 +---- sumpy/fmm.py | 48 +++++++++++++++++++++++++--------------- test/test_fmm.py | 6 +++-- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/sumpy/__init__.py b/sumpy/__init__.py index fd0fc515..1dd79a4a 100644 --- a/sumpy/__init__.py +++ b/sumpy/__init__.py @@ -26,7 +26,8 @@ import os from sumpy.p2p import P2P, P2PFromCSR from sumpy.p2e import P2EFromSingleBox, P2EFromCSR from sumpy.e2p import E2PFromSingleBox, E2PFromCSR -from sumpy.e2e import E2EFromCSR, E2EFromChildren, E2EFromParent +from sumpy.e2e import (E2EFromCSR, E2EFromChildren, E2EFromParent, + E2EFromCSRTranslationInvariant) from sumpy.version import VERSION_TEXT from pytools.persistent_dict import WriteOncePersistentDict diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 4249e947..1845cd3f 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -162,8 +162,6 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): knl = self.kernel return knl.postprocess_at_target(result, bvec) - - @memoize_method def m2l_global_precompute_mis(self, src_expansion): from pytools import generate_nonnegative_integer_tuples_below as gnitb @@ -177,7 +175,6 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): toeplitz_matrix_coeffs = list(gnitb([m + 1 for m in max_mi])) return toeplitz_matrix_coeffs, max_mi - @memoize_method def m2l_global_precompute_exprs(self, src_expansion, src_rscale, dvec, tgt_rscale, sac, use_fft=False): # We know the general form of the multipole expansion is: @@ -201,7 +198,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): from sumpy.tools import add_mi - toeplitz_matrix_coeffs, max_mi = self.m2l_global_precompute_mis(self, src_expansion) + toeplitz_matrix_coeffs, max_mi = self.m2l_global_precompute_mis(src_expansion) toeplitz_matrix_ident_to_index = dict((ident, i) for i, ident in enumerate(toeplitz_matrix_coeffs)) diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 9611a9cf..6f4192bc 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -40,7 +40,8 @@ from sumpy import ( P2EFromSingleBox, P2EFromCSR, E2PFromSingleBox, E2PFromCSR, P2PFromCSR, - E2EFromCSR, E2EFromChildren, E2EFromParent) + E2EFromCSR, E2EFromCSRTranslationInvariant, + E2EFromChildren, E2EFromParent) def level_to_rscale(tree, level): @@ -154,9 +155,11 @@ class SumpyExpansionWranglerCodeContainer(object): def get_wrangler(self, queue, tree, dtype, fmm_level_to_order, source_extra_kwargs={}, kernel_extra_kwargs=None, - self_extra_kwargs=None): + self_extra_kwargs=None, + translation_classes_data=None): return SumpyExpansionWrangler(self, queue, tree, dtype, fmm_level_to_order, - source_extra_kwargs, kernel_extra_kwargs, self_extra_kwargs) + source_extra_kwargs, kernel_extra_kwargs, self_extra_kwargs, + translation_classes_data=translation_classes_data) # }}} @@ -230,22 +233,26 @@ class SumpyTranslationClassesData: def build_translation_classes_lists(self): trav = self.trav.to_device(self.queue) tree = self.tree.to_device(self.queue) - return self.rotation_classes_builder(self.queue, trav, tree, - is_translation_per_level)[0] + return self.translation_classes_builder(self.queue, trav, tree, + is_translation_per_level=self.is_translation_per_level)[0] @memoize_method def m2l_translation_classes_lists(self): return (self - .build_rotation_classes_lists() - .from_sep_siblings_translation_classes - .get(self.queue)) + .build_translation_classes_lists() + .from_sep_siblings_translation_classes) @memoize_method def m2l_translation_vectors(self): return (self - .build_rotation_classes_lists() - .from_sep_siblings_translation_class_to_distance_vector - .get(self.queue)) + .build_translation_classes_lists() + .from_sep_siblings_translation_class_to_distance_vector) + + @memoize_method + def m2l_translation_classes_level_starts(self): + return (self + .build_translation_classes_lists() + .from_sep_siblings_translation_classes_level_starts) class SumpyTranslationClassesDataNotSuppliedWarning(UserWarning): @@ -359,7 +366,7 @@ class SumpyExpansionWrangler(object): @memoize_method def m2l_translation_class_level_start_box_nrs(self): data = self.translation_classes_data - return data.from_sep_siblings_translation_class_level_starts.get() + return data.m2l_translation_classes_level_starts().get(self.queue) @memoize_method def m2l_precomputed_exprs_level_starts(self): @@ -579,7 +586,7 @@ class SumpyExpansionWrangler(object): return (pot, SumpyTimingFuture(self.queue, events)) @memoize_method - def _multipole_to_local_precompute(self, src_rscale): + def _multipole_to_local_precompute(self, src_rscale, level_start_target_box_nrs): m2l_precomputed_exprs = self.m2l_precomputed_exprs_zeros() events = [] for lev in range(self.tree.nlevels): @@ -589,7 +596,8 @@ class SumpyExpansionWrangler(object): continue order = self.level_orders[lev] - precompute_kernel = self.code.m2l_precompute(order) + precompute_kernel = \ + self.code.m2l_optimized_precompute_kernel(order, order) translation_classes_level_start, precomputed_exprs_view = \ self.m2l_precomputed_exprs_view(m2l_precomputed_exprs, lev) @@ -606,12 +614,15 @@ class SumpyExpansionWrangler(object): return m2l_precomputed_exprs, events - def multipole_to_local_precompute(self, info, lev): + def multipole_to_local_precompute(self, info, level_start_target_box_nrs, lev): if not self.supports_optimized_m2l: return src_rscale = info["src_rscale"] - m2l_precomputed_exprs, events = self._multipole_to_local_precompute(self, src_rscale) - _, precomputed_exprs_view = self.m2l_precomputed_exprs_view(m2l_precomputed_exprs, lev) + m2l_precomputed_exprs, events = \ + self._multipole_to_local_precompute(src_rscale, + level_start_target_box_nrs) + _, precomputed_exprs_view = \ + self.m2l_precomputed_exprs_view(m2l_precomputed_exprs, lev) info["m2l_precomputed_exprs"] = precomputed_exprs_view info["wait_for"] = events[lev] @@ -658,7 +669,8 @@ class SumpyExpansionWrangler(object): **self.kernel_extra_kwargs ) - self.multipole_to_local_precompute(info, lev) + self.multipole_to_local_precompute(kwargs, level_start_target_box_nrs, + lev) evt, _ = m2l(self.queue, **kwargs) events.append(evt) diff --git a/test/test_fmm.py b/test/test_fmm.py index 9a4a4e91..baa7f5ef 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -170,7 +170,8 @@ def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class): for order in order_values: out_kernels = [knl] - from sumpy.fmm import SumpyExpansionWranglerCodeContainer + from sumpy.fmm import (SumpyExpansionWranglerCodeContainer, + SumpyTranslationClassesData) wcc = SumpyExpansionWranglerCodeContainer( ctx, partial(mpole_expn_class, knl), @@ -178,7 +179,8 @@ def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class): out_kernels) wrangler = wcc.get_wrangler(queue, tree, dtype, fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order, - kernel_extra_kwargs=extra_kwargs) + kernel_extra_kwargs=extra_kwargs, + translation_classes_data=SumpyTranslationClassesData(queue, trav)) from boxtree.fmm import drive_fmm -- GitLab From bd1d17db93a4a279ad1c8e057718b05022faa02d Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Mon, 8 Jun 2020 03:07:13 -0500 Subject: [PATCH 47/48] First working version of m2l precompute --- sumpy/__init__.py | 5 +- sumpy/e2e.py | 164 +++++++++++++++++---------------------- sumpy/expansion/local.py | 21 +++-- sumpy/fmm.py | 50 ++++++++---- 4 files changed, 118 insertions(+), 122 deletions(-) diff --git a/sumpy/__init__.py b/sumpy/__init__.py index 1dd79a4a..d7eb1bf2 100644 --- a/sumpy/__init__.py +++ b/sumpy/__init__.py @@ -27,7 +27,7 @@ from sumpy.p2p import P2P, P2PFromCSR from sumpy.p2e import P2EFromSingleBox, P2EFromCSR from sumpy.e2p import E2PFromSingleBox, E2PFromCSR from sumpy.e2e import (E2EFromCSR, E2EFromChildren, E2EFromParent, - E2EFromCSRTranslationInvariant) + E2EFromCSRTranslationInvariant, E2EFromCSRTranslationClassesPrecompute) from sumpy.version import VERSION_TEXT from pytools.persistent_dict import WriteOncePersistentDict @@ -35,7 +35,8 @@ __all__ = [ "P2P", "P2PFromCSR", "P2EFromSingleBox", "P2EFromCSR", "E2PFromSingleBox", "E2PFromCSR", - "E2EFromCSR", "E2EFromChildren", "E2EFromParent"] + "E2EFromCSR", "E2EFromChildren", "E2EFromParent", + "E2EFromCSRTranslationInvariant", "E2EFromCSRTranslationClassesPrecompute"] code_cache = WriteOncePersistentDict("sumpy-code-cache-v6-"+VERSION_TEXT) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 8a37428f..b23cd2fa 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -255,6 +255,7 @@ class E2EFromCSR(E2EBase): # }}} + # {{{ class E2EFromCSRTranslationInvariant(E2EFromCSR): @@ -273,7 +274,8 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): tgt_rscale = sym.Symbol("tgt_rscale") - nprecomputed_exprs = len(self.m2l_global_precompute_mis(self.src_expansion)) + nprecomputed_exprs = \ + len(self.tgt_expansion.m2l_global_precompute_mis(self.src_expansion)[0]) precomputed_exprs = [sym.Symbol("precomputed_expr%d" % i) for i in range(nprecomputed_exprs)] @@ -302,7 +304,8 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): def get_kernel(self): ncoeff_src = len(self.src_expansion) ncoeff_tgt = len(self.tgt_expansion) - nprecomputed_exprs = len(self.m2l_global_precompute_mis(self.src_expansion)) + nprecomputed_exprs = \ + len(self.tgt_expansion.m2l_global_precompute_mis(self.src_expansion)[0]) # To clarify terminology: # @@ -334,14 +337,15 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): <> src_center[idim] = centers[idim, src_ibox] {dup=idim} <> d[idim] = tgt_center[idim] - src_center[idim] \ {dup=idim} - <> translation_class = translation_classes_lists[isrc_box] - <> translation_class_idx_start = translation_class * \ - nprecomputed_exprs + <> translation_class = \ + m2l_translation_classes_lists[isrc_box] + <> translation_class_rel = translation_class - \ + translation_classes_level_start """] + [""" <> precomputed_expr{idx} = \ - m2l_precomputed_exprs[translation_class_idx_start + {idx}] - """.format(idx=idx) for idx in range(nprecomputed_exprs) - ] + [""" + m2l_precomputed_exprs[translation_class_rel, {idx}] + """.format(idx=idx) for idx in range( + nprecomputed_exprs)] + [""" <> src_coeff{coeffidx} = \ src_expansions[src_ibox - src_base_ibox, {coeffidx}] \ {{dep=read_src_ibox}} @@ -366,14 +370,20 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): np.int32), lp.ValueArg("nsrc_level_boxes,ntgt_level_boxes", np.int32), + lp.ValueArg("translation_classes_level_start", + np.int32), lp.GlobalArg("src_expansions", None, shape=("nsrc_level_boxes", ncoeff_src), offset=lp.auto), lp.GlobalArg("tgt_expansions", None, shape=("ntgt_level_boxes", ncoeff_tgt), offset=lp.auto), - lp.GlobalArg("translation_classes_lists", - None, shape=None, strides=(1,), offset=np.int32), + lp.ValueArg("ntranslation_classes, ntranslation_classes_lists", + np.int32), + lp.GlobalArg("m2l_translation_classes_lists", np.int32, + shape=("ntranslation_classes_lists"), strides=(1,), + offset=lp.auto), lp.GlobalArg("m2l_precomputed_exprs", None, - shape=(nprecomputed_exprs*ntranslation_classes,), offset=lp.auto), + shape=("ntranslation_classes", nprecomputed_exprs), + offset=lp.auto), "..." ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, @@ -399,31 +409,22 @@ class E2EFromCSRTranslationClassesPrecompute(E2EFromCSR): derivatives. """ default_name = "e2e_from_csr_translation_classes_precompute" - + def get_translation_loopy_insns(self): from sumpy.symbolic import make_sym_vector dvec = make_sym_vector("d", self.dim) - src_coeff_exprs = [sym.Symbol("src_coeff%d" % i) - for i in range(len(self.src_expansion))] src_rscale = sym.Symbol("src_rscale") - tgt_rscale = sym.Symbol("tgt_rscale") - nprecomputed_exprs = len(self.m2l_global_precompute_exprs(self.src_expansion)) - - precomputed_exprs = [sym.Symbol("precomputed_expr%d" % i) - for i in range(nprecomputed_exprs)] - from sumpy.assignment_collection import SymbolicAssignmentCollection sac = SymbolicAssignmentCollection() tgt_coeff_names = [ - sac.assign_unique("coeff%d" % i, coeff_i) + sac.assign_unique("precomputed_expr%d" % i, coeff_i) for i, coeff_i in enumerate( - self.tgt_expansion.translate_from( - self.src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale, sac, - precomputed_exprs=precomputed_exprs))] + self.tgt_expansion.m2l_global_precompute_exprs( + self.src_expansion, src_rscale, + dvec, tgt_rscale, sac))] sac.run_global_cse() @@ -437,88 +438,40 @@ class E2EFromCSRTranslationClassesPrecompute(E2EFromCSR): ) def get_kernel(self): - ncoeff_src = len(self.src_expansion) - ncoeff_tgt = len(self.tgt_expansion) - nprecomputed_exprs = len(self.m2l_global_precompute_mis(self.src_expansion)) - - # To clarify terminology: - # - # isrc_box -> The index in a list of (in this case, source) boxes - # src_ibox -> The (global) box number for the (in this case, source) box - # - # (same for itgt_box, tgt_ibox) - + nprecomputed_exprs = \ + len(self.tgt_expansion.m2l_global_precompute_mis(self.src_expansion)[0]) from sumpy.tools import gather_loopy_arguments loopy_knl = lp.make_kernel( [ - "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] - - <> tgt_center[idim] = centers[idim, tgt_ibox] \ - - <> isrc_start = src_box_starts[itgt_box] - <> isrc_stop = src_box_starts[itgt_box+1] - - for isrc_box - <> src_ibox = src_box_lists[isrc_box] \ - {id=read_src_ibox} + for itr_class + <> d[idim] = m2l_translation_vectors[idim, \ + itr_class + translation_classes_level_start] {dup=idim} - <> src_center[idim] = centers[idim, src_ibox] {dup=idim} - <> d[idim] = tgt_center[idim] - src_center[idim] \ - {dup=idim} - <> translation_class = translation_classes_lists[isrc_box] - <> translation_class_idx_start = translation_class * \ - nprecomputed_exprs - """] + [""" - <> precomputed_expr{idx} = \ - m2l_precomputed_exprs[translation_class_idx_start + {idx}] - """.format(idx=idx) for idx in range(nprecomputed_exprs) - ] + [""" - <> src_coeff{coeffidx} = \ - src_expansions[src_ibox - src_base_ibox, {coeffidx}] \ - {{dep=read_src_ibox}} - """.format(coeffidx=i) for i in range(ncoeff_src)] + [ - - ] + self.get_translation_loopy_insns() + [""" - end - - """] + [""" - tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \ - simul_reduce(sum, isrc_box, coeff{coeffidx}) \ - {{id_prefix=write_expn}} - """.format(coeffidx=i) for i in range(ncoeff_tgt)] + [""" + """] + self.get_translation_loopy_insns() + [""" + m2l_precomputed_exprs[itr_class, {idx}] = precomputed_expr{idx} + """.format(idx=i) for i in range(nprecomputed_exprs)] + [""" end """], [ - lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), - lp.ValueArg("src_rscale,tgt_rscale", None), - lp.GlobalArg("src_box_starts, src_box_lists", - None, shape=None, strides=(1,), offset=lp.auto), - lp.ValueArg("aligned_nboxes,tgt_base_ibox,src_base_ibox", - np.int32), - lp.ValueArg("nsrc_level_boxes,ntgt_level_boxes", - np.int32), - lp.GlobalArg("src_expansions", None, - shape=("nsrc_level_boxes", ncoeff_src), offset=lp.auto), - lp.GlobalArg("tgt_expansions", None, - shape=("ntgt_level_boxes", ncoeff_tgt), offset=lp.auto), - lp.GlobalArg("translation_classes_lists", - None, shape=None, strides=(1,), offset=np.int32), + lp.ValueArg("src_rscale", None), lp.GlobalArg("m2l_precomputed_exprs", None, - shape=(nprecomputed_exprs*ntranslation_classes,), offset=lp.auto), + shape=("ntranslation_classes", nprecomputed_exprs), + offset=lp.auto), + lp.GlobalArg("m2l_translation_vectors", None, + shape=("dim", "ntranslation_vectors")), + lp.ValueArg("ntranslation_classes", np.int32), + lp.ValueArg("ntranslation_vectors", np.int32), + lp.ValueArg("translation_classes_level_start", np.int32), "..." ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, - assumptions="ntgt_boxes>=1", - silenced_warnings="write_race(write_expn*)", + assumptions="ntranslation_classes>=1", default_offset=lp.auto, - fixed_parameters=dict(dim=self.dim, - nprecomputed_exprs=nprecomputed_exprs), + fixed_parameters=dict(dim=self.dim), lang_version=MOST_RECENT_LANGUAGE_VERSION ) @@ -530,8 +483,35 @@ class E2EFromCSRTranslationClassesPrecompute(E2EFromCSR): return loopy_knl + def get_optimized_kernel(self): + # FIXME + knl = self.get_kernel() + knl = lp.split_iname(knl, "itr_class", 16, outer_tag="g.0") + return knl + + def __call__(self, queue, **kwargs): + """ + :arg src_rscale: + :arg translation_classes_level_start: + :arg ntranslation_classes: + :arg m2l_precomputed_exprs: + :arg m2l_translation_vectors: + """ + knl = self.get_cached_optimized_kernel() + + m2l_translation_vectors = kwargs.pop("m2l_translation_vectors") + # "1" may be passed for rscale, which won't have its type + # meaningfully inferred. Make the type of rscale explicit. + src_rscale = m2l_translation_vectors.dtype.type(kwargs.pop("src_rscale")) + + return knl(queue, + src_rscale=src_rscale, + m2l_translation_vectors=m2l_translation_vectors, + **kwargs) + # }}} + # {{{ translation from a box's children class E2EFromChildren(E2EBase): diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 2cba448b..40b02ead 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -24,7 +24,7 @@ THE SOFTWARE. from six.moves import range, zip import sumpy.symbolic as sym -from pytools import single_valued, memoize_method +from pytools import single_valued from sumpy.expansion import ( ExpansionBase, VolumeTaylorExpansion, LaplaceConformingVolumeTaylorExpansion, @@ -112,6 +112,7 @@ class LineTaylorLocalExpansion(LocalExpansionBase): # }}} + # {{{ volume taylor class VolumeTaylorLocalExpansionBase(LocalExpansionBase): @@ -194,13 +195,11 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): if not self.use_rscale: src_rscale = 1 - tgt_rscale = 1 from sumpy.tools import add_mi - toeplitz_matrix_coeffs, max_mi = self.m2l_global_precompute_mis(src_expansion) - toeplitz_matrix_ident_to_index = dict((ident, i) for i, ident in - enumerate(toeplitz_matrix_coeffs)) + toeplitz_matrix_coeffs, max_mi = \ + self.m2l_global_precompute_mis(src_expansion) # Create a expansion terms wrangler for derivatives up to order # (tgt order)+(src order) including a corresponding reduction matrix @@ -244,7 +243,6 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): return vector - def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale, sac, precomputed_exprs=None, use_fft=False): logger.info("building translation operator: %s(%d) -> %s(%d): start" @@ -266,8 +264,8 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): enumerate(toeplitz_matrix_coeffs)) if precomputed_exprs is None: - derivatives = self.m2l_global_precompute_exprs(src_expansion, src_rscale, - dvec, tgt_rscale, sac, use_fft=use_fft) + derivatives = self.m2l_global_precompute_exprs(src_expansion, + src_rscale, dvec, tgt_rscale, sac, use_fft=use_fft) else: derivatives = precomputed_exprs @@ -281,10 +279,11 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # Do the matvec if use_fft: - output = fft_toeplitz_upper_triangular(toeplitz_first_row, derivatives, - sac=sac) + output = fft_toeplitz_upper_triangular(toeplitz_first_row, + derivatives, sac=sac) else: - output = matvec_toeplitz_upper_triangular(toeplitz_first_row, derivatives) + output = matvec_toeplitz_upper_triangular(toeplitz_first_row, + derivatives) # Filter out the dummy rows and scale them for target result = [] diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 6f4192bc..ff355b06 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -40,7 +40,8 @@ from sumpy import ( P2EFromSingleBox, P2EFromCSR, E2PFromSingleBox, E2PFromCSR, P2PFromCSR, - E2EFromCSR, E2EFromCSRTranslationInvariant, + E2EFromCSR, + E2EFromCSRTranslationInvariant, E2EFromCSRTranslationClassesPrecompute, E2EFromChildren, E2EFromParent) @@ -211,6 +212,7 @@ class SumpyTimingFuture(object): # }}} + # {{{ translation classes data class SumpyTranslationClassesData: @@ -336,7 +338,6 @@ class SumpyExpansionWrangler(object): self.translation_classes_data = translation_classes_data - # {{{ data vector utilities def _expansions_level_starts(self, order_to_size, level_starts): @@ -367,7 +368,7 @@ class SumpyExpansionWrangler(object): def m2l_translation_class_level_start_box_nrs(self): data = self.translation_classes_data return data.m2l_translation_classes_level_starts().get(self.queue) - + @memoize_method def m2l_precomputed_exprs_level_starts(self): def order_to_size(order): @@ -586,15 +587,10 @@ class SumpyExpansionWrangler(object): return (pot, SumpyTimingFuture(self.queue, events)) @memoize_method - def _multipole_to_local_precompute(self, src_rscale, level_start_target_box_nrs): + def _multipole_to_local_precompute(self, src_rscale): m2l_precomputed_exprs = self.m2l_precomputed_exprs_zeros() events = [] for lev in range(self.tree.nlevels): - start, stop = level_start_target_box_nrs[lev:lev+2] - if start == stop: - events.append([]) - continue - order = self.level_orders[lev] precompute_kernel = \ self.code.m2l_optimized_precompute_kernel(order, order) @@ -602,11 +598,23 @@ class SumpyExpansionWrangler(object): translation_classes_level_start, precomputed_exprs_view = \ self.m2l_precomputed_exprs_view(m2l_precomputed_exprs, lev) + ntranslation_classes = precomputed_exprs_view.shape[0] + + if ntranslation_classes == 0: + events.append([]) + continue + + m2l_translation_vectors = ( + self.translation_classes_data.m2l_translation_vectors()) + evt, _ = precompute_kernel( self.queue, src_rscale=src_rscale, translation_classes_level_start=translation_classes_level_start, - precomputed_exprs = precomputed_exprs_view, + ntranslation_classes=ntranslation_classes, + m2l_precomputed_exprs=precomputed_exprs_view, + m2l_translation_vectors=m2l_translation_vectors, + ntranslation_vectors=m2l_translation_vectors.shape[1], **self.kernel_extra_kwargs ) @@ -614,17 +622,25 @@ class SumpyExpansionWrangler(object): return m2l_precomputed_exprs, events - def multipole_to_local_precompute(self, info, level_start_target_box_nrs, lev): + def multipole_to_local_precompute(self, info, lev): + """Adds to info dict, the information needed for a multipole-to-local + translation with precomputation. Returns true if the level `lev` + has translations to do. If the wrangler does not support an optimized + translation, returns true. + """ if not self.supports_optimized_m2l: - return + return True src_rscale = info["src_rscale"] m2l_precomputed_exprs, events = \ - self._multipole_to_local_precompute(src_rscale, - level_start_target_box_nrs) - _, precomputed_exprs_view = \ + self._multipole_to_local_precompute(src_rscale) + translation_classes_level_start, precomputed_exprs_view = \ self.m2l_precomputed_exprs_view(m2l_precomputed_exprs, lev) info["m2l_precomputed_exprs"] = precomputed_exprs_view + info["translation_classes_level_start"] = translation_classes_level_start + info["m2l_translation_classes_lists"] = \ + self.translation_classes_data.m2l_translation_classes_lists() info["wait_for"] = events[lev] + return precomputed_exprs_view.shape[0] != 0 @property def m2l_class(self): @@ -669,8 +685,8 @@ class SumpyExpansionWrangler(object): **self.kernel_extra_kwargs ) - self.multipole_to_local_precompute(kwargs, level_start_target_box_nrs, - lev) + if not self.multipole_to_local_precompute(kwargs, lev): + continue evt, _ = m2l(self.queue, **kwargs) events.append(evt) -- GitLab From 9e7060cec903a8b76794c48b7ecde229bcbb065e Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Mon, 8 Jun 2020 11:30:54 -0500 Subject: [PATCH 48/48] Don't store zero expressions --- sumpy/e2e.py | 7 ++++--- sumpy/expansion/local.py | 45 +++++++++++++++++++++++++--------------- sumpy/fmm.py | 2 +- 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index b23cd2fa..c5451e27 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -275,7 +275,7 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): tgt_rscale = sym.Symbol("tgt_rscale") nprecomputed_exprs = \ - len(self.tgt_expansion.m2l_global_precompute_mis(self.src_expansion)[0]) + self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) precomputed_exprs = [sym.Symbol("precomputed_expr%d" % i) for i in range(nprecomputed_exprs)] @@ -305,7 +305,7 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): ncoeff_src = len(self.src_expansion) ncoeff_tgt = len(self.tgt_expansion) nprecomputed_exprs = \ - len(self.tgt_expansion.m2l_global_precompute_mis(self.src_expansion)[0]) + self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) # To clarify terminology: # @@ -439,7 +439,7 @@ class E2EFromCSRTranslationClassesPrecompute(E2EFromCSR): def get_kernel(self): nprecomputed_exprs = \ - len(self.tgt_expansion.m2l_global_precompute_mis(self.src_expansion)[0]) + self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) from sumpy.tools import gather_loopy_arguments loopy_knl = lp.make_kernel( [ @@ -487,6 +487,7 @@ class E2EFromCSRTranslationClassesPrecompute(E2EFromCSR): # FIXME knl = self.get_kernel() knl = lp.split_iname(knl, "itr_class", 16, outer_tag="g.0") + return knl def __call__(self, queue, **kwargs): diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 40b02ead..7e646e8f 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -163,8 +163,12 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): knl = self.kernel return knl.postprocess_at_target(result, bvec) + def m2l_global_precompute_nexpr(self, src_expansion): + return len(self.m2l_global_precompute_mis(src_expansion)[1]) + def m2l_global_precompute_mis(self, src_expansion): from pytools import generate_nonnegative_integer_tuples_below as gnitb + from sumpy.tools import add_mi max_mi = [0]*self.dim for i in range(self.dim): @@ -174,7 +178,17 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): self.get_coefficient_identifiers()) toeplitz_matrix_coeffs = list(gnitb([m + 1 for m in max_mi])) - return toeplitz_matrix_coeffs, max_mi + + needed_vector_terms = [] + # For eg: 2D full Taylor Laplace, we only need kernel derivatives + # (n1+n2, m1+m2), n1+m1<=p, n2+m2<=p + for tgt_deriv in self.get_coefficient_identifiers(): + for src_deriv in src_expansion.get_coefficient_identifiers(): + needed = add_mi(src_deriv, tgt_deriv) + if needed not in needed_vector_terms: + needed_vector_terms.append(needed) + + return toeplitz_matrix_coeffs, needed_vector_terms, max_mi def m2l_global_precompute_exprs(self, src_expansion, src_rscale, dvec, tgt_rscale, sac, use_fft=False): @@ -198,7 +212,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): from sumpy.tools import add_mi - toeplitz_matrix_coeffs, max_mi = \ + toeplitz_matrix_coeffs, needed_vector_terms, max_mi = \ self.m2l_global_precompute_mis(src_expansion) # Create a expansion terms wrangler for derivatives up to order @@ -227,20 +241,13 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): srcplusderiv_terms_wrangler.get_full_kernel_derivatives_from_stored( vector_stored, src_rscale) - needed_vector_terms = set([]) - # For eg: 2D full Taylor Laplace, we only need kernel derivatives - # (n1+n2, m1+m2), n1+m1<=p, n2+m2<=p - for tgt_deriv in self.get_coefficient_identifiers(): - for src_deriv in src_expansion.get_coefficient_identifiers(): - needed = add_mi(src_deriv, tgt_deriv) - needed_vector_terms.add(needed) + for term in srcplusderiv_full_coeff_ids: + assert term in needed_vector_terms - vector = [0]*len(toeplitz_matrix_coeffs) - for i, term in enumerate(toeplitz_matrix_coeffs): - if term in srcplusderiv_ident_to_index: - vector[i] = add_to_sac(sac, + vector = [0]*len(needed_vector_terms) + for i, term in enumerate(needed_vector_terms): + vector[i] = add_to_sac(sac, vector_full[srcplusderiv_ident_to_index[term]]) - return vector def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, @@ -258,7 +265,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansionBase if isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase): - toeplitz_matrix_coeffs, max_mi = \ + toeplitz_matrix_coeffs, needed_vector_terms, max_mi = \ self.m2l_global_precompute_mis(src_expansion) toeplitz_matrix_ident_to_index = dict((ident, i) for i, ident in enumerate(toeplitz_matrix_coeffs)) @@ -269,6 +276,10 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): else: derivatives = precomputed_exprs + derivatives_full = [0]*len(toeplitz_matrix_coeffs) + for expr, mi in zip(derivatives, needed_vector_terms): + derivatives_full[toeplitz_matrix_ident_to_index[mi]] = expr + # Calculate the first row of the upper triangular Toeplitz matrix toeplitz_first_row = [0] * len(toeplitz_matrix_coeffs) for coeff, term in zip( @@ -280,10 +291,10 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # Do the matvec if use_fft: output = fft_toeplitz_upper_triangular(toeplitz_first_row, - derivatives, sac=sac) + derivatives_full, sac=sac) else: output = matvec_toeplitz_upper_triangular(toeplitz_first_row, - derivatives) + derivatives_full) # Filter out the dummy rows and scale them for target result = [] diff --git a/sumpy/fmm.py b/sumpy/fmm.py index ff355b06..289ab511 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -374,7 +374,7 @@ class SumpyExpansionWrangler(object): def order_to_size(order): mpole_expn = self.code.multipole_expansion_factory(order) local_expn = self.code.local_expansion_factory(order) - return len(local_expn.m2l_global_precompute_mis(mpole_expn)[0]) + return local_expn.m2l_global_precompute_nexpr(mpole_expn) return self._expansions_level_starts(order_to_size, level_starts=self.m2l_translation_class_level_start_box_nrs()) -- GitLab