From daa4118947d5d4a1b892280720d48dde611288e2 Mon Sep 17 00:00:00 2001 From: Isuru Fernando <idf2@illinois.edu> Date: Fri, 13 May 2022 17:41:12 -0500 Subject: [PATCH] Use a separate class for M2L translation and change expansion default to FFT (#113) * Use a separate class for M2L translation * Fix docs and caching * M2L Translation Factory * vim markers * Fix tests * Fix toys * Fix test_m2l_toeplitz * Fix more tests * Use a better rscale to get the test passing * Use pytential dev branch * Try 2r/order instead of r/order --- .github/workflows/ci.yml | 4 +- doc/expansion.rst | 5 + sumpy/e2e.py | 79 ++-- sumpy/expansion/local.py | 581 +++-------------------------- sumpy/expansion/m2l.py | 770 +++++++++++++++++++++++++++++++++++++++ sumpy/fmm.py | 40 +- sumpy/toys.py | 8 + test/test_fmm.py | 14 +- test/test_kernels.py | 10 +- test/test_misc.py | 6 +- 10 files changed, 912 insertions(+), 605 deletions(-) create mode 100644 sumpy/expansion/m2l.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 12c8d4ce..c369d1fc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,8 +88,8 @@ jobs: run: | curl -L -O https://tiker.net/ci-support-v0 . ./ci-support-v0 - if [[ "$DOWNSTREAM_PROJECT" == "pytential" && "$GITHUB_HEAD_REF" == "rscale" ]]; then - DOWNSTREAM_PROJECT=https://github.com/isuruf/pytential.git@level_to_rscale + if [[ "$DOWNSTREAM_PROJECT" == "pytential" && "$GITHUB_HEAD_REF" == "m2l" ]]; then + DOWNSTREAM_PROJECT=https://github.com/isuruf/pytential.git@m2l_translation fi test_downstream "$DOWNSTREAM_PROJECT" diff --git a/doc/expansion.rst b/doc/expansion.rst index ffb1e122..5d72d735 100644 --- a/doc/expansion.rst +++ b/doc/expansion.rst @@ -18,6 +18,11 @@ Multipole Expansions .. automodule:: sumpy.expansion.multipole +Multipole to Local Translations +------------------------------- + +.. automodule:: sumpy.expansion.m2l + Estimating Expansion Orders --------------------------- diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 6f742f72..b35f86cd 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -306,12 +306,13 @@ class M2LUsingTranslationClassesDependentData(E2EFromCSR): src_rscale = sym.Symbol("src_rscale") tgt_rscale = sym.Symbol("tgt_rscale") + m2l_translation = self.tgt_expansion.m2l_translation m2l_translation_classes_dependent_ndata = ( - self.tgt_expansion.m2l_translation_classes_dependent_ndata( - self.src_expansion)) + m2l_translation.translation_classes_dependent_ndata(self.tgt_expansion, + self.src_expansion)) m2l_translation_classes_dependent_data = \ - [sym.Symbol(f"data{i}") - for i in range(m2l_translation_classes_dependent_ndata)] + [sym.Symbol(f"data{i}") + for i in range(m2l_translation_classes_dependent_ndata)] ncoeff_src = len(self.src_expansion) @@ -346,13 +347,14 @@ class M2LUsingTranslationClassesDependentData(E2EFromCSR): except NotImplementedError: pass - ndata = self.tgt_expansion.m2l_translation_classes_dependent_ndata( - self.src_expansion) - if self.tgt_expansion.use_preprocessing_for_m2l: - ncoeff_src = self.tgt_expansion.m2l_preprocess_multipole_nexprs( - self.src_expansion) - ncoeff_tgt = self.tgt_expansion.m2l_postprocess_local_nexprs( - self.src_expansion) + m2l_translation = self.tgt_expansion.m2l_translation + ndata = m2l_translation.translation_classes_dependent_ndata( + self.tgt_expansion, self.src_expansion) + if m2l_translation.use_preprocessing: + ncoeff_src = m2l_translation.preprocess_multipole_nexprs( + self.tgt_expansion, self.src_expansion) + ncoeff_tgt = m2l_translation.postprocess_local_nexprs( + self.tgt_expansion, self.src_expansion) else: ncoeff_src = len(self.src_expansion) ncoeff_tgt = len(self.tgt_expansion) @@ -380,15 +382,16 @@ class M2LUsingTranslationClassesDependentData(E2EFromCSR): ) def get_kernel(self, result_dtype): + m2l_translation = self.tgt_expansion.m2l_translation m2l_translation_classes_dependent_ndata = \ - self.tgt_expansion.m2l_translation_classes_dependent_ndata( - self.src_expansion) - - if self.tgt_expansion.use_preprocessing_for_m2l: - ncoeff_src = self.tgt_expansion.m2l_preprocess_multipole_nexprs( - self.src_expansion) - ncoeff_tgt = self.tgt_expansion.m2l_postprocess_local_nexprs( - self.src_expansion) + m2l_translation.translation_classes_dependent_ndata( + self.tgt_expansion, self.src_expansion) + + if m2l_translation.use_preprocessing: + ncoeff_src = m2l_translation.preprocess_multipole_nexprs( + self.tgt_expansion, self.src_expansion) + ncoeff_tgt = m2l_translation.postprocess_local_nexprs( + self.tgt_expansion, self.src_expansion) else: ncoeff_src = len(self.src_expansion) ncoeff_tgt = len(self.tgt_expansion) @@ -535,17 +538,18 @@ class M2LGenerateTranslationClassesDependentData(E2EBase): dvec = make_sym_vector("d", self.dim) src_rscale = sym.Symbol("src_rscale") - tgt_rscale = sym.Symbol("tgt_rscale") + + m2l_translation = self.tgt_expansion.m2l_translation from sumpy.assignment_collection import SymbolicAssignmentCollection sac = SymbolicAssignmentCollection() tgt_coeff_names = [ - sac.assign_unique( - f"m2l_translation_classes_dependent_expr{i}", coeff_i) - for i, coeff_i in enumerate( - self.tgt_expansion.m2l_translation_classes_dependent_data( - self.src_expansion, src_rscale, - dvec, tgt_rscale, sac))] + sac.assign_unique( + f"m2l_translation_classes_dependent_expr{i}", coeff_i) + for i, coeff_i in enumerate( + m2l_translation.translation_classes_dependent_data( + self.tgt_expansion, self.src_expansion, src_rscale, + dvec, sac=sac))] sac.run_global_cse() @@ -560,8 +564,8 @@ class M2LGenerateTranslationClassesDependentData(E2EBase): def get_kernel(self, result_dtype): m2l_translation_classes_dependent_ndata = \ - self.tgt_expansion.m2l_translation_classes_dependent_ndata( - self.src_expansion) + self.tgt_expansion.m2l_translation.translation_classes_dependent_ndata( + self.tgt_expansion, self.src_expansion) from sumpy.tools import gather_loopy_arguments loopy_knl = lp.make_kernel( [ @@ -665,8 +669,8 @@ class M2LPreprocessMultipole(E2EBase): preprocessed_src_coeff_names = [ sac.assign_unique(f"preprocessed_src_coeff{i}", coeff_i) for i, coeff_i in enumerate( - self.tgt_expansion.m2l_preprocess_multipole_exprs( - self.src_expansion, src_coeff_exprs, + self.tgt_expansion.m2l_translation.preprocess_multipole_exprs( + self.tgt_expansion, self.src_expansion, src_coeff_exprs, sac=sac, src_rscale=src_rscale))] sac.run_global_cse() @@ -683,7 +687,8 @@ class M2LPreprocessMultipole(E2EBase): def get_kernel(self, result_dtype): nsrc_coeffs = len(self.src_expansion) npreprocessed_src_coeffs = \ - self.tgt_expansion.m2l_preprocess_multipole_nexprs(self.src_expansion) + self.tgt_expansion.m2l_translation.preprocess_multipole_nexprs( + self.tgt_expansion, self.src_expansion) from sumpy.tools import gather_loopy_arguments loopy_knl = lp.make_kernel( [ @@ -752,8 +757,10 @@ class M2LPostprocessLocal(E2EBase): default_name = "m2l_postprocess_local" def get_loopy_insns(self, result_dtype): + m2l_translation = self.tgt_expansion.m2l_translation ncoeffs_before_postprocessing = \ - self.tgt_expansion.m2l_postprocess_local_nexprs(self.tgt_expansion) + m2l_translation.postprocess_local_nexprs(self.tgt_expansion, + self.src_expansion) tgt_coeff_exprs_before_postprocessing = [ sym.Symbol(f"tgt_coeff_before_postprocessing{i}") @@ -765,8 +772,9 @@ class M2LPostprocessLocal(E2EBase): from sumpy.assignment_collection import SymbolicAssignmentCollection sac = SymbolicAssignmentCollection() - tgt_coeff_exprs = self.tgt_expansion.m2l_postprocess_local_exprs( - self.tgt_expansion, tgt_coeff_exprs_before_postprocessing, + tgt_coeff_exprs = m2l_translation.postprocess_local_exprs( + self.tgt_expansion, self.src_expansion, + tgt_coeff_exprs_before_postprocessing, sac=sac, src_rscale=src_rscale, tgt_rscale=tgt_rscale) if result_dtype in (np.float32, np.float64): @@ -792,7 +800,8 @@ class M2LPostprocessLocal(E2EBase): def get_kernel(self, result_dtype): ntgt_coeffs = len(self.tgt_expansion) ntgt_coeffs_before_postprocessing = \ - self.tgt_expansion.m2l_postprocess_local_nexprs(self.tgt_expansion) + self.tgt_expansion.m2l_translation.postprocess_local_nexprs( + self.tgt_expansion, self.src_expansion) from sumpy.tools import gather_loopy_arguments loopy_knl = lp.make_kernel( [ diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 72d91c01..d170aba9 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -21,10 +21,7 @@ THE SOFTWARE. """ import math -from typing import Tuple, Any -import pymbolic -import loopy as lp from pytools import single_valued import sumpy.symbolic as sym @@ -32,9 +29,7 @@ from sumpy.expansion import ( ExpansionBase, VolumeTaylorExpansion, LinearPDEConformingVolumeTaylorExpansion) -from sumpy.tools import ( - add_to_sac, fft, - mi_increment_axis, matvec_toeplitz_upper_triangular) +from sumpy.tools import add_to_sac, mi_increment_axis import logging logger = logging.getLogger(__name__) @@ -56,38 +51,23 @@ class LocalExpansionBase(ExpansionBase): .. attribute:: kernel .. attribute:: order .. attribute:: use_rscale - .. attribute:: use_fft_for_m2l - .. attribute:: use_preprocessing_for_m2l - - .. automethod:: m2l_translation_classes_dependent_data - .. automethod:: m2l_translation_classes_dependent_ndata - .. automethod:: m2l_preprocess_multipole_exprs - .. automethod:: m2l_preprocess_multipole_nexprs - .. automethod:: m2l_postprocess_local_exprs - .. automethod:: m2l_postprocess_local_nexprs + .. automethod:: translate_from """ - init_arg_names = ("kernel", "order", "use_rscale", "use_fft_for_m2l", - "use_preprocessing_for_m2l") + init_arg_names = ("kernel", "order", "use_rscale", "m2l_translation") def __init__(self, kernel, order, use_rscale=None, - use_fft_for_m2l=False, use_preprocessing_for_m2l=None): + m2l_translation=None): super().__init__(kernel, order, use_rscale) - self.use_fft_for_m2l = use_fft_for_m2l - if use_preprocessing_for_m2l is None: - self.use_preprocessing_for_m2l = use_fft_for_m2l - else: - self.use_preprocessing_for_m2l = use_preprocessing_for_m2l + self.m2l_translation = m2l_translation def with_kernel(self, kernel): return type(self)(kernel, self.order, self.use_rscale, - use_fft_for_m2l=self.use_fft_for_m2l, - use_preprocessing_for_m2l=self.use_preprocessing_for_m2l) + self.m2l_translation) def update_persistent_hash(self, key_hash, key_builder): super().update_persistent_hash(key_hash, key_builder) - key_builder.rec(key_hash, self.use_fft_for_m2l) - key_builder.rec(key_hash, self.use_preprocessing_for_m2l) + key_builder.rec(key_hash, self.m2l_translation) def __eq__(self, other): return ( @@ -95,83 +75,9 @@ class LocalExpansionBase(ExpansionBase): and self.kernel == other.kernel and self.order == other.order and self.use_rscale == other.use_rscale - and self.use_fft_for_m2l == other.use_fft_for_m2l - and self.use_preprocessing_for_m2l == other.use_preprocessing_for_m2l + and self.m2l_translation == other.m2l_translation ) - def m2l_translation_classes_dependent_data(self, src_expansion, src_rscale, - dvec, tgt_rscale, sac) -> Tuple[Any]: - """Return an iterable of expressions that needs to be precomputed - for multipole-to-local translations that depend only on the - distance between the multipole center and the local center which - is given as *dvec*. - - Since there are only a finite number of different values for the - distance between per level, these can be precomputed for the tree. - In :mod:`boxtree`, these distances are referred to as translation - classes. - """ - return tuple() - - def m2l_translation_classes_dependent_ndata(self, src_expansion): - """Return the number of expressions returned by - :func:`~sumpy.expansion.local.LocalExpansionBase.m2l_translation_classes_dependent_data`. - This method exists because calculating the number of expressions using - the above method might be costly and - :func:`~sumpy.expansion.local.LocalExpansionBase.m2l_translation_classes_dependent_data` - cannot be memoized due to it having side effects through the argument - *sac*. - """ - return 0 - - def m2l_preprocess_multipole_exprs(self, src_expansion, src_coeff_exprs, sac, - src_rscale): - """Return the preprocessed multipole expansion for an optimized M2L. - Preprocessing happens once per source box before M2L translation is done. - - When FFT is turned on, the input expressions are transformed into Fourier - space. These expressions are used in a separate :mod:`loopy` kernel - to avoid having to transform for each target and source box pair. - When FFT is turned off, the expressions are equal to the multipole - expansion coefficients with zeros added - to make the M2L computation a circulant matvec. - """ - raise NotImplementedError - - def m2l_preprocess_multipole_nexprs(self, src_expansion): - """Return the number of expressions returned by - :func:`~sumpy.expansion.local.LocalExpansionBase.m2l_preprocess_multipole_exprs`. - This method exists because calculating the number of expressions using - the above method might be costly and it cannot be memoized due to it having - side effects through the argument *sac*. - """ - # For all use-cases we have right now, this is equal to the number of - # translation classes dependent exprs. Use that as a default. - return self.m2l_translation_classes_dependent_ndata(src_expansion) - - def m2l_postprocess_local_exprs(self, src_expansion, m2l_result, src_rscale, - tgt_rscale, sac): - """Return postprocessed local expansion for an optimized M2L. - Postprocessing happens once per target box just after the M2L translation - is done and before storing the expansion coefficients for the local - expansion. - - When FFT is turned on, the output expressions are transformed from Fourier - space back to the original space. - """ - raise NotImplementedError - - def m2l_postprocess_local_nexprs(self, src_expansion): - """Return the number of expressions given as input to - :func:`~sumpy.expansion.local.LocalExpansionBase.m2l_postprocess_local_exprs`. - This method exists because calculating the number of expressions using - the above method might be costly and it cannot be memoized due to it - having side effects through the argument *sac*. - """ - # For all use-cases we have right now, this is equal to the number of - # translation classes dependent exprs. Use that as a default. - return self.m2l_translation_classes_dependent_ndata(src_expansion) - def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale, sac=None, m2l_translation_classes_dependent_data=None): """Translate from a multipole or local expansion to a local expansion @@ -188,7 +94,7 @@ class LocalExpansionBase(ExpansionBase): to collect common subexpressions or None. :arg m2l_translation_classes_dependent_data: An iterable of symbolic expressions representing the expressions returned by - :func:`~sumpy.expansion.local.LocalExpansionBase.m2l_translation_classes_dependent_data`. + :func:`~sumpy.expansion.m2l.M2LTranslationBase.translation_classes_dependent_data`. """ raise NotImplementedError @@ -254,6 +160,15 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): Coefficients represent derivative values of the kernel. """ + def __init__(self, kernel, order, use_rscale=None, + m2l_translation=None): + if not m2l_translation: + from sumpy.expansion.m2l import DefaultM2LTranslationClassFactory + factory = DefaultM2LTranslationClassFactory() + m2l_translation = factory.get_m2l_translation_class(kernel, + self.__class__)() + super().__init__(kernel, order, use_rscale, m2l_translation) + def coefficients_from_source_vec(self, kernels, avec, bvec, rscale, weights, sac=None): """Form an expansion with a linear combination of kernels and weights. @@ -319,195 +234,6 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): return kernel.postprocess_at_target(result, bvec) - def m2l_translation_classes_dependent_ndata(self, src_expansion): - """Returns number of expressions in M2L global precomputation step. - """ - mis_with_dummy_rows, _, _ = \ - self._m2l_translation_classes_dependent_data_mis(src_expansion) - - return len(mis_with_dummy_rows) - - def _m2l_translation_classes_dependent_data_mis(self, src_expansion): - """We would like to compute the M2L by way of a circulant matrix below. - To get the matrix representing the M2L into circulant form, a certain - numbering of rows and columns (as identified by multi-indices) is - required. This routine returns that numbering. - - .. note:: - - The set of multi-indices returned may be a superset of the - coefficients used by the expansion. On the input end, those - coefficients are taken as zero. On output, they are simply - dropped from the computed result. - - This method returns the multi-indices representing the rows - of the circulant matrix, the multi-indices representing the rows - of the M2L translation matrix and the maximum multi-index of the - latter. - """ - from pytools import generate_nonnegative_integer_tuples_below as gnitb - from sumpy.tools import add_mi - - # max_mi is the multi-index which is the sum of the - # element-wise maximum of source multi-indices and the - # element-wise maximum of target multi-indices. - max_mi = [0]*self.dim - for i in range(self.dim): - max_mi[i] = max(mi[i] for mi in - src_expansion.get_coefficient_identifiers()) - max_mi[i] += max(mi[i] for mi in - self.get_coefficient_identifiers()) - - # These are the multi-indices representing the rows - # in the circulant matrix. Note that to get the circulant - # matrix structure some multi-indices that are not in the - # M2L translation matrix are added. - # This corresponds to adding O(p^(d-1)) - # additional rows and columns in the case of some PDEs - # like Laplace and O(p^d) in other cases. - circulant_matrix_mis = list(gnitb([m + 1 for m in max_mi])) - - # These are the multi-indices representing the rows - # in the M2L translation matrix without the additional - # multi-indices in the circulant matrix - needed_vector_terms = set() - # For eg: 2D full Taylor Laplace, we only need kernel derivatives - # (n1+n2, m1+m2), n1+m1<=p, n2+m2<=p - for tgt_deriv in self.get_coefficient_identifiers(): - for src_deriv in src_expansion.get_coefficient_identifiers(): - needed = add_mi(src_deriv, tgt_deriv) - if needed not in needed_vector_terms: - needed_vector_terms.add(needed) - - return circulant_matrix_mis, tuple(needed_vector_terms), max_mi - - def m2l_translation_classes_dependent_data(self, src_expansion, src_rscale, - dvec, tgt_rscale, sac): - - # We know the general form of the multipole expansion is: - # - # coeff0 * diff(kernel(src - c1), mi0) + - # coeff1 * diff(kernel(src - c1), mi1) + ... - # - # To get the local expansion coefficients, we take derivatives of - # the multipole expansion. For eg: the coefficient w.r.t mir is - # - # coeff0 * diff(kernel(c2 - c1), mi0 + mir) + - # coeff1 * diff(kernel(c2 - c1), mi1 + mir) + ... - # - # The derivatives above depends only on `c2 - c1` and can be precomputed - # globally as there are only a finite number of values for `c2 - c1` for - # m2l. - - if not self.use_rscale: - src_rscale = 1 - - circulant_matrix_mis, needed_vector_terms, max_mi = \ - self._m2l_translation_classes_dependent_data_mis(src_expansion) - - circulant_matrix_ident_to_index = {ident: i for i, ident in - enumerate(circulant_matrix_mis)} - - # Create a expansion terms wrangler for derivatives up to order - # (tgt order)+(src order) including a corresponding reduction matrix - # For eg: 2D full Taylor Laplace, this is (n, m), - # n+m<=2*p, n<=2*p, m<=2*p - srcplusderiv_terms_wrangler = \ - src_expansion.expansion_terms_wrangler.copy( - order=self.order + src_expansion.order, max_mi=tuple(max_mi)) - srcplusderiv_full_coeff_ids = \ - srcplusderiv_terms_wrangler.get_full_coefficient_identifiers() - srcplusderiv_ident_to_index = {ident: i for i, ident in - enumerate(srcplusderiv_full_coeff_ids)} - - # The vector has the kernel derivatives and depends only on the distance - # between the two centers - taker = src_expansion.kernel.get_derivative_taker(dvec, src_rscale, sac) - vector_stored = [] - # Calculate the kernel derivatives for the compressed set - for term in \ - srcplusderiv_terms_wrangler.get_coefficient_identifiers(): - kernel_deriv = taker.diff(term) - vector_stored.append(kernel_deriv) - # Calculate the kernel derivatives for the full set - vector_full = \ - srcplusderiv_terms_wrangler.get_full_kernel_derivatives_from_stored( - vector_stored, src_rscale) - - for term in srcplusderiv_full_coeff_ids: - assert term in needed_vector_terms - - vector = [0]*len(needed_vector_terms) - for i, term in enumerate(needed_vector_terms): - vector[i] = add_to_sac(sac, - vector_full[srcplusderiv_ident_to_index[term]]) - - # Add zero values needed to make the translation matrix circulant - derivatives_full = [0]*len(circulant_matrix_mis) - for expr, mi in zip(vector, needed_vector_terms): - derivatives_full[circulant_matrix_ident_to_index[mi]] = expr - - if self.use_fft_for_m2l: - # Note that the matrix we have now is a mirror image of a - # circulant matrix. We reverse the first column to get the - # first column for the circulant matrix and then finally - # use the FFT for convolution represented by the circulant - # matrix. - return fft(list(reversed(derivatives_full)), sac=sac) - - return derivatives_full - - def m2l_preprocess_multipole_exprs(self, src_expansion, src_coeff_exprs, sac, - src_rscale): - circulant_matrix_mis, needed_vector_terms, max_mi = \ - self._m2l_translation_classes_dependent_data_mis(src_expansion) - circulant_matrix_ident_to_index = {ident: i for i, ident in - enumerate(circulant_matrix_mis)} - - # Calculate the input vector for the circulant matrix - input_vector = [0] * len(circulant_matrix_mis) - for coeff, term in zip( - src_coeff_exprs, - src_expansion.get_coefficient_identifiers()): - input_vector[circulant_matrix_ident_to_index[term]] = \ - add_to_sac(sac, coeff) - - if self.use_fft_for_m2l: - return fft(input_vector, sac=sac) - else: - return input_vector - - def m2l_preprocess_multipole_nexprs(self, src_expansion): - circulant_matrix_mis, _, _ = \ - self._m2l_translation_classes_dependent_data_mis(src_expansion) - return len(circulant_matrix_mis) - - def m2l_postprocess_local_exprs(self, src_expansion, m2l_result, src_rscale, - tgt_rscale, sac): - circulant_matrix_mis, needed_vector_terms, max_mi = \ - self._m2l_translation_classes_dependent_data_mis(src_expansion) - circulant_matrix_ident_to_index = {ident: i for i, ident in - enumerate(circulant_matrix_mis)} - - if self.use_fft_for_m2l: - n = len(circulant_matrix_mis) - m2l_result = fft(m2l_result, inverse=True, sac=sac) - # since we reversed the M2L matrix, we reverse the result - # to get the correct result - m2l_result = list(reversed(m2l_result[:n])) - - # Filter out the dummy rows and scale them for target - rscale_ratio = add_to_sac(sac, tgt_rscale/src_rscale) - result = [ - m2l_result[circulant_matrix_ident_to_index[term]] - * rscale_ratio**sum(term) - for term in self.get_coefficient_identifiers()] - - return result - - def m2l_postprocess_local_nexprs(self, src_expansion): - return self.m2l_translation_classes_dependent_ndata(src_expansion) - def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale, sac=None, _fast_version=True, m2l_translation_classes_dependent_data=None): @@ -527,32 +253,9 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # {{{ M2L if isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase): - circulant_matrix_mis, needed_vector_terms, max_mi = \ - self._m2l_translation_classes_dependent_data_mis(src_expansion) - - if not m2l_translation_classes_dependent_data: - derivatives = self.m2l_translation_classes_dependent_data( - src_expansion, src_rscale, dvec, tgt_rscale, sac) - else: - derivatives = m2l_translation_classes_dependent_data - - if self.use_fft_for_m2l: - assert len(src_coeff_exprs) == len(derivatives) - result = [a*b for a, b in zip(derivatives, src_coeff_exprs)] - else: - if not self.use_preprocessing_for_m2l: - src_coeff_exprs = self.m2l_preprocess_multipole_exprs( - src_expansion, src_coeff_exprs, sac, src_rscale) - - # Returns a big symbolic sum of matrix entries - # (FIXME? Though this is just the correctness-checking - # fallback for the FFT anyhow) - result = matvec_toeplitz_upper_triangular(src_coeff_exprs, - derivatives) - - if not self.use_preprocessing_for_m2l: - result = self.m2l_postprocess_local_exprs(src_expansion, - result, src_rscale, tgt_rscale, sac) + result = self.m2l_translation.translate(self, src_expansion, + src_coeff_exprs, src_rscale, dvec, tgt_rscale, sac, + m2l_translation_classes_dependent_data) logger.info("building translation operator: done") return result @@ -687,7 +390,6 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): (taker.diff(mi).xreplace(replace_dict) * rscale_ratio**sum(mi)) for mi in self.get_coefficient_identifiers()] # }}} - # }}} logger.info("building translation operator: done") return result @@ -695,44 +397,8 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansionBase if isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase): - if self.use_preprocessing_for_m2l: - ncoeff_src = self.m2l_preprocess_multipole_nexprs(src_expansion) - ncoeff_tgt = self.m2l_postprocess_local_nexprs(src_expansion) - icoeff_src = pymbolic.var("icoeff_src") - icoeff_tgt = pymbolic.var("icoeff_tgt") - domains = [f"{{[icoeff_tgt]: 0<=icoeff_tgt<{ncoeff_tgt} }}"] - - coeff = pymbolic.var("coeff") - src_coeffs = pymbolic.var("src_coeffs") - m2l_translation_classes_dependent_data = pymbolic.var("data") - - if self.use_fft_for_m2l: - expr = src_coeffs[icoeff_tgt] \ - * m2l_translation_classes_dependent_data[icoeff_tgt] - else: - toeplitz_first_row = src_coeffs[icoeff_src-icoeff_tgt] - vector = m2l_translation_classes_dependent_data[icoeff_src] - expr = toeplitz_first_row * vector - domains.append( - f"{{[icoeff_src]: icoeff_tgt<=icoeff_src<{ncoeff_src} }}") - - expr = src_coeffs[icoeff_tgt] \ - * m2l_translation_classes_dependent_data[icoeff_tgt] - - insns = [ - lp.Assignment( - assignee=coeff[icoeff_tgt], - expression=coeff[icoeff_tgt] + expr), - ] - return lp.make_function(domains, insns, - kernel_data=[ - lp.GlobalArg("coeff, src_coeffs, data", - shape=lp.auto), - lp.ValueArg("src_rscale, tgt_rscale"), - ...], - name="e2e", - lang_version=lp.MOST_RECENT_LANGUAGE_VERSION, - ) + return self.m2l_translation.loopy_translate(self, src_expansion) + raise NotImplementedError( f"A direct loopy kernel for translation from " f"{src_expansion} to {self} is not implemented.") @@ -742,10 +408,9 @@ class VolumeTaylorLocalExpansion( VolumeTaylorExpansion, VolumeTaylorLocalExpansionBase): - def __init__(self, kernel, order, use_rscale=None, - use_fft_for_m2l=False, use_preprocessing_for_m2l=None): + def __init__(self, kernel, order, use_rscale=None, m2l_translation=None): VolumeTaylorLocalExpansionBase.__init__(self, kernel, order, use_rscale, - use_fft_for_m2l, use_preprocessing_for_m2l=None) + m2l_translation=m2l_translation) VolumeTaylorExpansion.__init__(self, kernel, order, use_rscale) @@ -753,10 +418,9 @@ class LinearPDEConformingVolumeTaylorLocalExpansion( LinearPDEConformingVolumeTaylorExpansion, VolumeTaylorLocalExpansionBase): - def __init__(self, kernel, order, use_rscale=None, - use_fft_for_m2l=False, use_preprocessing_for_m2l=None): + def __init__(self, kernel, order, use_rscale=None, m2l_translation=None): VolumeTaylorLocalExpansionBase.__init__(self, kernel, order, use_rscale, - use_fft_for_m2l, use_preprocessing_for_m2l) + m2l_translation=m2l_translation) LinearPDEConformingVolumeTaylorExpansion.__init__( self, kernel, order, use_rscale) @@ -800,21 +464,13 @@ class BiharmonicConformingVolumeTaylorLocalExpansion( class _FourierBesselLocalExpansion(LocalExpansionBase): def __init__(self, kernel, order, use_rscale=None, - use_fft_for_m2l=False, use_preprocessing_for_m2l=None): - - if use_fft_for_m2l: - # FIXME: expansion with FFT is correct symbolically and can be verified - # with sympy. However there are numerical issues that we have to deal - # with. Greengard and Rokhlin 1988 attributes this to numerical - # instability but gives rscale as a possible solution. Sumpy's rscale - # choice is slightly different from Greengard and Rokhlin and that - # might be the reason for this numerical issue. - raise ValueError("Bessel based expansions with FFT is not fully " - "supported yet.") - - super().__init__(kernel, order, use_rscale, - use_fft_for_m2l=use_fft_for_m2l, - use_preprocessing_for_m2l=use_preprocessing_for_m2l) + m2l_translation=None): + if not m2l_translation: + from sumpy.expansion.m2l import DefaultM2LTranslationClassFactory + factory = DefaultM2LTranslationClassFactory() + m2l_translation = factory.get_m2l_translation_class(kernel, + self.__class__)() + super().__init__(kernel, order, use_rscale, m2l_translation) def get_storage_index(self, k): return self.order+k @@ -856,94 +512,6 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): * sym.exp(sym.I * c * -target_angle_rel_center), bvec) for c in self.get_coefficient_identifiers()) - def m2l_translation_classes_dependent_ndata(self, src_expansion): - nexpr = 2 * self.order + 2 * src_expansion.order + 1 - return nexpr - - def m2l_translation_classes_dependent_data(self, src_expansion, src_rscale, - dvec, tgt_rscale, sac): - - from sumpy.symbolic import sym_real_norm_2, Hankel1 - from sumpy.tools import fft - - dvec_len = sym_real_norm_2(dvec) - new_center_angle_rel_old_center = sym.atan2(dvec[1], dvec[0]) - arg_scale = self.get_bessel_arg_scaling() - # [-(src_order+tgt_order), ..., 0, ..., (src_order + tgt_order)] - m2l_translation_classes_dependent_data = \ - [0] * (2*self.order + 2 * src_expansion.order + 1) - - # The M2L is a mirror image of a Toeplitz matvec with Hankel function - # evaluations. https://dlmf.nist.gov/10.23.F1 - # This loop computes the first row and the last column vector sufficient - # to specify the matrix entries. - for j in self.get_coefficient_identifiers(): - idx_j = self.get_storage_index(j) - for m in src_expansion.get_coefficient_identifiers(): - idx_m = src_expansion.get_storage_index(m) - m2l_translation_classes_dependent_data[idx_j + idx_m] = ( - Hankel1(m + j, arg_scale * dvec_len, 0) - * sym.exp(sym.I * (m + j) * new_center_angle_rel_old_center)) - - if self.use_fft_for_m2l: - order = src_expansion.order - # For this expansion, we have a mirror image of a Toeplitz matrix. - # First, we have to take the mirror image of the M2L matrix. - # - # After that the Toeplitz matrix has to be embedded in a circulant - # matrix. In this cicrcular matrix the first part of the first - # column is the first column of the Toeplitz matrix which is - # the last column of the M2L matrix. The second part is the - # reverse of the first row of the Toeplitz matrix which - # is the reverse of the first row of the M2L matrix. - first_row_m2l, last_column_m2l = \ - m2l_translation_classes_dependent_data[:2*order], \ - m2l_translation_classes_dependent_data[2*order:] - first_column_toeplitz = last_column_m2l - first_row_toeplitz = list(reversed(first_row_m2l)) - - first_column_circulant = list(first_column_toeplitz) + \ - list(reversed(first_row_toeplitz)) - return fft(first_column_circulant, sac) - else: - return m2l_translation_classes_dependent_data - - def m2l_preprocess_multipole_exprs(self, src_expansion, src_coeff_exprs, sac, - src_rscale): - - from sumpy.tools import fft - src_coeff_exprs = list(src_coeff_exprs) - for m in src_expansion.get_coefficient_identifiers(): - src_coeff_exprs[src_expansion.get_storage_index(m)] *= src_rscale**abs(m) - - if self.use_fft_for_m2l: - src_coeff_exprs = list(reversed(src_coeff_exprs)) - src_coeff_exprs += [0] * (len(src_coeff_exprs) - 1) - return fft(src_coeff_exprs, sac=sac) - else: - return src_coeff_exprs - - def m2l_preprocess_multipole_nexprs(self, src_expansion): - return 2*src_expansion.order + 1 - - def m2l_postprocess_local_exprs(self, src_expansion, m2l_result, src_rscale, - tgt_rscale, sac): - - if self.use_fft_for_m2l: - m2l_result = fft(m2l_result, inverse=True, sac=sac) - m2l_result = m2l_result[:2*self.order+1] - - # Filter out the dummy rows and scale them for target - result = [] - for j in self.get_coefficient_identifiers(): - result.append(m2l_result[self.get_storage_index(j)] - * tgt_rscale**(abs(j)) * sym.Integer(-1)**j) - - return result - - def m2l_postprocess_local_nexprs(self, src_expansion): - return 2*self.order + 1 - def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale, sac=None, m2l_translation_classes_dependent_data=None): from sumpy.symbolic import sym_real_norm_2, BesselJ @@ -970,34 +538,9 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): return translated_coeffs if isinstance(src_expansion, self.mpole_expn_class): - if m2l_translation_classes_dependent_data is None: - derivatives = self.m2l_translation_classes_dependent_data( - src_expansion, src_rscale, dvec, tgt_rscale, sac=sac) - else: - derivatives = m2l_translation_classes_dependent_data - - if self.use_fft_for_m2l: - assert m2l_translation_classes_dependent_data is not None - assert len(derivatives) == len(src_coeff_exprs) - translated_coeffs = [a * b for a, b in zip(derivatives, - src_coeff_exprs)] - else: - if not self.use_preprocessing_for_m2l: - src_coeff_exprs = self.m2l_preprocess_multipole_exprs( - src_expansion, src_coeff_exprs, sac, src_rscale) - - translated_coeffs = [ - sum(derivatives[m + j + self.order + src_expansion.order] - * src_coeff_exprs[src_expansion.get_storage_index(m)] - for m in src_expansion.get_coefficient_identifiers()) - for j in self.get_coefficient_identifiers()] - - if not self.use_preprocessing_for_m2l: - translated_coeffs = self.m2l_postprocess_local_exprs( - src_expansion, translated_coeffs, src_rscale, tgt_rscale, - sac) - - return translated_coeffs + return self.m2l_translation.translate(self, src_expansion, + src_coeff_exprs, src_rscale, dvec, tgt_rscale, sac, + m2l_translation_classes_dependent_data) raise RuntimeError( "do not know how to translate " @@ -1005,57 +548,20 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): def loopy_translate_from(self, src_expansion): if isinstance(src_expansion, self.mpole_expn_class): - if self.use_preprocessing_for_m2l: - ncoeff_src = self.m2l_preprocess_multipole_nexprs(src_expansion) - ncoeff_tgt = self.m2l_postprocess_local_nexprs(src_expansion) - - icoeff_src = pymbolic.var("icoeff_src") - icoeff_tgt = pymbolic.var("icoeff_tgt") - domains = [f"{{[icoeff_tgt]: 0<=icoeff_tgt<{ncoeff_tgt} }}"] + return self.m2l_translation.loopy_translate(self, src_expansion) - coeff = pymbolic.var("coeff") - src_coeffs = pymbolic.var("src_coeffs") - m2l_translation_classes_dependent_data = pymbolic.var("data") - - if self.use_fft_for_m2l: - expr = src_coeffs[icoeff_tgt] \ - * m2l_translation_classes_dependent_data[icoeff_tgt] - else: - expr = src_coeffs[icoeff_src] \ - * m2l_translation_classes_dependent_data[ - icoeff_tgt + icoeff_src] - domains.append( - f"{{[icoeff_src]: 0<=icoeff_src<{ncoeff_src} }}") - - insns = [ - lp.Assignment( - assignee=coeff[icoeff_tgt], - expression=coeff[icoeff_tgt] + expr), - ] - return lp.make_function(domains, insns, - kernel_data=[ - lp.GlobalArg("coeff, src_coeffs, data", - shape=lp.auto), - lp.ValueArg("src_rscale, tgt_rscale"), - ...], - name="e2e", - lang_version=lp.MOST_RECENT_LANGUAGE_VERSION, - ) raise NotImplementedError( f"A direct loopy kernel for translation from " f"{src_expansion} to {self} is not implemented.") class H2DLocalExpansion(_FourierBesselLocalExpansion): - def __init__(self, kernel, order, use_rscale=None, - use_fft_for_m2l=False, use_preprocessing_for_m2l=None): + def __init__(self, kernel, order, use_rscale=None, m2l_translation=None): from sumpy.kernel import HelmholtzKernel assert (isinstance(kernel.get_base_kernel(), HelmholtzKernel) and kernel.dim == 2) - super().__init__(kernel, order, use_rscale, - use_fft_for_m2l=use_fft_for_m2l, - use_preprocessing_for_m2l=use_preprocessing_for_m2l) + super().__init__(kernel, order, use_rscale, m2l_translation=m2l_translation) from sumpy.expansion.multipole import H2DMultipoleExpansion self.mpole_expn_class = H2DMultipoleExpansion @@ -1065,15 +571,12 @@ class H2DLocalExpansion(_FourierBesselLocalExpansion): class Y2DLocalExpansion(_FourierBesselLocalExpansion): - def __init__(self, kernel, order, use_rscale=None, - use_fft_for_m2l=False, use_preprocessing_for_m2l=None): + def __init__(self, kernel, order, use_rscale=None, m2l_translation=None): from sumpy.kernel import YukawaKernel assert (isinstance(kernel.get_base_kernel(), YukawaKernel) and kernel.dim == 2) - super().__init__(kernel, order, use_rscale, - use_fft_for_m2l=use_fft_for_m2l, - use_preprocessing_for_m2l=use_preprocessing_for_m2l) + super().__init__(kernel, order, use_rscale, m2l_translation=m2l_translation) from sumpy.expansion.multipole import Y2DMultipoleExpansion self.mpole_expn_class = Y2DMultipoleExpansion diff --git a/sumpy/expansion/m2l.py b/sumpy/expansion/m2l.py new file mode 100644 index 00000000..8e61f125 --- /dev/null +++ b/sumpy/expansion/m2l.py @@ -0,0 +1,770 @@ +__copyright__ = "Copyright (C) 2022 Isuru Fernando" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from typing import Tuple, Any + +import pymbolic +import loopy as lp +import sumpy.symbolic as sym +from sumpy.tools import ( + add_to_sac, fft, + matvec_toeplitz_upper_triangular) + +import logging +logger = logging.getLogger(__name__) + +__doc__ = """ +.. autoclass:: M2LTranslationClassFactoryBase +.. autoclass:: NonFFTM2LTranslationClassFactory +.. autoclass:: FFTM2LTranslationClassFactory +.. autoclass:: DefaultM2LTranslationClassFactory + +.. autoclass:: M2LTranslationBase +.. autoclass:: VolumeTaylorM2LTranslation +.. autoclass:: VolumeTaylorM2LWithFFT +.. autoclass:: FourierBesselM2LTranslation +""" + + +# {{{ M2L translation factory + +class M2LTranslationClassFactoryBase: + """An interface + .. automethod:: get_m2l_translation_class + """ + + def get_m2l_translation_class(self, base_kernel, local_expansion_class): + """Returns a subclass of :class:`M2LTranslationBase` suitable for + *base_kernel* and *local_expansion_class*. + """ + raise NotImplementedError() + + +class NonFFTM2LTranslationClassFactory(M2LTranslationClassFactoryBase): + """An implementation of :class:`M2LTranslationClassFactoryBase` that uses + non FFT M2L translation class. + """ + + def get_m2l_translation_class(self, base_kernel, local_expansion_class): + """Returns a subclass of :class:`M2LTranslationBase` suitable for + *base_kernel* and *local_expansion_class*. + """ + from sumpy.expansion.local import (VolumeTaylorLocalExpansionBase, + _FourierBesselLocalExpansion) + if issubclass(local_expansion_class, VolumeTaylorLocalExpansionBase): + return VolumeTaylorM2LTranslation + elif issubclass(local_expansion_class, _FourierBesselLocalExpansion): + return FourierBesselM2LTranslation + else: + raise RuntimeError( + f"Unknown local_expansion_class: {local_expansion_class}") + + +class FFTM2LTranslationClassFactory(M2LTranslationClassFactoryBase): + """An implementation of :class:`M2LTranslationClassFactoryBase` that uses + FFT M2L translation class. + """ + + def get_m2l_translation_class(self, base_kernel, local_expansion_class): + """Returns a subclass of :class:`M2LTranslationBase` suitable for + *base_kernel* and *local_expansion_class*. + """ + from sumpy.expansion.local import (VolumeTaylorLocalExpansionBase, + _FourierBesselLocalExpansion) + if issubclass(local_expansion_class, VolumeTaylorLocalExpansionBase): + return VolumeTaylorM2LWithFFT + elif issubclass(local_expansion_class, _FourierBesselLocalExpansion): + return FourierBesselM2LWithFFT + else: + raise RuntimeError( + f"Unknown local_expansion_class: {local_expansion_class}") + + +class DefaultM2LTranslationClassFactory(M2LTranslationClassFactoryBase): + """An implementation of :class:`M2LTranslationClassFactoryBase` that gives the + 'best known' translation type for each kernel and local expansion class""" + def get_m2l_translation_class(self, base_kernel, local_expansion_class): + from sumpy.expansion.local import (VolumeTaylorLocalExpansionBase, + _FourierBesselLocalExpansion) + from sumpy.kernel import LaplaceKernel + if issubclass(local_expansion_class, VolumeTaylorLocalExpansionBase): + if isinstance(base_kernel, LaplaceKernel): + return VolumeTaylorM2LWithFFT + else: + return VolumeTaylorM2LTranslation + elif issubclass(local_expansion_class, _FourierBesselLocalExpansion): + return FourierBesselM2LTranslation + else: + raise RuntimeError( + f"Unknown local_expansion_class: {local_expansion_class}") + + +# }}} + +# {{{ M2LTranslationBase + +class M2LTranslationBase: + """Base class for Multipole to Local Translation + + .. automethod:: translate + .. automethod:: loopy_translate + .. automethod:: translation_classes_dependent_data + .. automethod:: translation_classes_dependent_ndata + .. automethod:: preprocess_multipole_exprs + .. automethod:: preprocess_multipole_nexprs + .. automethod:: postprocess_local_exprs + .. automethod:: postprocess_local_nexprs + .. autoattribute:: use_fft + .. autoattribute:: use_preprocessing + """ + + use_fft = False + use_preprocessing = False + + def translate(self, tgt_expansion, src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac=None, translation_classes_dependent_data=None): + raise NotImplementedError + + def loopy_translate(self, tgt_expansion, src_expansion): + raise NotImplementedError( + f"A direct loopy kernel for translation from " + f"{src_expansion} to {tgt_expansion} using {self} is not implemented.") + + def translation_classes_dependent_data(self, tgt_expansion, src_expansion, + src_rscale, dvec, sac) -> Tuple[Any]: + """Return an iterable of expressions that needs to be precomputed + for multipole-to-local translations that depend only on the + distance between the multipole center and the local center which + is given as *dvec*. + + Since there are only a finite number of different values for the + distance between per level, these can be precomputed for the tree. + In :mod:`boxtree`, these distances are referred to as translation + classes. + """ + return tuple() + + def translation_classes_dependent_ndata(self, tgt_expansion, src_expansion): + """Return the number of expressions returned by + :func:`~sumpy.expansion.m2l.M2LTranslationBase.translation_classes_dependent_data`. + This method exists because calculating the number of expressions using + the above method might be costly and + :func:`~sumpy.expansion.m2l.M2LTranslationBase.translation_classes_dependent_data` + cannot be memoized due to it having side effects through the argument + *sac*. + """ + return 0 + + def preprocess_multipole_exprs(self, tgt_expansion, src_expansion, + src_coeff_exprs, sac, src_rscale): + """Return the preprocessed multipole expansion for an optimized M2L. + Preprocessing happens once per source box before M2L translation is done. + + When FFT is turned on, the input expressions are transformed into Fourier + space. These expressions are used in a separate :mod:`loopy` kernel + to avoid having to transform for each target and source box pair. + When FFT is turned off, the expressions are equal to the multipole + expansion coefficients with zeros added + to make the M2L computation a circulant matvec. + """ + raise NotImplementedError + + def preprocess_multipole_nexprs(self, tgt_expansion, src_expansion): + """Return the number of expressions returned by + :func:`~sumpy.expansion.m2l.M2LTranslationBase.preprocess_multipole_exprs`. + This method exists because calculating the number of expressions using + the above method might be costly and it cannot be memoized due to it having + side effects through the argument *sac*. + """ + # For all use-cases we have right now, this is equal to the number of + # translation classes dependent exprs. Use that as a default. + return self.translation_classes_dependent_ndata(tgt_expansion, + src_expansion) + + def postprocess_local_exprs(self, tgt_expansion, src_expansion, m2l_result, + src_rscale, tgt_rscale, sac): + """Return postprocessed local expansion for an optimized M2L. + Postprocessing happens once per target box just after the M2L translation + is done and before storing the expansion coefficients for the local + expansion. + + When FFT is turned on, the output expressions are transformed from Fourier + space back to the original space. + """ + raise NotImplementedError + + def postprocess_local_nexprs(self, tgt_expansion, src_expansion): + """Return the number of expressions given as input to + :func:`~sumpy.expansion.m2l.M2LTranslationBase.postprocess_local_exprs`. + This method exists because calculating the number of expressions using + the above method might be costly and it cannot be memoized due to it + having side effects through the argument *sac*. + """ + # For all use-cases we have right now, this is equal to the number of + # translation classes dependent exprs. Use that as a default. + return self.translation_classes_dependent_ndata(tgt_expansion, + src_expansion) + + def update_persistent_hash(self, key_hash, key_builder): + key_hash.update(type(self).__name__.encode("utf8")) + + +# }}} M2LTranslationBase + +# {{{ VolumeTaylorM2LTranslation + +class VolumeTaylorM2LTranslation(M2LTranslationBase): + def translate(self, tgt_expansion, src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac=None, translation_classes_dependent_data=None): + + if translation_classes_dependent_data: + derivatives = translation_classes_dependent_data + else: + derivatives = self.translation_classes_dependent_data( + tgt_expansion, src_expansion, src_rscale, dvec, sac=sac) + + src_coeff_exprs = self.preprocess_multipole_exprs( + tgt_expansion, src_expansion, src_coeff_exprs, sac, src_rscale) + + # Returns a big symbolic sum of matrix entries + # (FIXME? Though this is just the correctness-checking + # fallback for the FFT anyhow) + result = matvec_toeplitz_upper_triangular(src_coeff_exprs, + derivatives) + + result = self.postprocess_local_exprs(tgt_expansion, src_expansion, + result, src_rscale, tgt_rscale, sac) + + return result + + def translation_classes_dependent_ndata(self, tgt_expansion, src_expansion): + """Returns number of expressions in M2L global precomputation step. + """ + mis_with_dummy_rows, _, _ = \ + self._translation_classes_dependent_data_mis(tgt_expansion, + src_expansion) + + return len(mis_with_dummy_rows) + + def _translation_classes_dependent_data_mis(self, tgt_expansion, + src_expansion): + """We would like to compute the M2L by way of a circulant matrix below. + To get the matrix representing the M2L into circulant form, a certain + numbering of rows and columns (as identified by multi-indices) is + required. This routine returns that numbering. + + .. note:: + + The set of multi-indices returned may be a superset of the + coefficients used by the expansion. On the input end, those + coefficients are taken as zero. On output, they are simply + dropped from the computed result. + + This method returns the multi-indices representing the rows + of the circulant matrix, the multi-indices representing the rows + of the M2L translation matrix and the maximum multi-index of the + latter. + """ + from pytools import generate_nonnegative_integer_tuples_below as gnitb + from sumpy.tools import add_mi + + dim = tgt_expansion.dim + # max_mi is the multi-index which is the sum of the + # element-wise maximum of source multi-indices and the + # element-wise maximum of target multi-indices. + max_mi = [0]*dim + for i in range(dim): + max_mi[i] = max(mi[i] for mi in + src_expansion.get_coefficient_identifiers()) + max_mi[i] += max(mi[i] for mi in + tgt_expansion.get_coefficient_identifiers()) + + # These are the multi-indices representing the rows + # in the circulant matrix. Note that to get the circulant + # matrix structure some multi-indices that are not in the + # M2L translation matrix are added. + # This corresponds to adding O(p^(d-1)) + # additional rows and columns in the case of some PDEs + # like Laplace and O(p^d) in other cases. + circulant_matrix_mis = list(gnitb([m + 1 for m in max_mi])) + + # These are the multi-indices representing the rows + # in the M2L translation matrix without the additional + # multi-indices in the circulant matrix + needed_vector_terms = set() + # For eg: 2D full Taylor Laplace, we only need kernel derivatives + # (n1+n2, m1+m2), n1+m1<=p, n2+m2<=p + for tgt_deriv in tgt_expansion.get_coefficient_identifiers(): + for src_deriv in src_expansion.get_coefficient_identifiers(): + needed = add_mi(src_deriv, tgt_deriv) + if needed not in needed_vector_terms: + needed_vector_terms.add(needed) + + return circulant_matrix_mis, tuple(needed_vector_terms), max_mi + + def translation_classes_dependent_data(self, tgt_expansion, src_expansion, + src_rscale, dvec, sac): + + # We know the general form of the multipole expansion is: + # + # coeff0 * diff(kernel(src - c1), mi0) + + # coeff1 * diff(kernel(src - c1), mi1) + ... + # + # To get the local expansion coefficients, we take derivatives of + # the multipole expansion. For eg: the coefficient w.r.t mir is + # + # coeff0 * diff(kernel(c2 - c1), mi0 + mir) + + # coeff1 * diff(kernel(c2 - c1), mi1 + mir) + ... + # + # The derivatives above depends only on `c2 - c1` and can be precomputed + # globally as there are only a finite number of values for `c2 - c1` for + # m2l. + + if not tgt_expansion.use_rscale: + src_rscale = 1 + + circulant_matrix_mis, needed_vector_terms, max_mi = \ + self._translation_classes_dependent_data_mis(tgt_expansion, + src_expansion) + + circulant_matrix_ident_to_index = {ident: i for i, ident in + enumerate(circulant_matrix_mis)} + + # Create a expansion terms wrangler for derivatives up to order + # (tgt order)+(src order) including a corresponding reduction matrix + # For eg: 2D full Taylor Laplace, this is (n, m), + # n+m<=2*p, n<=2*p, m<=2*p + srcplusderiv_terms_wrangler = \ + src_expansion.expansion_terms_wrangler.copy( + order=tgt_expansion.order + src_expansion.order, + max_mi=tuple(max_mi)) + srcplusderiv_full_coeff_ids = \ + srcplusderiv_terms_wrangler.get_full_coefficient_identifiers() + srcplusderiv_ident_to_index = {ident: i for i, ident in + enumerate(srcplusderiv_full_coeff_ids)} + + # The vector has the kernel derivatives and depends only on the distance + # between the two centers + taker = src_expansion.kernel.get_derivative_taker(dvec, src_rscale, sac) + vector_stored = [] + # Calculate the kernel derivatives for the compressed set + for term in \ + srcplusderiv_terms_wrangler.get_coefficient_identifiers(): + kernel_deriv = taker.diff(term) + vector_stored.append(kernel_deriv) + # Calculate the kernel derivatives for the full set + vector_full = \ + srcplusderiv_terms_wrangler.get_full_kernel_derivatives_from_stored( + vector_stored, src_rscale) + + for term in srcplusderiv_full_coeff_ids: + assert term in needed_vector_terms + + vector = [0]*len(needed_vector_terms) + for i, term in enumerate(needed_vector_terms): + vector[i] = add_to_sac(sac, + vector_full[srcplusderiv_ident_to_index[term]]) + + # Add zero values needed to make the translation matrix circulant + derivatives_full = [0]*len(circulant_matrix_mis) + for expr, mi in zip(vector, needed_vector_terms): + derivatives_full[circulant_matrix_ident_to_index[mi]] = expr + + return derivatives_full + + def preprocess_multipole_exprs(self, tgt_expansion, src_expansion, + src_coeff_exprs, sac, src_rscale): + circulant_matrix_mis, needed_vector_terms, max_mi = \ + self._translation_classes_dependent_data_mis(tgt_expansion, + src_expansion) + circulant_matrix_ident_to_index = {ident: i for i, ident in + enumerate(circulant_matrix_mis)} + + # Calculate the input vector for the circulant matrix + input_vector = [0] * len(circulant_matrix_mis) + for coeff, term in zip( + src_coeff_exprs, + src_expansion.get_coefficient_identifiers()): + input_vector[circulant_matrix_ident_to_index[term]] = \ + add_to_sac(sac, coeff) + + return input_vector + + def preprocess_multipole_nexprs(self, tgt_expansion, src_expansion): + circulant_matrix_mis, _, _ = \ + self._translation_classes_dependent_data_mis(tgt_expansion, + src_expansion) + return len(circulant_matrix_mis) + + def postprocess_local_exprs(self, tgt_expansion, src_expansion, m2l_result, + src_rscale, tgt_rscale, sac): + circulant_matrix_mis, needed_vector_terms, max_mi = \ + self._translation_classes_dependent_data_mis(tgt_expansion, + src_expansion) + circulant_matrix_ident_to_index = {ident: i for i, ident in + enumerate(circulant_matrix_mis)} + + # Filter out the dummy rows and scale them for target + rscale_ratio = add_to_sac(sac, tgt_rscale/src_rscale) + result = [ + m2l_result[circulant_matrix_ident_to_index[term]] + * rscale_ratio**sum(term) + for term in tgt_expansion.get_coefficient_identifiers()] + + return result + + def postprocess_local_nexprs(self, tgt_expansion, src_expansion): + return self.translation_classes_dependent_ndata( + tgt_expansion, src_expansion) + + +# }}} VolumeTaylorM2LTranslation + +# {{{ VolumeTaylorM2LWithPreprocessedMultipoles + +class VolumeTaylorM2LWithPreprocessedMultipoles(VolumeTaylorM2LTranslation): + use_preprocessing = True + + def translate(self, tgt_expansion, src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac=None, translation_classes_dependent_data=None): + + assert translation_classes_dependent_data + derivatives = translation_classes_dependent_data + # Returns a big symbolic sum of matrix entries + # (FIXME? Though this is just the correctness-checking + # fallback for the FFT anyhow) + result = matvec_toeplitz_upper_triangular(src_coeff_exprs, + derivatives) + return result + + def loopy_translate(self, tgt_expansion, src_expansion): + ncoeff_src = self.preprocess_multipole_nexprs(tgt_expansion, + src_expansion) + ncoeff_tgt = self.postprocess_local_nexprs(tgt_expansion, src_expansion) + icoeff_src = pymbolic.var("icoeff_src") + icoeff_tgt = pymbolic.var("icoeff_tgt") + domains = [f"{{[icoeff_tgt]: 0<=icoeff_tgt<{ncoeff_tgt} }}"] + + coeff = pymbolic.var("coeff") + src_coeffs = pymbolic.var("src_coeffs") + translation_classes_dependent_data = pymbolic.var("data") + + if self.use_fft: + expr = src_coeffs[icoeff_tgt] \ + * translation_classes_dependent_data[icoeff_tgt] + else: + toeplitz_first_row = src_coeffs[icoeff_src-icoeff_tgt] + vector = translation_classes_dependent_data[icoeff_src] + expr = toeplitz_first_row * vector + domains.append( + f"{{[icoeff_src]: icoeff_tgt<=icoeff_src<{ncoeff_src} }}") + + expr = src_coeffs[icoeff_tgt] \ + * translation_classes_dependent_data[icoeff_tgt] + + insns = [ + lp.Assignment( + assignee=coeff[icoeff_tgt], + expression=coeff[icoeff_tgt] + expr), + ] + return lp.make_function(domains, insns, + kernel_data=[ + lp.GlobalArg("coeff, src_coeffs, data", + shape=lp.auto), + lp.ValueArg("src_rscale, tgt_rscale"), + ...], + name="e2e", + lang_version=lp.MOST_RECENT_LANGUAGE_VERSION, + ) + + +# }}} VolumeTaylorM2LWithPreprocessedMultipoles + +# {{{ VolumeTaylorM2LWithFFT + +class VolumeTaylorM2LWithFFT(VolumeTaylorM2LWithPreprocessedMultipoles): + use_fft = True + + def translate(self, tgt_expansion, src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac=None, translation_classes_dependent_data=None): + + assert translation_classes_dependent_data + derivatives = translation_classes_dependent_data + print(src_coeff_exprs, derivatives) + assert len(src_coeff_exprs) == len(derivatives) + result = [a*b for a, b in zip(derivatives, src_coeff_exprs)] + return result + + def translation_classes_dependent_data(self, tgt_expansion, src_expansion, + src_rscale, dvec, sac): + + derivatives_full = super().translation_classes_dependent_data( + tgt_expansion, src_expansion, src_rscale, dvec, sac) + # Note that the matrix we have now is a mirror image of a + # circulant matrix. We reverse the first column to get the + # first column for the circulant matrix and then finally + # use the FFT for convolution represented by the circulant + # matrix. + return fft(list(reversed(derivatives_full)), sac=sac) + + def preprocess_multipole_exprs(self, tgt_expansion, src_expansion, + src_coeff_exprs, sac, src_rscale): + input_vector = super().preprocess_multipole_exprs( + tgt_expansion, src_expansion, src_coeff_exprs, sac, src_rscale) + + return fft(input_vector, sac=sac) + + def postprocess_local_exprs(self, tgt_expansion, src_expansion, m2l_result, + src_rscale, tgt_rscale, sac): + circulant_matrix_mis, _, _ = \ + self._translation_classes_dependent_data_mis(tgt_expansion, + src_expansion) + n = len(circulant_matrix_mis) + m2l_result = fft(m2l_result, inverse=True, sac=sac) + # since we reversed the M2L matrix, we reverse the result + # to get the correct result + m2l_result = list(reversed(m2l_result[:n])) + + return super().postprocess_local_exprs(tgt_expansion, + src_expansion, m2l_result, src_rscale, tgt_rscale, sac) + + +# }}} VolumeTaylorM2LWithFFT + +# {{{ FourierBesselM2LTranslation + +class FourierBesselM2LTranslation(M2LTranslationBase): + def translate(self, tgt_expansion, src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac=None, translation_classes_dependent_data=None): + + if translation_classes_dependent_data is None: + derivatives = self.translation_classes_dependent_data(tgt_expansion, + src_expansion, src_rscale, dvec, sac=sac) + else: + derivatives = translation_classes_dependent_data + + src_coeff_exprs = self.preprocess_multipole_exprs(tgt_expansion, + src_expansion, src_coeff_exprs, sac, src_rscale) + + translated_coeffs = [ + sum(derivatives[m + j + tgt_expansion.order + src_expansion.order] + * src_coeff_exprs[src_expansion.get_storage_index(m)] + for m in src_expansion.get_coefficient_identifiers()) + for j in tgt_expansion.get_coefficient_identifiers()] + + translated_coeffs = self.postprocess_local_exprs(tgt_expansion, + src_expansion, translated_coeffs, src_rscale, tgt_rscale, + sac) + + return translated_coeffs + + def translation_classes_dependent_ndata(self, tgt_expansion, src_expansion): + nexpr = 2 * tgt_expansion.order + 2 * src_expansion.order + 1 + return nexpr + + def translation_classes_dependent_data(self, tgt_expansion, src_expansion, + src_rscale, dvec, sac): + + from sumpy.symbolic import sym_real_norm_2, Hankel1 + + dvec_len = sym_real_norm_2(dvec) + new_center_angle_rel_old_center = sym.atan2(dvec[1], dvec[0]) + arg_scale = tgt_expansion.get_bessel_arg_scaling() + # [-(src_order+tgt_order), ..., 0, ..., (src_order + tgt_order)] + translation_classes_dependent_data = \ + [0] * (2*tgt_expansion.order + 2 * src_expansion.order + 1) + + # The M2L is a mirror image of a Toeplitz matvec with Hankel function + # evaluations. https://dlmf.nist.gov/10.23.F1 + # This loop computes the first row and the last column vector sufficient + # to specify the matrix entries. + for j in tgt_expansion.get_coefficient_identifiers(): + idx_j = tgt_expansion.get_storage_index(j) + for m in src_expansion.get_coefficient_identifiers(): + idx_m = src_expansion.get_storage_index(m) + translation_classes_dependent_data[idx_j + idx_m] = ( + Hankel1(m + j, arg_scale * dvec_len, 0) + * sym.exp(sym.I * (m + j) * new_center_angle_rel_old_center)) + + return translation_classes_dependent_data + + def preprocess_multipole_exprs(self, tgt_expansion, src_expansion, + src_coeff_exprs, sac, src_rscale): + + src_coeff_exprs = list(src_coeff_exprs) + for m in src_expansion.get_coefficient_identifiers(): + src_coeff_exprs[src_expansion.get_storage_index(m)] *= src_rscale**abs(m) + return src_coeff_exprs + + def preprocess_multipole_nexprs(self, tgt_expansion, src_expansion): + return 2*src_expansion.order + 1 + + def postprocess_local_exprs(self, tgt_expansion, src_expansion, + m2l_result, src_rscale, tgt_rscale, sac): + + # Filter out the dummy rows and scale them for target + result = [] + for j in tgt_expansion.get_coefficient_identifiers(): + result.append(m2l_result[tgt_expansion.get_storage_index(j)] + * tgt_rscale**(abs(j)) * sym.Integer(-1)**j) + + return result + + def postprocess_local_nexprs(self, tgt_expansion, src_expansion): + return 2*tgt_expansion.order + 1 + + +# }}} FourierBesselM2LTranslation + +# {{{ FourierBesselM2LWithPreprocessedMultipoles + +class FourierBesselM2LWithPreprocessedMultipoles(FourierBesselM2LTranslation): + use_preprocessing = True + + def translate(self, tgt_expansion, src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac=None, translation_classes_dependent_data=None): + + assert translation_classes_dependent_data + derivatives = translation_classes_dependent_data + + translated_coeffs = [ + sum(derivatives[m + j + tgt_expansion.order + src_expansion.order] + * src_coeff_exprs[src_expansion.get_storage_index(m)] + for m in src_expansion.get_coefficient_identifiers()) + for j in tgt_expansion.get_coefficient_identifiers()] + + return translated_coeffs + + def loopy_translate(self, tgt_expansion, src_expansion): + ncoeff_src = self.preprocess_multipole_nexprs(src_expansion) + ncoeff_tgt = self.postprocess_local_nexprs(src_expansion) + + icoeff_src = pymbolic.var("icoeff_src") + icoeff_tgt = pymbolic.var("icoeff_tgt") + domains = [f"{{[icoeff_tgt]: 0<=icoeff_tgt<{ncoeff_tgt} }}"] + + coeff = pymbolic.var("coeff") + src_coeffs = pymbolic.var("src_coeffs") + translation_classes_dependent_data = pymbolic.var("data") + + if self.use_fft_for_m2l: + expr = src_coeffs[icoeff_tgt] \ + * translation_classes_dependent_data[icoeff_tgt] + else: + expr = src_coeffs[icoeff_src] \ + * translation_classes_dependent_data[ + icoeff_tgt + icoeff_src] + domains.append( + f"{{[icoeff_src]: 0<=icoeff_src<{ncoeff_src} }}") + + insns = [ + lp.Assignment( + assignee=coeff[icoeff_tgt], + expression=coeff[icoeff_tgt] + expr), + ] + return lp.make_function(domains, insns, + kernel_data=[ + lp.GlobalArg("coeff, src_coeffs, data", + shape=lp.auto), + lp.ValueArg("src_rscale, tgt_rscale"), + ...], + name="e2e", + lang_version=lp.MOST_RECENT_LANGUAGE_VERSION, + ) + + +# }}} FourierBesselM2LWithPreprocessedMultipoles + +# {{{ FourierBesselM2LWithFFT + +class FourierBesselM2LWithFFT(FourierBesselM2LWithPreprocessedMultipoles): + use_fft = True + + def __init__(self): + # FIXME: expansion with FFT is correct symbolically and can be verified + # with sympy. However there are numerical issues that we have to deal + # with. Greengard and Rokhlin 1988 attributes this to numerical + # instability but gives rscale as a possible solution. Sumpy's rscale + # choice is slightly different from Greengard and Rokhlin and that + # might be the reason for this numerical issue. + raise ValueError("Bessel based expansions with FFT is not fully " + "supported yet.") + + def translate(self, tgt_expansion, src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac=None, translation_classes_dependent_data=None): + + assert translation_classes_dependent_data is not None + derivatives = translation_classes_dependent_data + assert len(derivatives) == len(src_coeff_exprs) + return [a * b for a, b in zip(derivatives, src_coeff_exprs)] + + def loopy_translate(self, tgt_expansion, src_expansion): + raise NotImplementedError + + def translation_classes_dependent_data(self, tgt_expansion, src_expansion, + src_rscale, dvec, sac): + + from sumpy.tools import fft + translation_classes_dependent_data = \ + super().translation_classes_dependent_data(tgt_expansion, + src_expansion, src_rscale, dvec, sac) + order = src_expansion.order + # For this expansion, we have a mirror image of a Toeplitz matrix. + # First, we have to take the mirror image of the M2L matrix. + # + # After that the Toeplitz matrix has to be embedded in a circulant + # matrix. In this cicrcular matrix the first part of the first + # column is the first column of the Toeplitz matrix which is + # the last column of the M2L matrix. The second part is the + # reverse of the first row of the Toeplitz matrix which + # is the reverse of the first row of the M2L matrix. + first_row_m2l, last_column_m2l = \ + translation_classes_dependent_data[:2*order], \ + translation_classes_dependent_data[2*order:] + first_column_toeplitz = last_column_m2l + first_row_toeplitz = list(reversed(first_row_m2l)) + + first_column_circulant = list(first_column_toeplitz) + \ + list(reversed(first_row_toeplitz)) + return fft(first_column_circulant, sac) + + def preprocess_multipole_exprs(self, tgt_expansion, src_expansion, + src_coeff_exprs, sac, src_rscale): + + from sumpy.tools import fft + result = super().preprocess_multipole_exprs(tgt_expansion, + src_expansion, src_coeff_exprs, sac, src_rscale) + + result = list(reversed(result)) + result += [0] * (len(result) - 1) + return fft(result, sac=sac) + + def postprocess_local_exprs(self, tgt_expansion, src_expansion, + m2l_result, src_rscale, tgt_rscale, sac): + + m2l_result = fft(m2l_result, inverse=True, sac=sac) + m2l_result = m2l_result[:2*tgt_expansion.order+1] + return super().postprocess_local_exprs(tgt_expansion, + src_expansion, m2l_result, src_rscale, tgt_rscale, sac) + +# }}} FourierBesselM2LWithFFT +# vim: fdm=marker diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 52cb9a60..519b1200 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -62,8 +62,7 @@ class SumpyTreeIndependentDataForWrangler(TreeIndependentDataForWrangler): multipole_expansion_factory, local_expansion_factory, target_kernels, exclude_self=False, use_rscale=None, - strength_usage=None, source_kernels=None, - use_fft_for_m2l=False, use_preprocessing_for_m2l=None): + strength_usage=None, source_kernels=None): """ :arg multipole_expansion_factory: a callable of a single argument (order) that returns a multipole expansion. @@ -73,10 +72,6 @@ class SumpyTreeIndependentDataForWrangler(TreeIndependentDataForWrangler): :arg exclude_self: whether the self contribution should be excluded :arg strength_usage: passed unchanged to p2l, p2m and p2p. :arg source_kernels: passed unchanged to p2l, p2m and p2p. - :arg use_fft_for_m2l: Use an FFT based multipole-to-local expansion. - :arg use_preprocessing_for_m2l: do preprocessing of the source multipole - expansion and postprocessing of the target local expansion for - multipole-to-local expansion. """ self.multipole_expansion_factory = multipole_expansion_factory self.local_expansion_factory = local_expansion_factory @@ -85,11 +80,6 @@ class SumpyTreeIndependentDataForWrangler(TreeIndependentDataForWrangler): self.exclude_self = exclude_self self.use_rscale = use_rscale self.strength_usage = strength_usage - self.use_fft_for_m2l = use_fft_for_m2l - if use_preprocessing_for_m2l is None: - self.use_preprocessing_for_m2l = use_fft_for_m2l - else: - self.use_preprocessing_for_m2l = use_preprocessing_for_m2l super().__init__() @@ -106,9 +96,11 @@ class SumpyTreeIndependentDataForWrangler(TreeIndependentDataForWrangler): @memoize_method def local_expansion(self, order): - return self.local_expansion_factory(order, self.use_rscale, - use_fft_for_m2l=self.use_fft_for_m2l, - use_preprocessing_for_m2l=self.use_preprocessing_for_m2l) + return self.local_expansion_factory(order, self.use_rscale) + + @property + def m2l_translation(self): + return self.local_expansion(0).m2l_translation @memoize_method def p2m(self, tgt_order): @@ -274,7 +266,7 @@ class SumpyExpansionWrangler(ExpansionWranglerInterface): self.dtype = dtype - if not self.tree_indep.use_fft_for_m2l: + if not self.tree_indep.m2l_translation.use_fft: # If not FFT, we don't need complex dtypes self.preprocessed_mpole_dtype = dtype elif preprocessed_mpole_dtype is not None: @@ -320,16 +312,19 @@ class SumpyExpansionWrangler(ExpansionWranglerInterface): self.supports_translation_classes = True self.translation_classes_data = translation_classes_data - self.use_fft_for_m2l = self.tree_indep.use_fft_for_m2l def level_to_rscale(self, level): tree = self.tree order = self.level_orders[level] + r = tree.root_extent * (2**-level) # See L. Greengard and V. Rokhlin. On the efficient implementation of the # fast multipole algorithm. Technical report, # YALE UNIV NEW HAVEN CT DEPT OF COMPUTER SCIENCE, 1988. - return tree.root_extent * (2**-level) / order + # rscale that we use in sumpy is the inverse of the scaling used in the + # paper and therefore we should use r / order. However empirically + # we have observed that 2r / order is better for numerical stability. + return r * 2 / order # {{{ data vector utilities @@ -358,7 +353,9 @@ class SumpyExpansionWrangler(ExpansionWranglerInterface): def order_to_size(order): mpole_expn = self.tree_indep.multipole_expansion(order) local_expn = self.tree_indep.local_expansion(order) - return local_expn.m2l_translation_classes_dependent_ndata(mpole_expn) + m2l_translation = local_expn.m2l_translation + return m2l_translation.translation_classes_dependent_ndata( + local_expn, mpole_expn) return build_csr_level_starts(self.level_orders, order_to_size, level_starts=self.m2l_translation_class_level_start_box_nrs()) @@ -429,7 +426,8 @@ class SumpyExpansionWrangler(ExpansionWranglerInterface): def order_to_size(order): mpole_expn = self.tree_indep.multipole_expansion(order) local_expn = self.tree_indep.local_expansion(order) - res = local_expn.m2l_preprocess_multipole_nexprs(mpole_expn) + res = local_expn.m2l_translation.preprocess_multipole_nexprs( + local_expn, mpole_expn) return res return build_csr_level_starts(self.level_orders, order_to_size, @@ -723,7 +721,7 @@ class SumpyExpansionWrangler(ExpansionWranglerInterface): queue = mpole_exps.queue local_exps = self.local_expansion_zeros(mpole_exps) - if self.tree_indep.use_preprocessing_for_m2l: + if self.tree_indep.m2l_translation.use_preprocessing: preprocessed_mpole_exps = \ self.m2l_preproc_mpole_expansion_zeros(mpole_exps) for lev in range(self.tree.nlevels): @@ -803,7 +801,7 @@ class SumpyExpansionWrangler(ExpansionWranglerInterface): postprocess_evts = [] - if self.tree_indep.use_preprocessing_for_m2l: + if self.tree_indep.m2l_translation.use_preprocessing: for lev in range(self.tree.nlevels): order = self.level_orders[lev] postprocess_local_kernel = \ diff --git a/sumpy/toys.py b/sumpy/toys.py index 3b8876db..ded4a54e 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -25,6 +25,7 @@ THE SOFTWARE. from pytools import memoize_method from numbers import Number +from functools import partial from sumpy.kernel import TargetTransformationRemover import numpy as np # noqa: F401 @@ -103,8 +104,15 @@ class ToyContext: mpole_expn_class = \ expansion_factory.get_multipole_expansion_class(kernel) if local_expn_class is None: + from sumpy.expansion.m2l import NonFFTM2LTranslationClassFactory local_expn_class = \ expansion_factory.get_local_expansion_class(kernel) + m2l_translation_class_factory = NonFFTM2LTranslationClassFactory() + m2l_translation_class = \ + m2l_translation_class_factory.get_m2l_translation_class( + kernel, local_expn_class) + local_expn_class = partial(local_expn_class, + m2l_translation=m2l_translation_class()) self.mpole_expn_class = mpole_expn_class self.local_expn_class = local_expn_class diff --git a/test/test_fmm.py b/test/test_fmm.py index 8e7bc329..cbf66e6d 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -185,11 +185,21 @@ def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class, for order in order_values: target_kernels = [knl] + if use_fft: + from sumpy.expansion.m2l import FFTM2LTranslationClassFactory + m2l_translation_factory = FFTM2LTranslationClassFactory() + else: + from sumpy.expansion.m2l import NonFFTM2LTranslationClassFactory + m2l_translation_factory = NonFFTM2LTranslationClassFactory() + + m2l_translation = m2l_translation_factory.get_m2l_translation_class( + knl, local_expn_class)() + tree_indep = SumpyTreeIndependentDataForWrangler( ctx, partial(mpole_expn_class, knl), - partial(local_expn_class, knl), - target_kernels, use_fft_for_m2l=use_fft) + partial(local_expn_class, knl, m2l_translation=m2l_translation), + target_kernels) if order_varies_with_level: def fmm_level_to_order(kernel, kernel_args, tree, lev): diff --git a/test/test_kernels.py b/test/test_kernels.py index 1e0db7ce..f2d89b86 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -36,6 +36,7 @@ from sumpy.expansion.multipole import ( from sumpy.expansion.local import ( VolumeTaylorLocalExpansion, H2DLocalExpansion, LinearPDEConformingVolumeTaylorLocalExpansion) +from sumpy.expansion.m2l import NonFFTM2LTranslationClassFactory from sumpy.kernel import (LaplaceKernel, HelmholtzKernel, AxisTargetDerivative, DirectionalSourceDerivative, BiharmonicKernel, StokesletKernel) import sumpy.symbolic as sym @@ -560,9 +561,12 @@ def test_translations(ctx_factory, knl, local_expn_class, mpole_expn_class): return pot + m2l_factory = NonFFTM2LTranslationClassFactory() + m2l_translation = m2l_factory.get_m2l_translation_class(knl, local_expn_class)() + for order in orders: m_expn = mpole_expn_class(knl, order=order) - l_expn = local_expn_class(knl, order=order) + l_expn = local_expn_class(knl, order=order, m2l_translation=m2l_translation) from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P, E2EFromCSR p2m = P2EFromSingleBox(ctx, m_expn) @@ -843,8 +847,10 @@ def test_m2l_toeplitz(): knl = LaplaceKernel(dim) local_expn_class = LinearPDEConformingVolumeTaylorLocalExpansion mpole_expn_class = LinearPDEConformingVolumeTaylorMultipoleExpansion + m2l_factory = NonFFTM2LTranslationClassFactory() + m2l_translation = m2l_factory.get_m2l_translation_class(knl, local_expn_class)() - local_expn = local_expn_class(knl, order=5) + local_expn = local_expn_class(knl, order=5, m2l_translation=m2l_translation) mpole_expn = mpole_expn_class(knl, order=5) dvec = sym.make_sym_vector("d", dim) diff --git a/test/test_misc.py b/test/test_misc.py index 42956bc6..9aeb7f12 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -267,14 +267,12 @@ def test_toy_p2e2e2p(ctx_factory, case): raise ValueError( f"convergence factor not in valid range: {case_conv_factor}") - from sumpy.expansion.local import VolumeTaylorLocalExpansion - from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansion + from sumpy.expansion import VolumeTaylorExpansionFactory cl_ctx = ctx_factory() ctx = t.ToyContext(cl_ctx, LaplaceKernel(dim), - VolumeTaylorMultipoleExpansion, - VolumeTaylorLocalExpansion) + expansion_factory=VolumeTaylorExpansionFactory()) errors = [] -- GitLab