diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index 2d6cbf409b73b83023e75731738033cc219cddca..068fea2393875f7b58904f088f7d58e8939e09b3 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -165,39 +165,77 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): src_mi_to_index = dict((mi, i) for i, mi in enumerate( src_expansion.get_coefficient_identifiers())) + tgt_mi_to_index = dict((mi, i) for i, mi in enumerate( + self.get_full_coefficient_identifiers())) + + src_coeff_exprs = list(src_coeff_exprs) for i, mi in enumerate(src_expansion.get_coefficient_identifiers()): - src_coeff_exprs[i] *= mi_factorial(mi) + src_coeff_exprs[i] *= mi_factorial(mi) * \ + sym.UnevaluatedExpr(src_rscale/tgt_rscale)**sum(mi) result = [0] * len(self.get_full_coefficient_identifiers()) - from pytools import generate_nonnegative_integer_tuples_below as gnitb - - for i, tgt_mi in enumerate( - self.get_full_coefficient_identifiers()): - tgt_mi_plus_one = tuple(mi_i + 1 for mi_i in tgt_mi) + # This algorithm uses the observation that M2M coefficients + # have the following form in 2D + # + # $B_{m, n} = \sum_{i\le m, j\le n} A_{i, j} + # d_x^i d_y^j \binom{m}{i} \binom{n}{j}$ + # and can be rewritten as follows. + # + # Let $T_{m, n} = \sum_{i\le m} A_{i, n} d_x^i \binom{m}{i}$. + # + # Then, $B_{m, n} = \sum_{j\le n} T_{m, j} d_y^j \binom{n}{j}$. + # + # $T_{m, n}$ are $p^2$ number of temporary variables that are + # reused for different M2M coefficients and costs $p$ per variable. + # Total cost for calculating $T_{m, n}$ is $p^3$ and similar + # for $B_{m, n}$ + + # In other words, we're better off computing the translation + # one dimension at a time. This is realized here by using + # the output from one dimension as the input to the next. + # per_dim_coeffs_to_translate serves as the array of input + # coefficients for each dimension's translation. + + dim_coeffs_to_translate = src_coeff_exprs + + mi_to_index = src_mi_to_index + for d in range(self.dim): + result = [0] * len(self.get_full_coefficient_identifiers()) + for i, tgt_mi in enumerate( + self.get_full_coefficient_identifiers()): + + src_mis_per_dim = [] + for mi_i in range(tgt_mi[d]+1): + new_mi = list(tgt_mi) + new_mi[d] = mi_i + src_mis_per_dim.append(tuple(new_mi)) + + for src_mi in src_mis_per_dim: + try: + src_index = mi_to_index[src_mi] + except KeyError: + # Omitted coefficients: not life-threatening + continue - for src_mi in gnitb(tgt_mi_plus_one): - try: - src_index = src_mi_to_index[src_mi] - except KeyError: - # Omitted coefficients: not life-threatening - continue + contrib = dim_coeffs_to_translate[src_index] - contrib = src_coeff_exprs[src_index] + for idim in range(self.dim): + n = tgt_mi[idim] + k = src_mi[idim] + assert n >= k + from sympy import binomial + contrib *= (binomial(n, k) + * sym.UnevaluatedExpr(dvec[idim]/tgt_rscale)**(n-k)) - for idim in range(self.dim): - n = tgt_mi[idim] - k = src_mi[idim] - assert n >= k - from sympy import binomial - contrib *= (binomial(n, k) - * sym.UnevaluatedExpr(dvec[idim]/tgt_rscale)**(n-k)) + result[i] += contrib - result[i] += ( - contrib - * sym.UnevaluatedExpr(src_rscale/tgt_rscale)**sum(src_mi)) + # Defer division by target factorial until the very end + if d == self.dim-1: + result[i] /= mi_factorial(tgt_mi) - result[i] /= mi_factorial(tgt_mi) + dim_coeffs_to_translate = result[:] + mi_to_index = tgt_mi_to_index logger.info("building translation operator: done") return (