From 9b917db20a82514dcad85de648a5912947d8b5f6 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Tue, 3 Dec 2019 19:16:00 -0600 Subject: [PATCH 1/4] Asymptotically better algorithm for M2M On master, M2M is $`O(p^2d)`$ for order p and dimension d, but CSE reduces this down to $`O(p^{2d-1})`$ sometimes. For eg: in Helmholtz 2D, full taylor is $`O(p^{2d-1})`$ and HelmholtzConformingTaylor is $`O(p^{2d-1.5})`$. This commit produces expressions in $`O(p^{2d-1})`$ consistently regardless of how good CSE is and $`O(p^{2d-2})`$ for compressed. The new algorithm uses the observation that M2M coefficients have the form in 2D, $`B_{m, n} = \sum_{i\le m, j\le n} A_{i, j} d_x^i d_y^j \binom{m}{i} \binom{n}{j}`$ and can be rewritten as follows, Let $`T_{m, n} = \sum_{i\le m} A_{i, n} d_x^i \binom{m}{i}`$. Then, $`B_{m, n} = \sum_{j\le n} T_{m, j} d_y^j \binom{n}{j}`$ and $`T_{m, n}`$ are $`p^2`$ number of temporary variables that are reused for different M2M coefficients and costs $`p`$ per variable. Total cost for calculating $`T_{m, n}`$ is $`p^3`$ and similar for $`B_{m, n}`$ --- sumpy/expansion/multipole.py | 69 ++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index 2d6cbf40..aa797b73 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -169,35 +169,58 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): src_coeff_exprs[i] *= mi_factorial(mi) result = [0] * len(self.get_full_coefficient_identifiers()) - from pytools import generate_nonnegative_integer_tuples_below as gnitb - for i, tgt_mi in enumerate( - self.get_full_coefficient_identifiers()): - - tgt_mi_plus_one = tuple(mi_i + 1 for mi_i in tgt_mi) + r""" + This algorithm uses the observation that M2M coefficients + have the following form in 2D, + + $`B_{m, n} = \sum_{i\le m, j\le n} A_{i, j} d_x^i d_y^j \binom{m}{i} \binom{n}{j}`$ + and can be rewritten as follows. + + Let $`T_{m, n} = \sum_{i\le m} A_{i, n} d_x^i \binom{m}{i}`$. + + Then, $`B_{m, n} = \sum_{j\le n} T_{m, j} d_y^j \binom{n}{j}`$. + + $`T_{m, n}`$ are $`p^2`$ number of temporary variables that are + reused for different M2M coefficients and costs $`p`$ per variable. + Total cost for calculating $`T_{m, n}`$ is $`p^3`$ and similar for $`B_{m, n}`$ + """ + for d in range(self.dim): + result = [0] * len(self.get_full_coefficient_identifiers()) + for i, tgt_mi in enumerate( + self.get_full_coefficient_identifiers()): + + src_mis_per_dim = [] + for mi_i in range(tgt_mi[d]+1): + new_mi = list(tgt_mi) + new_mi[d] = mi_i + src_mis_per_dim.append(tuple(new_mi)) + + for src_mi in src_mis_per_dim: + try: + src_index = src_mi_to_index[src_mi] + except KeyError: + # Omitted coefficients: not life-threatening + continue - for src_mi in gnitb(tgt_mi_plus_one): - try: - src_index = src_mi_to_index[src_mi] - except KeyError: - # Omitted coefficients: not life-threatening - continue + contrib = src_coeff_exprs[src_index] - contrib = src_coeff_exprs[src_index] + for idim in range(self.dim): + n = tgt_mi[idim] + k = src_mi[idim] + assert n >= k + from sympy import binomial + contrib *= (binomial(n, k) + * sym.UnevaluatedExpr(dvec[idim]/tgt_rscale)**(n-k)) - for idim in range(self.dim): - n = tgt_mi[idim] - k = src_mi[idim] - assert n >= k - from sympy import binomial - contrib *= (binomial(n, k) - * sym.UnevaluatedExpr(dvec[idim]/tgt_rscale)**(n-k)) + result[i] += ( + contrib + * sym.UnevaluatedExpr(src_rscale/tgt_rscale)**sum(src_mi)) - result[i] += ( - contrib - * sym.UnevaluatedExpr(src_rscale/tgt_rscale)**sum(src_mi)) + if d == self.dim-1: + result[i] /= mi_factorial(tgt_mi) - result[i] /= mi_factorial(tgt_mi) + src_coeff_exprs = result[:] logger.info("building translation operator: done") return ( -- GitLab From a125a889d3435f88b2d6e3dc32d1af171dfe58b5 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Tue, 3 Dec 2019 22:52:09 -0600 Subject: [PATCH 2/4] Fix conforming expansions --- sumpy/expansion/multipole.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index aa797b73..eb6dc0fe 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -165,6 +165,9 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): src_mi_to_index = dict((mi, i) for i, mi in enumerate( src_expansion.get_coefficient_identifiers())) + tgt_mi_to_index = dict((mi, i) for i, mi in enumerate( + self.get_full_coefficient_identifiers())) + for i, mi in enumerate(src_expansion.get_coefficient_identifiers()): src_coeff_exprs[i] *= mi_factorial(mi) @@ -185,6 +188,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): reused for different M2M coefficients and costs $`p`$ per variable. Total cost for calculating $`T_{m, n}`$ is $`p^3`$ and similar for $`B_{m, n}`$ """ + mi_to_index = src_mi_to_index for d in range(self.dim): result = [0] * len(self.get_full_coefficient_identifiers()) for i, tgt_mi in enumerate( @@ -198,7 +202,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): for src_mi in src_mis_per_dim: try: - src_index = src_mi_to_index[src_mi] + src_index = mi_to_index[src_mi] except KeyError: # Omitted coefficients: not life-threatening continue @@ -221,6 +225,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): result[i] /= mi_factorial(tgt_mi) src_coeff_exprs = result[:] + mi_to_index = tgt_mi_to_index logger.info("building translation operator: done") return ( -- GitLab From af8b7f398ba8dffc1d0ce5a03f4b5a3842dc0919 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Tue, 3 Dec 2019 23:54:08 -0600 Subject: [PATCH 3/4] Fix rscale --- sumpy/expansion/multipole.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index eb6dc0fe..5e34d7da 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -168,8 +168,10 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): tgt_mi_to_index = dict((mi, i) for i, mi in enumerate( self.get_full_coefficient_identifiers())) + src_coeff_exprs = list(src_coeff_exprs) for i, mi in enumerate(src_expansion.get_coefficient_identifiers()): - src_coeff_exprs[i] *= mi_factorial(mi) + src_coeff_exprs[i] *= mi_factorial(mi) * \ + sym.UnevaluatedExpr(src_rscale/tgt_rscale)**sum(mi) result = [0] * len(self.get_full_coefficient_identifiers()) @@ -177,16 +179,18 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): This algorithm uses the observation that M2M coefficients have the following form in 2D, - $`B_{m, n} = \sum_{i\le m, j\le n} A_{i, j} d_x^i d_y^j \binom{m}{i} \binom{n}{j}`$ + $B_{m, n} = \sum_{i\le m, j\le n} A_{i, j} + d_x^i d_y^j \binom{m}{i} \binom{n}{j}$ and can be rewritten as follows. - Let $`T_{m, n} = \sum_{i\le m} A_{i, n} d_x^i \binom{m}{i}`$. + Let $T_{m, n} = \sum_{i\le m} A_{i, n} d_x^i \binom{m}{i}$. - Then, $`B_{m, n} = \sum_{j\le n} T_{m, j} d_y^j \binom{n}{j}`$. + Then, $B_{m, n} = \sum_{j\le n} T_{m, j} d_y^j \binom{n}{j}$. - $`T_{m, n}`$ are $`p^2`$ number of temporary variables that are - reused for different M2M coefficients and costs $`p`$ per variable. - Total cost for calculating $`T_{m, n}`$ is $`p^3`$ and similar for $`B_{m, n}`$ + $T_{m, n}$ are $p^2$ number of temporary variables that are + reused for different M2M coefficients and costs $p$ per variable. + Total cost for calculating $T_{m, n}$ is $p^3$ and similar + for $B_{m, n}$ """ mi_to_index = src_mi_to_index for d in range(self.dim): @@ -217,9 +221,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): contrib *= (binomial(n, k) * sym.UnevaluatedExpr(dvec[idim]/tgt_rscale)**(n-k)) - result[i] += ( - contrib - * sym.UnevaluatedExpr(src_rscale/tgt_rscale)**sum(src_mi)) + result[i] += contrib if d == self.dim-1: result[i] /= mi_factorial(tgt_mi) -- GitLab From b8246dca326ae04941138f0b7f05afb1a9f325dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Wed, 4 Dec 2019 17:46:55 +0100 Subject: [PATCH 4/4] Some readability improvements for the new M2M algorithm --- sumpy/expansion/multipole.py | 44 +++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index 5e34d7da..068fea23 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -175,23 +175,30 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): result = [0] * len(self.get_full_coefficient_identifiers()) - r""" - This algorithm uses the observation that M2M coefficients - have the following form in 2D, + # This algorithm uses the observation that M2M coefficients + # have the following form in 2D + # + # $B_{m, n} = \sum_{i\le m, j\le n} A_{i, j} + # d_x^i d_y^j \binom{m}{i} \binom{n}{j}$ + # and can be rewritten as follows. + # + # Let $T_{m, n} = \sum_{i\le m} A_{i, n} d_x^i \binom{m}{i}$. + # + # Then, $B_{m, n} = \sum_{j\le n} T_{m, j} d_y^j \binom{n}{j}$. + # + # $T_{m, n}$ are $p^2$ number of temporary variables that are + # reused for different M2M coefficients and costs $p$ per variable. + # Total cost for calculating $T_{m, n}$ is $p^3$ and similar + # for $B_{m, n}$ + + # In other words, we're better off computing the translation + # one dimension at a time. This is realized here by using + # the output from one dimension as the input to the next. + # per_dim_coeffs_to_translate serves as the array of input + # coefficients for each dimension's translation. + + dim_coeffs_to_translate = src_coeff_exprs - $B_{m, n} = \sum_{i\le m, j\le n} A_{i, j} - d_x^i d_y^j \binom{m}{i} \binom{n}{j}$ - and can be rewritten as follows. - - Let $T_{m, n} = \sum_{i\le m} A_{i, n} d_x^i \binom{m}{i}$. - - Then, $B_{m, n} = \sum_{j\le n} T_{m, j} d_y^j \binom{n}{j}$. - - $T_{m, n}$ are $p^2$ number of temporary variables that are - reused for different M2M coefficients and costs $p$ per variable. - Total cost for calculating $T_{m, n}$ is $p^3$ and similar - for $B_{m, n}$ - """ mi_to_index = src_mi_to_index for d in range(self.dim): result = [0] * len(self.get_full_coefficient_identifiers()) @@ -211,7 +218,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): # Omitted coefficients: not life-threatening continue - contrib = src_coeff_exprs[src_index] + contrib = dim_coeffs_to_translate[src_index] for idim in range(self.dim): n = tgt_mi[idim] @@ -223,10 +230,11 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): result[i] += contrib + # Defer division by target factorial until the very end if d == self.dim-1: result[i] /= mi_factorial(tgt_mi) - src_coeff_exprs = result[:] + dim_coeffs_to_translate = result[:] mi_to_index = tgt_mi_to_index logger.info("building translation operator: done") -- GitLab