diff --git a/sumpy/__init__.py b/sumpy/__init__.py index 6555fd2b9b3899ea36bc35ff811442b8b79ef7be..31699b03738b2b82bf61d1ab47c13d8197ae35a3 100644 --- a/sumpy/__init__.py +++ b/sumpy/__init__.py @@ -24,7 +24,8 @@ import os from sumpy.p2p import P2P, P2PFromCSR from sumpy.p2e import P2EFromSingleBox, P2EFromCSR from sumpy.e2p import E2PFromSingleBox, E2PFromCSR -from sumpy.e2e import E2EFromCSR, E2EFromChildren, E2EFromParent +from sumpy.e2e import (E2EFromCSR, E2EFromChildren, E2EFromParent, + E2EFromCSRTranslationInvariant, E2EFromCSRTranslationClassesPrecompute) from sumpy.version import VERSION_TEXT from pytools.persistent_dict import WriteOncePersistentDict @@ -32,7 +33,8 @@ __all__ = [ "P2P", "P2PFromCSR", "P2EFromSingleBox", "P2EFromCSR", "E2PFromSingleBox", "E2PFromCSR", - "E2EFromCSR", "E2EFromChildren", "E2EFromParent"] + "E2EFromCSR", "E2EFromChildren", "E2EFromParent", + "E2EFromCSRTranslationInvariant", "E2EFromCSRTranslationClassesPrecompute"] code_cache = WriteOncePersistentDict("sumpy-code-cache-v6-"+VERSION_TEXT) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 5f17fbce8a74cf39f893f0aa6b563849978cbbe0..236090f3481566f75e2f259d291401e13eb195ac 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -247,6 +247,259 @@ class E2EFromCSR(E2EBase): src_rscale=src_rscale, tgt_rscale=tgt_rscale, **kwargs) + +class E2EFromCSRTranslationInvariant(E2EFromCSR): + """Implements translation from a "compressed sparse row"-like source box + list for translation invariant Kernels. + """ + default_name = "e2e_from_csr_translation_invariant" + + def get_translation_loopy_insns(self): + from sumpy.symbolic import make_sym_vector + dvec = make_sym_vector("d", self.dim) + + src_coeff_exprs = [sym.Symbol("src_coeff%d" % i) + for i in range(len(self.src_expansion))] + src_rscale = sym.Symbol("src_rscale") + + tgt_rscale = sym.Symbol("tgt_rscale") + + nprecomputed_exprs = \ + self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) + + precomputed_exprs = [sym.Symbol("precomputed_expr%d" % i) + for i in range(nprecomputed_exprs)] + + from sumpy.assignment_collection import SymbolicAssignmentCollection + sac = SymbolicAssignmentCollection() + tgt_coeff_names = [ + sac.assign_unique("coeff%d" % i, coeff_i) + for i, coeff_i in enumerate( + self.tgt_expansion.translate_from( + self.src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale, sac, + precomputed_exprs=precomputed_exprs))] + + sac.run_global_cse() + + from sumpy.codegen import to_loopy_insns + return to_loopy_insns( + sac.assignments.items(), + vector_names=set(["d"]), + pymbolic_expr_maps=[self.tgt_expansion.get_code_transformer()], + retain_names=tgt_coeff_names, + complex_dtype=np.complex128 # FIXME + ) + + def get_kernel(self): + ncoeff_src = len(self.src_expansion) + ncoeff_tgt = len(self.tgt_expansion) + nprecomputed_exprs = \ + self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) + + # To clarify terminology: + # + # isrc_box -> The index in a list of (in this case, source) boxes + # src_ibox -> The (global) box number for the (in this case, source) box + # + # (same for itgt_box, tgt_ibox) + + from sumpy.tools import gather_loopy_arguments + loopy_knl = lp.make_kernel( + [ + "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] + + <> tgt_center[idim] = centers[idim, tgt_ibox] \ + + <> isrc_start = src_box_starts[itgt_box] + <> isrc_stop = src_box_starts[itgt_box+1] + + for isrc_box + <> src_ibox = src_box_lists[isrc_box] \ + {id=read_src_ibox} + + <> src_center[idim] = centers[idim, src_ibox] {dup=idim} + <> d[idim] = tgt_center[idim] - src_center[idim] \ + {dup=idim} + <> translation_class = \ + m2l_translation_classes_lists[isrc_box] + <> translation_class_rel = translation_class - \ + translation_classes_level_start + """] + [""" + <> precomputed_expr{idx} = \ + m2l_precomputed_exprs[translation_class_rel, {idx}] + """.format(idx=idx) for idx in range( + nprecomputed_exprs)] + [""" + <> src_coeff{coeffidx} = \ + src_expansions[src_ibox - src_base_ibox, {coeffidx}] \ + {{dep=read_src_ibox}} + """.format(coeffidx=i) for i in range(ncoeff_src)] + [ + + ] + self.get_translation_loopy_insns() + [""" + end + + """] + [""" + tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \ + simul_reduce(sum, isrc_box, coeff{coeffidx}) \ + {{id_prefix=write_expn}} + """.format(coeffidx=i) for i in range(ncoeff_tgt)] + [""" + end + """], + [ + lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), + lp.ValueArg("src_rscale,tgt_rscale", None), + lp.GlobalArg("src_box_starts, src_box_lists", + None, shape=None, strides=(1,), offset=lp.auto), + lp.ValueArg("aligned_nboxes,tgt_base_ibox,src_base_ibox", + np.int32), + lp.ValueArg("nsrc_level_boxes,ntgt_level_boxes", + np.int32), + lp.ValueArg("translation_classes_level_start", + np.int32), + lp.GlobalArg("src_expansions", None, + shape=("nsrc_level_boxes", ncoeff_src), offset=lp.auto), + lp.GlobalArg("tgt_expansions", None, + shape=("ntgt_level_boxes", ncoeff_tgt), offset=lp.auto), + lp.ValueArg("ntranslation_classes, ntranslation_classes_lists", + np.int32), + lp.GlobalArg("m2l_translation_classes_lists", np.int32, + shape=("ntranslation_classes_lists"), strides=(1,), + offset=lp.auto), + lp.GlobalArg("m2l_precomputed_exprs", None, + shape=("ntranslation_classes", nprecomputed_exprs), + offset=lp.auto), + "..." + ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), + name=self.name, + assumptions="ntgt_boxes>=1", + silenced_warnings="write_race(write_expn*)", + default_offset=lp.auto, + fixed_parameters=dict(dim=self.dim, + nprecomputed_exprs=nprecomputed_exprs), + lang_version=MOST_RECENT_LANGUAGE_VERSION + ) + + for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + + return loopy_knl + + +class E2EFromCSRTranslationClassesPrecompute(E2EFromCSR): + """Implements precomputing the translation classes dependent + derivatives. + """ + default_name = "e2e_from_csr_translation_classes_precompute" + + def get_translation_loopy_insns(self): + from sumpy.symbolic import make_sym_vector + dvec = make_sym_vector("d", self.dim) + + src_rscale = sym.Symbol("src_rscale") + tgt_rscale = sym.Symbol("tgt_rscale") + + from sumpy.assignment_collection import SymbolicAssignmentCollection + sac = SymbolicAssignmentCollection() + tgt_coeff_names = [ + sac.assign_unique("precomputed_expr%d" % i, coeff_i) + for i, coeff_i in enumerate( + self.tgt_expansion.m2l_global_precompute_exprs( + self.src_expansion, src_rscale, + dvec, tgt_rscale, sac))] + + sac.run_global_cse() + + from sumpy.codegen import to_loopy_insns + return to_loopy_insns( + sac.assignments.items(), + vector_names=set(["d"]), + pymbolic_expr_maps=[self.tgt_expansion.get_code_transformer()], + retain_names=tgt_coeff_names, + complex_dtype=np.complex128 # FIXME + ) + + def get_kernel(self): + nprecomputed_exprs = \ + self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) + from sumpy.tools import gather_loopy_arguments + loopy_knl = lp.make_kernel( + [ + "{[itr_class]: 0<=itr_class d[idim] = m2l_translation_vectors[idim, \ + itr_class + translation_classes_level_start] {dup=idim} + + """] + self.get_translation_loopy_insns() + [""" + m2l_precomputed_exprs[itr_class, {idx}] = precomputed_expr{idx} + """.format(idx=i) for i in range(nprecomputed_exprs)] + [""" + end + """], + [ + lp.ValueArg("src_rscale", None), + lp.GlobalArg("m2l_precomputed_exprs", None, + shape=("ntranslation_classes", nprecomputed_exprs), + offset=lp.auto), + lp.GlobalArg("m2l_translation_vectors", None, + shape=("dim", "ntranslation_vectors")), + lp.ValueArg("ntranslation_classes", np.int32), + lp.ValueArg("ntranslation_vectors", np.int32), + lp.ValueArg("translation_classes_level_start", np.int32), + "..." + ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), + name=self.name, + assumptions="ntranslation_classes>=1", + default_offset=lp.auto, + fixed_parameters=dict(dim=self.dim), + lang_version=MOST_RECENT_LANGUAGE_VERSION + ) + + for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) + + return loopy_knl + + def get_optimized_kernel(self): + # FIXME + knl = self.get_kernel() + knl = lp.split_iname(knl, "itr_class", 16, outer_tag="g.0") + + return knl + + def __call__(self, queue, **kwargs): + """ + :arg src_rscale: + :arg translation_classes_level_start: + :arg ntranslation_classes: + :arg m2l_precomputed_exprs: + :arg m2l_translation_vectors: + """ + knl = self.get_cached_optimized_kernel() + + m2l_translation_vectors = kwargs.pop("m2l_translation_vectors") + # "1" may be passed for rscale, which won't have its type + # meaningfully inferred. Make the type of rscale explicit. + src_rscale = m2l_translation_vectors.dtype.type(kwargs.pop("src_rscale")) + + return knl(queue, + src_rscale=src_rscale, + m2l_translation_vectors=m2l_translation_vectors, + **kwargs) + # }}} diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 18d965308791ed924a7e12b9637161b9e223d95a..9c79c0737bde90e7909a5eead2629a58e05ade10 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -28,7 +28,7 @@ from sumpy.expansion import ( HelmholtzConformingVolumeTaylorExpansion, BiharmonicConformingVolumeTaylorExpansion) -from sumpy.tools import mi_increment_axis +from sumpy.tools import mi_increment_axis, matvec_toeplitz_upper_triangular from pytools import single_valued @@ -179,8 +179,96 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): return kernel.postprocess_at_target(result, bvec) + def m2l_global_precompute_nexpr(self, src_expansion): + return len(self.m2l_global_precompute_mis(src_expansion)[1]) + + def m2l_global_precompute_mis(self, src_expansion): + from pytools import generate_nonnegative_integer_tuples_below as gnitb + from sumpy.tools import add_mi + + max_mi = [0]*self.dim + for i in range(self.dim): + max_mi[i] = max(mi[i] for mi in + src_expansion.get_coefficient_identifiers()) + max_mi[i] += max(mi[i] for mi in + self.get_coefficient_identifiers()) + + toeplitz_matrix_coeffs = list(gnitb([m + 1 for m in max_mi])) + + needed_vector_terms = [] + # For eg: 2D full Taylor Laplace, we only need kernel derivatives + # (n1+n2, m1+m2), n1+m1<=p, n2+m2<=p + for tgt_deriv in self.get_coefficient_identifiers(): + for src_deriv in src_expansion.get_coefficient_identifiers(): + needed = add_mi(src_deriv, tgt_deriv) + if needed not in needed_vector_terms: + needed_vector_terms.append(needed) + + return toeplitz_matrix_coeffs, needed_vector_terms, max_mi + + def m2l_global_precompute_exprs(self, src_expansion, src_rscale, + dvec, tgt_rscale, sac): + # We know the general form of the multipole expansion is: + # + # coeff0 * diff(kernel(src - c1), mi0) + + # coeff1 * diff(kernel(src - c1, mi1) + ... + # + # To get the local expansion coefficients, we take derivatives of + # the multipole expansion. For eg: the coefficient w.r.t mir is + # + # coeff0 * diff(kernel(c2 - c1), mi0 + mir) + + # coeff1 * diff(kernel(c2 - c1, mi1 + mir) + ... + # + # The derivatives above depends only on `c2 - c1` and can be precomputed + # globally as there are only a finite number of values for `c2 - c1` for + # m2l. + + if not self.use_rscale: + src_rscale = 1 + + from sumpy.tools import add_mi + + toeplitz_matrix_coeffs, needed_vector_terms, max_mi = \ + self.m2l_global_precompute_mis(src_expansion) + + # Create a expansion terms wrangler for derivatives up to order + # (tgt order)+(src order) including a corresponding reduction matrix + # For eg: 2D full Taylor Laplace, this is (n, m), + # n+m<=2*p, n<=2*p, m<=2*p + srcplusderiv_terms_wrangler = \ + src_expansion.expansion_terms_wrangler.copy( + order=self.order + src_expansion.order, max_mi=tuple(max_mi)) + srcplusderiv_full_coeff_ids = \ + srcplusderiv_terms_wrangler.get_full_coefficient_identifiers() + srcplusderiv_ident_to_index = dict((ident, i) for i, ident in + enumerate(srcplusderiv_full_coeff_ids)) + + # The vector has the kernel derivatives and depends only on the distance + # between the two centers + taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale, sac) + vector_stored = [] + # Calculate the kernel derivatives for the compressed set + for term in \ + srcplusderiv_terms_wrangler.get_coefficient_identifiers(): + kernel_deriv = taker.diff(term) + vector_stored.append(kernel_deriv) + # Calculate the kernel derivatives for the full set + vector_full = \ + srcplusderiv_terms_wrangler.get_full_kernel_derivatives_from_stored( + vector_stored, src_rscale) + + for term in srcplusderiv_full_coeff_ids: + assert term in needed_vector_terms + + vector = [0]*len(needed_vector_terms) + for i, term in enumerate(needed_vector_terms): + vector[i] = add_to_sac(sac, + vector_full[srcplusderiv_ident_to_index[term]]) + return vector + def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale, sac=None, _fast_version=True): + dvec, tgt_rscale, sac=None, _fast_version=True, + precomputed_exprs=None): logger.info("building translation operator for %s: %s(%d) -> %s(%d): start" % (src_expansion.kernel, type(src_expansion).__name__, @@ -194,79 +282,44 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansionBase if isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase): - # We know the general form of the multipole expansion is: - # - # coeff0 * diff(kernel, mi0) + coeff1 * diff(kernel, mi1) + ... - # - # To get the local expansion coefficients, we take derivatives of - # the multipole expansion. - # - # This code speeds up derivative taking by caching all kernel - # derivatives. - - taker = src_expansion.get_kernel_derivative_taker(dvec=dvec, sac=sac, - rscale=src_rscale) - - from sumpy.tools import add_mi - - # Calculate a elementwise maximum multi-index because the number - # of multi-indices needed is much less than - # gnitstam(src_order + tgt order) when PDE conforming expansions - # are used. For full Taylor, there's no difference. - - def calc_max_mi(mis): - return (max(mi[i] for mi in mis) for i in range(self.dim)) - - src_max_mi = calc_max_mi(src_expansion.get_coefficient_identifiers()) - tgt_max_mi = calc_max_mi(self.get_coefficient_identifiers()) - max_mi = add_mi(src_max_mi, tgt_max_mi) - - # Create a expansion terms wrangler for derivatives up to order - # (tgt order)+(src order) including a corresponding reduction matrix - tgtplusderiv_exp_terms_wrangler = \ - src_expansion.expansion_terms_wrangler.copy( - order=self.order + src_expansion.order, max_mi=tuple(max_mi)) - tgtplusderiv_coeff_ids = \ - tgtplusderiv_exp_terms_wrangler.get_coefficient_identifiers() - tgtplusderiv_full_coeff_ids = \ - tgtplusderiv_exp_terms_wrangler.get_full_coefficient_identifiers() - - tgtplusderiv_ident_to_index = {ident: i for i, ident in - enumerate(tgtplusderiv_full_coeff_ids)} - + toeplitz_matrix_coeffs, needed_vector_terms, max_mi = \ + self.m2l_global_precompute_mis(src_expansion) + toeplitz_matrix_ident_to_index = dict((ident, i) for i, ident in + enumerate(toeplitz_matrix_coeffs)) + + if precomputed_exprs is None: + derivatives = self.m2l_global_precompute_exprs(src_expansion, + src_rscale, dvec, tgt_rscale, sac) + else: + derivatives = precomputed_exprs + + derivatives_full = [0]*len(toeplitz_matrix_coeffs) + for expr, mi in zip(derivatives, needed_vector_terms): + derivatives_full[toeplitz_matrix_ident_to_index[mi]] = expr + + # Calculate the first row of the upper triangular Toeplitz matrix + toeplitz_first_row = [0] * len(toeplitz_matrix_coeffs) + for coeff, term in zip( + src_coeff_exprs, + src_expansion.get_coefficient_identifiers()): + toeplitz_first_row[toeplitz_matrix_ident_to_index[term]] = \ + add_to_sac(sac, coeff) + + # Do the matvec + output = matvec_toeplitz_upper_triangular(toeplitz_first_row, + derivatives_full) + + # Filter out the dummy rows and scale them for target result = [] - for lexp_mi in self.get_coefficient_identifiers(): - lexp_mi_terms = [] - - # Embed the source coefficients in the coefficient array - # for the (tgt order)+(src order) wrangler, offset by lexp_mi. - embedded_coeffs = [0] * len(tgtplusderiv_full_coeff_ids) - for coeff, term in zip( - src_coeff_exprs, - src_expansion.get_coefficient_identifiers()): - embedded_coeffs[ - tgtplusderiv_ident_to_index[add_mi(lexp_mi, term)]] \ - = coeff - - # Compress the embedded coefficient set - stored_coeffs = tgtplusderiv_exp_terms_wrangler \ - .get_stored_mpole_coefficients_from_full( - embedded_coeffs, src_rscale, sac=sac) - - # Sum the embedded coefficient set - for tgtplusderiv_coeff_id, coeff in zip(tgtplusderiv_coeff_ids, - stored_coeffs): - if coeff == 0: - continue - kernel_deriv = taker.diff(tgtplusderiv_coeff_id) - rscale_ratio = tgt_rscale / src_rscale - lexp_mi_terms.append( - coeff * kernel_deriv * rscale_ratio**sum(lexp_mi)) - result.append(sym.Add(*lexp_mi_terms)) + rscale_ratio = add_to_sac(sac, tgt_rscale/src_rscale) + for term in self.get_coefficient_identifiers(): + index = toeplitz_matrix_ident_to_index[term] + result.append(output[index]*rscale_ratio**sum(term)) + logger.info("building translation operator: done") return result - rscale_ratio = sym.UnevaluatedExpr(tgt_rscale/src_rscale) + rscale_ratio = add_to_sac(sac, tgt_rscale/src_rscale) from math import factorial src_wrangler = src_expansion.expansion_terms_wrangler diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 5b61a0c0559b0c794537174761eb1fbda9f2c12e..b8387dcfe8643fb95321ff8df21efd2de6b537ff 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -36,7 +36,8 @@ from sumpy import ( P2EFromSingleBox, P2EFromCSR, E2PFromSingleBox, E2PFromCSR, P2PFromCSR, - E2EFromCSR, E2EFromChildren, E2EFromParent) + E2EFromCSR, E2EFromChildren, E2EFromParent, + E2EFromCSRTranslationInvariant, E2EFromCSRTranslationClassesPrecompute) def level_to_rscale(tree, level): @@ -121,6 +122,18 @@ class SumpyExpansionWranglerCodeContainer: self.multipole_expansion(src_order), self.local_expansion(tgt_order)) + @memoize_method + def m2l_optimized(self, src_order, tgt_order): + return E2EFromCSRTranslationInvariant(self.cl_context, + self.multipole_expansion(src_order), + self.local_expansion(tgt_order)) + + @memoize_method + def m2l_optimized_precompute_kernel(self, src_order, tgt_order): + return E2EFromCSRTranslationClassesPrecompute(self.cl_context, + self.multipole_expansion(src_order), + self.local_expansion(tgt_order)) + @memoize_method def l2l(self, src_order, tgt_order): return E2EFromParent(self.cl_context, @@ -149,9 +162,11 @@ class SumpyExpansionWranglerCodeContainer: def get_wrangler(self, queue, tree, dtype, fmm_level_to_order, source_extra_kwargs={}, kernel_extra_kwargs=None, - self_extra_kwargs=None): + self_extra_kwargs=None, + translation_classes_data=None): return SumpyExpansionWrangler(self, queue, tree, dtype, fmm_level_to_order, - source_extra_kwargs, kernel_extra_kwargs, self_extra_kwargs) + source_extra_kwargs, kernel_extra_kwargs, self_extra_kwargs, + translation_classes_data=translation_classes_data) # }}} @@ -204,6 +219,56 @@ class SumpyTimingFuture: # }}} +# {{{ translation classes data + +class SumpyTranslationClassesData: + """A class for building and storing additional, optional data for + precomputation of translation classes passed to the expansion wrangler.""" + + def __init__(self, queue, trav, is_translation_per_level=True): + self.queue = queue + self.trav = trav + self.tree = trav.tree + self.is_translation_per_level = is_translation_per_level + + @property + @memoize_method + def translation_classes_builder(self): + from boxtree.translation_classes import TranslationClassesBuilder + return TranslationClassesBuilder(self.queue.context) + + @memoize_method + def build_translation_classes_lists(self): + trav = self.trav.to_device(self.queue) + tree = self.tree.to_device(self.queue) + return self.translation_classes_builder(self.queue, trav, tree, + is_translation_per_level=self.is_translation_per_level)[0] + + @memoize_method + def m2l_translation_classes_lists(self): + return (self + .build_translation_classes_lists() + .from_sep_siblings_translation_classes) + + @memoize_method + def m2l_translation_vectors(self): + return (self + .build_translation_classes_lists() + .from_sep_siblings_translation_class_to_distance_vector) + + @memoize_method + def m2l_translation_classes_level_starts(self): + return (self + .build_translation_classes_lists() + .from_sep_siblings_translation_classes_level_starts) + + +class SumpyTranslationClassesDataNotSuppliedWarning(UserWarning): + pass + +# }}} + + # {{{ expansion wrangler class SumpyExpansionWrangler: @@ -230,7 +295,8 @@ class SumpyExpansionWrangler: def __init__(self, code_container, queue, tree, dtype, fmm_level_to_order, source_extra_kwargs, kernel_extra_kwargs=None, - self_extra_kwargs=None): + self_extra_kwargs=None, + translation_classes_data=None): self.code = code_container self.queue = queue self.tree = tree @@ -260,14 +326,31 @@ class SumpyExpansionWrangler: self.extra_kwargs = source_extra_kwargs.copy() self.extra_kwargs.update(self.kernel_extra_kwargs) + if base_kernel.is_translation_invariant: + if translation_classes_data is None: + from warnings import warn + warn( + "List 2 (multipole-to-local) translations will be " + "unoptimized. Supply a translation_classes_data argument to " + "the wrangler for optimized List 2.", + SumpyTranslationClassesDataNotSuppliedWarning, + + stacklevel=2) + self.supports_optimized_m2l = False + else: + self.supports_optimized_m2l = True + else: + self.supports_optimized_m2l = False + + self.translation_classes_data = translation_classes_data + + # {{{ data vector utilities - def _expansions_level_starts(self, order_to_size): + def _expansions_level_starts(self, order_to_size, level_starts): result = [0] for lev in range(self.tree.nlevels): - lev_nboxes = ( - self.tree.level_start_box_nrs[lev+1] - - self.tree.level_start_box_nrs[lev]) + lev_nboxes = level_starts[lev+1] - level_starts[lev] expn_size = order_to_size(self.level_orders[lev]) result.append( @@ -279,12 +362,29 @@ class SumpyExpansionWrangler: @memoize_method def multipole_expansions_level_starts(self): return self._expansions_level_starts( - lambda order: len(self.code.multipole_expansion_factory(order))) + lambda order: len(self.code.multipole_expansion_factory(order)), + level_starts=self.tree.level_start_box_nrs) @memoize_method def local_expansions_level_starts(self): return self._expansions_level_starts( - lambda order: len(self.code.local_expansion_factory(order))) + lambda order: len(self.code.local_expansion_factory(order)), + level_starts=self.tree.level_start_box_nrs) + + @memoize_method + def m2l_translation_class_level_start_box_nrs(self): + data = self.translation_classes_data + return data.m2l_translation_classes_level_starts().get(self.queue) + + @memoize_method + def m2l_precomputed_exprs_level_starts(self): + def order_to_size(order): + mpole_expn = self.code.multipole_expansion_factory(order) + local_expn = self.code.local_expansion_factory(order) + return local_expn.m2l_global_precompute_nexpr(mpole_expn) + + return self._expansions_level_starts(order_to_size, + level_starts=self.m2l_translation_class_level_start_box_nrs()) def multipole_expansion_zeros(self): return cl.array.zeros( @@ -298,6 +398,12 @@ class SumpyExpansionWrangler: self.local_expansions_level_starts()[-1], dtype=self.dtype) + def m2l_precomputed_exprs_zeros(self): + return cl.array.zeros( + self.queue, + self.m2l_precomputed_exprs_level_starts()[-1], + dtype=self.dtype) + def multipole_expansions_view(self, mpole_exps, level): expn_start, expn_stop = \ self.multipole_expansions_level_starts()[level:level+2] @@ -314,6 +420,16 @@ class SumpyExpansionWrangler: return (box_start, local_exps[expn_start:expn_stop].reshape(box_stop-box_start, -1)) + def m2l_precomputed_exprs_view(self, m2l_precomputed_exprs, level): + expn_start, expn_stop = \ + self.m2l_precomputed_exprs_level_starts()[level:level+2] + translation_class_start, translation_class_stop = \ + self.m2l_translation_class_level_start_box_nrs()[level:level+2] + + exprs_level = m2l_precomputed_exprs[expn_start:expn_stop] + return (translation_class_start, exprs_level.reshape( + translation_class_stop - translation_class_start, -1)) + def output_zeros(self): from pytools.obj_array import make_obj_array return make_obj_array([ @@ -480,6 +596,69 @@ class SumpyExpansionWrangler: return (pot, SumpyTimingFuture(self.queue, events)) + @memoize_method + def _multipole_to_local_precompute(self, src_rscale): + m2l_precomputed_exprs = self.m2l_precomputed_exprs_zeros() + events = [] + for lev in range(self.tree.nlevels): + order = self.level_orders[lev] + precompute_kernel = \ + self.code.m2l_optimized_precompute_kernel(order, order) + + translation_classes_level_start, precomputed_exprs_view = \ + self.m2l_precomputed_exprs_view(m2l_precomputed_exprs, lev) + + ntranslation_classes = precomputed_exprs_view.shape[0] + + if ntranslation_classes == 0: + events.append([]) + continue + + m2l_translation_vectors = ( + self.translation_classes_data.m2l_translation_vectors()) + + evt, _ = precompute_kernel( + self.queue, + src_rscale=src_rscale, + translation_classes_level_start=translation_classes_level_start, + ntranslation_classes=ntranslation_classes, + m2l_precomputed_exprs=precomputed_exprs_view, + m2l_translation_vectors=m2l_translation_vectors, + ntranslation_vectors=m2l_translation_vectors.shape[1], + **self.kernel_extra_kwargs + ) + + events.append([evt]) + + return m2l_precomputed_exprs, events + + def multipole_to_local_precompute(self, info, lev): + """Adds to info dict, the information needed for a multipole-to-local + translation with precomputation. Returns true if the level `lev` + has translations to do. If the wrangler does not support an optimized + translation, returns true. + """ + if not self.supports_optimized_m2l: + return True + src_rscale = info["src_rscale"] + m2l_precomputed_exprs, events = \ + self._multipole_to_local_precompute(src_rscale) + translation_classes_level_start, precomputed_exprs_view = \ + self.m2l_precomputed_exprs_view(m2l_precomputed_exprs, lev) + info["m2l_precomputed_exprs"] = precomputed_exprs_view + info["translation_classes_level_start"] = translation_classes_level_start + info["m2l_translation_classes_lists"] = \ + self.translation_classes_data.m2l_translation_classes_lists() + info["wait_for"] = events[lev] + return precomputed_exprs_view.shape[0] != 0 + + @property + def m2l_class(self): + if self.supports_optimized_m2l: + return self.code.m2l_optimized + else: + return self.code.m2l + def multipole_to_local(self, level_start_target_box_nrs, target_boxes, src_box_starts, src_box_lists, @@ -494,16 +673,14 @@ class SumpyExpansionWrangler: continue order = self.level_orders[lev] - m2l = self.code.m2l(order, order) + m2l = self.m2l_class(order, order) source_level_start_ibox, source_mpoles_view = \ self.multipole_expansions_view(mpole_exps, lev) target_level_start_ibox, target_local_exps_view = \ self.local_expansions_view(local_exps, lev) - evt, (local_exps_res,) = m2l( - self.queue, - + kwargs = dict( src_expansions=source_mpoles_view, src_base_ibox=source_level_start_ibox, tgt_expansions=target_local_exps_view, @@ -518,6 +695,11 @@ class SumpyExpansionWrangler: tgt_rscale=level_to_rscale(self.tree, lev), **self.kernel_extra_kwargs) + + if not self.multipole_to_local_precompute(kwargs, lev): + continue + evt, _ = m2l(self.queue, **kwargs) + events.append(evt) return (local_exps, SumpyTimingFuture(self.queue, events)) diff --git a/sumpy/kernel.py b/sumpy/kernel.py index 55b30c8aec946fea89618c8fbeee3bbc2ec1c243..1891d3070a6962fbe168372b45a6789e310f524e 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -116,6 +116,7 @@ class Kernel: """Basic kernel interface. .. attribute:: is_complex_valued + .. attribute:: is_translation_invariant .. attribute:: dim .. automethod:: get_base_kernel @@ -288,6 +289,9 @@ class Kernel: """ return [] + # TODO: Allow kernels that are not translation invariant + is_translation_invariant = True + # }}} diff --git a/sumpy/tools.py b/sumpy/tools.py index e38a19d32c3a0c49f7a054f9ebaa3dc6f5bd0f15..2b2ae9948dcfff84e801466db70ef98373f71811 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -89,7 +89,6 @@ def mi_power(vector, mi, evaluate=True): def add_to_sac(sac, expr): import sumpy.symbolic as sym if sac is None: - raise RuntimeError("") return expr if isinstance(expr, (sym.Number, sym.Symbol)): @@ -1040,4 +1039,13 @@ def find_linear_relationship(matrix): mat[j, :] = mat[j, :]*mat[i, col] - mat[i, :]*mat[j, col] return {} +def matvec_toeplitz_upper_triangular(first_row, vector): + n = len(first_row) + assert len(vector) == n + output = [0]*n + for row in range(n): + terms = tuple(first_row[col-row]*vector[col] for col in range(row, n)) + output[row] = sym.Add(*terms) + return output + # vim: fdm=marker diff --git a/test/test_fmm.py b/test/test_fmm.py index 45e9bcd36a62c883402a27a716529e694b946da3..c51b81657d99f98aa781160056552e81cd692d45 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -53,23 +53,36 @@ else: faulthandler.enable() -@pytest.mark.parametrize("knl, local_expn_class, mpole_expn_class", [ - (LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion), +@pytest.mark.parametrize("knl, local_expn_class, mpole_expn_class, optimized_m2l", [ + (LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, + False), + (LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, + True), (LaplaceKernel(2), LaplaceConformingVolumeTaylorLocalExpansion, - LaplaceConformingVolumeTaylorMultipoleExpansion), - (LaplaceKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion), + LaplaceConformingVolumeTaylorMultipoleExpansion, True), + (LaplaceKernel(2), LaplaceConformingVolumeTaylorLocalExpansion, + LaplaceConformingVolumeTaylorMultipoleExpansion, False), + (LaplaceKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, + False), + (LaplaceKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, + True), + (LaplaceKernel(3), LaplaceConformingVolumeTaylorLocalExpansion, + LaplaceConformingVolumeTaylorMultipoleExpansion, True), (LaplaceKernel(3), LaplaceConformingVolumeTaylorLocalExpansion, - LaplaceConformingVolumeTaylorMultipoleExpansion), - (HelmholtzKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion), + LaplaceConformingVolumeTaylorMultipoleExpansion, False), + (HelmholtzKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, + False), (HelmholtzKernel(2), HelmholtzConformingVolumeTaylorLocalExpansion, - HelmholtzConformingVolumeTaylorMultipoleExpansion), - (HelmholtzKernel(2), H2DLocalExpansion, H2DMultipoleExpansion), - (HelmholtzKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion), + HelmholtzConformingVolumeTaylorMultipoleExpansion , False), + (HelmholtzKernel(2), H2DLocalExpansion, H2DMultipoleExpansion, False), + (HelmholtzKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, + False), (HelmholtzKernel(3), HelmholtzConformingVolumeTaylorLocalExpansion, - HelmholtzConformingVolumeTaylorMultipoleExpansion), - (YukawaKernel(2), Y2DLocalExpansion, Y2DMultipoleExpansion), + HelmholtzConformingVolumeTaylorMultipoleExpansion, False), + (YukawaKernel(2), Y2DLocalExpansion, Y2DMultipoleExpansion, False), ]) -def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class): +def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class, + optimized_m2l): logging.basicConfig(level=logging.INFO) ctx = ctx_factory() @@ -167,15 +180,24 @@ def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class): for order in order_values: target_kernels = [knl] - from sumpy.fmm import SumpyExpansionWranglerCodeContainer + from sumpy.fmm import (SumpyExpansionWranglerCodeContainer, + SumpyTranslationClassesData) + + if optimized_m2l: + translation_classes_data = SumpyTranslationClassesData(queue, trav) + else: + translation_classes_data = None + wcc = SumpyExpansionWranglerCodeContainer( ctx, partial(mpole_expn_class, knl), partial(local_expn_class, knl), target_kernels) + wrangler = wcc.get_wrangler(queue, tree, dtype, fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order, - kernel_extra_kwargs=extra_kwargs) + kernel_extra_kwargs=extra_kwargs, + translation_classes_data=translation_classes_data) from boxtree.fmm import drive_fmm diff --git a/test/test_kernels.py b/test/test_kernels.py index 658816a9f922fc4875ecac1801224f529554f0e5..9dd48c0653f75f6591e57957fb7ce0c2f882a517 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -42,6 +42,7 @@ from sumpy.expansion.local import ( BiharmonicConformingVolumeTaylorLocalExpansion) from sumpy.kernel import (LaplaceKernel, HelmholtzKernel, AxisTargetDerivative, DirectionalSourceDerivative, BiharmonicKernel, StokesletKernel) +import sumpy.symbolic as sym from pytools.convergence import PConvergenceVerifier import logging @@ -800,6 +801,72 @@ def test_m2m_and_l2l_exprs_simpler(base_knl, local_expn_class, mpole_expn_class, assert _check_equal(expr1, expr2) +# {{{ test toeplitz + +def _m2l_translate_simple(tgt_expansion, src_expansion, src_coeff_exprs, src_rscale, + dvec, tgt_rscale): + + if not tgt_expansion.use_rscale: + src_rscale = 1 + tgt_rscale = 1 + + from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansionBase + if not isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase): + return 1 + + # We know the general form of the multipole expansion is: + # + # coeff0 * diff(kernel, mi0) + coeff1 * diff(kernel, mi1) + ... + # + # To get the local expansion coefficients, we take derivatives of + # the multipole expansion. + taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale, sac=None) + + from sumpy.tools import add_mi + + result = [] + for deriv in tgt_expansion.get_coefficient_identifiers(): + local_result = [] + for coeff, term in zip( + src_coeff_exprs, + src_expansion.get_coefficient_identifiers()): + + kernel_deriv = taker.diff(add_mi(deriv, term)) / src_rscale**sum(deriv) + + local_result.append( + coeff * kernel_deriv * tgt_rscale**sum(deriv)) + result.append(sym.Add(*local_result)) + return result + + +def test_m2l_toeplitz(): + dim = 3 + knl = LaplaceKernel(dim) + local_expn_class = LaplaceConformingVolumeTaylorLocalExpansion + mpole_expn_class = LaplaceConformingVolumeTaylorMultipoleExpansion + + local_expn = local_expn_class(knl, order=5) + mpole_expn = mpole_expn_class(knl, order=5) + + dvec = sym.make_sym_vector("d", dim) + src_coeff_exprs = list(1 + np.random.randn(len(mpole_expn))) + src_rscale = 2.0 + tgt_rscale = 1.0 + + expected_output = _m2l_translate_simple(local_expn, mpole_expn, src_coeff_exprs, + src_rscale, dvec, tgt_rscale) + actual_output = local_expn.translate_from(mpole_expn, src_coeff_exprs, + src_rscale, dvec, tgt_rscale, sac=None) + + replace_dict = dict((d, np.random.rand(1)[0]) for d in dvec) + for sym_a, sym_b in zip(expected_output, actual_output): + num_a = sym_a.xreplace(replace_dict) + num_b = sym_b.xreplace(replace_dict) + assert abs(num_a - num_b)/abs(num_a) < 1e-10 + +# }}} + + # You can test individual routines by typing # $ python test_kernels.py 'test_p2p(cl.create_some_context)'