diff --git a/examples/curve-pot.py b/examples/curve-pot.py index 3bed529c359898aac2a4da20f0c09038572298e5..134d7d34a3555480cc60f2869d7cd57269524902 100644 --- a/examples/curve-pot.py +++ b/examples/curve-pot.py @@ -14,17 +14,19 @@ except ModuleNotFoundError: def process_kernel(knl, what_operator): + target_knl = knl + source_knl = knl if what_operator == "S": pass elif what_operator == "S0": from sumpy.kernel import TargetDerivative - knl = TargetDerivative(0, knl) + target_knl = TargetDerivative(0, knl) elif what_operator == "S1": from sumpy.kernel import TargetDerivative - knl = TargetDerivative(1, knl) + target_knl = TargetDerivative(1, knl) elif what_operator == "D": from sumpy.kernel import DirectionalSourceDerivative - knl = DirectionalSourceDerivative(knl) + source_knl = DirectionalSourceDerivative(knl) # DirectionalTargetDerivative (temporarily?) removed # elif what_operator == "S'": # from sumpy.kernel import DirectionalTargetDerivative @@ -32,7 +34,7 @@ def process_kernel(knl, what_operator): else: raise RuntimeError("unrecognized operator '%s'" % what_operator) - return knl + return source_knl, target_knl def draw_pot_figure(aspect_ratio, @@ -83,15 +85,17 @@ def draw_pot_figure(aspect_ratio, expn_class = LineTaylorLocalExpansion knl_kwargs = {} - vol_knl = process_kernel(knl, what_operator) - p2p = P2P(ctx, [vol_knl], exclude_self=False, + vol_source_knl, vol_target_knl = process_kernel(knl, what_operator) + p2p = P2P(ctx, source_kernels=(vol_source_knl,), + target_kernels=(vol_target_knl,), + exclude_self=False, value_dtypes=np.complex128) - lpot_knl = process_kernel(knl, what_operator_lpot) + lpot_source_knl, lpot_target_knl = process_kernel(knl, what_operator_lpot) from sumpy.qbx import LayerPotential - lpot = LayerPotential(ctx, [ - expn_class(lpot_knl, order=order)], + lpot = LayerPotential(ctx, expansion=expn_class(knl, order=order), + source_kernels=(lpot_source_knl,), target_kernels=(lpot_target_knl,), value_dtypes=np.complex128) # }}} diff --git a/sumpy/assignment_collection.py b/sumpy/assignment_collection.py index 9078c7aad409589ecb65dc8e713845ddd012c763..7ae5ee001e37d9eb1449c1b20ab0d58454a3dc05 100644 --- a/sumpy/assignment_collection.py +++ b/sumpy/assignment_collection.py @@ -149,6 +149,7 @@ class SymbolicAssignmentCollection: root_name = name self.assignments[name] = sym.sympify(expr) + return name def assign_unique(self, name_base, expr): """Assign *expr* to a new variable whose name is based on *name_base*. 
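The curve-pot.py hunk above is the template for the new calling convention: derivative wrappers now travel in separate source_kernels and target_kernels tuples, while the expansion is built from the underived base kernel. Below is a minimal sketch of that convention (not part of the patch); the Laplace kernel, the axis-0 target derivative and the directional source derivative are illustrative stand-ins for whatever process_kernel selects, and the PyOpenCL context setup is likewise illustrative.

# Sketch only -- mirrors the updated calls in examples/curve-pot.py.
import pyopencl as cl

from sumpy import P2P
from sumpy.qbx import LayerPotential
from sumpy.expansion.local import LineTaylorLocalExpansion
from sumpy.kernel import (LaplaceKernel, AxisTargetDerivative,
        DirectionalSourceDerivative)

ctx = cl.create_some_context()

base_knl = LaplaceKernel(2)
source_knl = DirectionalSourceDerivative(base_knl, "dir_vec")
target_knl = AxisTargetDerivative(0, base_knl)

# Point-to-point: source and target derivatives are passed in separate lists.
p2p = P2P(ctx,
        source_kernels=(source_knl,), target_kernels=(target_knl,),
        exclude_self=False)

# QBX layer potential: the expansion is built from the plain base kernel;
# the derivative operators are supplied via source_kernels/target_kernels.
lpot = LayerPotential(ctx,
        expansion=LineTaylorLocalExpansion(base_knl, order=4),
        source_kernels=(source_knl,), target_kernels=(target_knl,))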
diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 52dd453eaf28192e498e0bed56217a238c3c6ecc..eb5963031ec970deb5cba3db0a8b90e13fec9f86 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -217,8 +217,8 @@ class E2EFromCSR(E2EBase): lang_version=MOST_RECENT_LANGUAGE_VERSION ) - for expn in [self.src_expansion, self.tgt_expansion]: - loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) @@ -337,8 +337,8 @@ class E2EFromChildren(E2EBase): fixed_parameters=dict(dim=self.dim, nchildren=2**self.dim), lang_version=MOST_RECENT_LANGUAGE_VERSION) - for expn in [self.src_expansion, self.tgt_expansion]: - loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") @@ -441,8 +441,8 @@ class E2EFromParent(E2EBase): fixed_parameters=dict(dim=self.dim, nchildren=2**self.dim), lang_version=MOST_RECENT_LANGUAGE_VERSION) - for expn in [self.src_expansion, self.tgt_expansion]: - loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 2512aca8574cac4c1815a1202c729aab1e25786f..3ece084ed8337b111ad98f7fcd03767c710e543f 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -62,8 +62,9 @@ class E2PBase(KernelCacheWrapper): expansion = expansion.with_kernel( sdr(expansion.kernel)) + kernels = [sdr(knl) for knl in kernels] for knl in kernels: - assert sdr(tdr(knl)) == expansion.kernel + assert tdr(knl) == expansion.kernel self.ctx = ctx self.expansion = expansion @@ -85,11 +86,10 @@ class E2PBase(KernelCacheWrapper): coeff_exprs = [sym.Symbol("coeff%d" % i) for i in range(len(self.expansion.get_coefficient_identifiers()))] - value = self.expansion.evaluate(coeff_exprs, bvec, rscale, sac=sac) result_names = [ sac.assign_unique("result_%d_p" % i, - knl.postprocess_at_target(value, bvec)) + self.expansion.evaluate(knl, coeff_exprs, bvec, rscale, sac=sac)) for i, knl in enumerate(self.kernels) ] @@ -187,7 +187,8 @@ class E2PFromSingleBox(E2PBase): lang_version=MOST_RECENT_LANGUAGE_VERSION) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) + for knl in self.kernels: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) return loopy_knl @@ -296,7 +297,8 @@ class E2PFromCSR(E2PBase): loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.prioritize_loops(loopy_knl, "itgt_box,itgt,isrc_box") - loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) + for knl in self.kernels: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) return loopy_knl diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index 5e666ceeee0ecb9b9563045393a3b48e2b0d55c6..0885186c267d3374e0d60640c4617fa338068f80 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -54,6 +54,7 @@ class ExpansionBase: .. automethod:: __eq__ .. automethod:: __ne__ """ + init_arg_names = ("kernel", "order", "use_rscale") def __init__(self, kernel, order, use_rscale=None): # Don't be tempted to remove target derivatives here. 
@@ -81,9 +82,6 @@ class ExpansionBase: def is_complex_valued(self): return self.kernel.is_complex_valued - def prepare_loopy_kernel(self, loopy_knl): - return self.kernel.prepare_loopy_kernel(loopy_knl) - def get_code_transformer(self): return self.kernel.get_code_transformer() @@ -110,7 +108,7 @@ class ExpansionBase: """ raise NotImplementedError - def coefficients_from_source(self, avec, bvec, rscale, sac=None): + def coefficients_from_source(self, kernel, avec, bvec, rscale, sac=None): """Form an expansion from a source point. :arg avec: vector from source to center. @@ -124,7 +122,7 @@ class ExpansionBase: """ raise NotImplementedError - def evaluate(self, coeffs, bvec, rscale, sac=None): + def evaluate(self, kernel, coeffs, bvec, rscale, sac=None): """ :return: a :mod:`sympy` expression corresponding to the evaluated expansion with the coefficients @@ -153,6 +151,21 @@ class ExpansionBase: def __ne__(self, other): return not self.__eq__(other) + def copy(self, **kwargs): + new_kwargs = { + name: getattr(self, name) + for name in self.init_arg_names} + + for name in self.init_arg_names: + new_kwargs[name] = kwargs.pop(name, getattr(self, name)) + + if kwargs: + raise TypeError("unexpected keyword arguments '%s'" + % ", ".join(kwargs)) + + return type(self)(**new_kwargs) + + # }}} diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index 17ecda061013236bbe0b5f30b56845ee3a4d11cb..40a667b995f7375386c2f1c63f41c1ac6f081447 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -77,11 +77,10 @@ class LineTaylorLocalExpansion(LocalExpansionBase): deriv_taker = MiDerivativeTaker(line_kernel, (tau,)) return [my_syntactic_subs( - kernel.postprocess_at_target( - kernel.postprocess_at_source( - deriv_taker.diff(i), - avec), bvec), - {tau: 0}) + kernel.postprocess_at_source( + deriv_taker.diff(i), + avec), + {tau: 0}) for i in self.get_coefficient_identifiers()] else: # Workaround for sympy. 
The automatic distribution after @@ -91,14 +90,12 @@ class LineTaylorLocalExpansion(LocalExpansionBase): # # See also https://gitlab.tiker.net/inducer/pytential/merge_requests/12 - return [kernel.postprocess_at_target( - kernel.postprocess_at_source( - line_kernel.diff("tau", i), avec), - bvec) + return [kernel.postprocess_at_source( + line_kernel.diff("tau", i), avec) .subs("tau", 0) for i in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale, sac=None): + def evaluate(self, kernel, coeffs, bvec, rscale, sac=None): # no point in heeding rscale here--just ignore it from pytools import factorial return sym.Add(*( @@ -124,21 +121,23 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): taker.diff(mi) * rscale ** sum(mi) for mi in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale, sac=None): + def evaluate(self, kernel, coeffs, bvec, rscale, sac=None): evaluated_coeffs = ( self.expansion_terms_wrangler.get_full_kernel_derivatives_from_stored( coeffs, rscale, sac=sac)) - bvec = [b*rscale**-1 for b in bvec] + bvec_scaled = [b*rscale**-1 for b in bvec] from sumpy.tools import mi_power, mi_factorial - return sum( + result = sum( coeff - * mi_power(bvec, mi, evaluate=False) + * mi_power(bvec_scaled, mi, evaluate=False) / mi_factorial(mi) for coeff, mi in zip( evaluated_coeffs, self.get_full_coefficient_identifiers())) + return kernel.postprocess_at_target(result, bvec) + def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale, sac=None, _fast_version=True): logger.info("building translation operator: %s(%d) -> %s(%d): start" @@ -163,7 +162,8 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # This code speeds up derivative taking by caching all kernel # derivatives. - taker = src_expansion.get_kernel_derivative_taker(dvec) + taker = src_expansion.get_kernel_derivative_taker(src_expansion.kernel, + dvec) from sumpy.tools import add_mi @@ -203,7 +203,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): src_coeff_exprs, src_expansion.get_coefficient_identifiers()): embedded_coeffs[ - tgtplusderiv_ident_to_index[add_mi(lexp_mi, term)]] \ + tgtplusderiv_ident_to_index[add_mi(lexp_mi, term)]] \ = coeff # Compress the embedded coefficient set @@ -220,6 +220,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): sum(tgtplusderiv_coeff_id)-sum(lexp_mi) kernel_deriv = ( src_expansion.get_scaled_multipole( + src_expansion.kernel, taker.diff(tgtplusderiv_coeff_id), dvec, src_rscale, nderivatives=sum(tgtplusderiv_coeff_id), @@ -348,8 +349,8 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): # This moves the two cancelling "rscales" closer to each other at # the end in the hope of helping rscale magnitude. 
dvec_scaled = [d*src_rscale for d in dvec] - expr = src_expansion.evaluate(src_coeff_exprs, dvec_scaled, - rscale=src_rscale, sac=sac) + expr = src_expansion.evaluate(src_expansion.kernel, src_coeff_exprs, + dvec_scaled, rscale=src_rscale, sac=sac) replace_dict = {d: d/src_rscale for d in dvec} taker = MiDerivativeTaker(expr, dvec) rscale_ratio = sym.UnevaluatedExpr(tgt_rscale/src_rscale) @@ -429,7 +430,7 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): * sym.exp(sym.I * c * source_angle_rel_center), avec) for c in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale, sac=None): + def evaluate(self, kernel, coeffs, bvec, rscale, sac=None): if not self.use_rscale: rscale = 1 @@ -441,7 +442,7 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): arg_scale = self.get_bessel_arg_scaling() return sum(coeffs[self.get_storage_index(c)] - * self.kernel.postprocess_at_target( + * kernel.postprocess_at_target( bessel_j(c, arg_scale * bvec_len) / rscale ** abs(c) * sym.exp(sym.I * c * -target_angle_rel_center), bvec) diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index e31be2103f417b58a18bbbaa79c52f3c887fad43..2a6955e7c56589d6747e6ec855c08ff62adaa12f 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -110,14 +110,14 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): self.expansion_terms_wrangler.get_stored_mpole_coefficients_from_full( result, rscale, sac=sac)) - def get_scaled_multipole(self, expr, bvec, rscale, nderivatives, + def get_scaled_multipole(self, kernel, expr, bvec, rscale, nderivatives, nderivatives_for_scaling=None): if nderivatives_for_scaling is None: nderivatives_for_scaling = nderivatives if self.kernel.has_efficient_scale_adjustment: return ( - self.kernel.adjust_for_kernel_scaling( + kernel.adjust_for_kernel_scaling( vector_xreplace( expr, bvec, bvec * rscale**-1), @@ -126,22 +126,21 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase): else: return (rscale**nderivatives_for_scaling * expr) - def evaluate(self, coeffs, bvec, rscale, sac=None): + def evaluate(self, kernel, coeffs, bvec, rscale, sac=None): if not self.use_rscale: rscale = 1 - taker = self.get_kernel_derivative_taker(bvec) + taker = self.get_kernel_derivative_taker(kernel, bvec) - result = sym.Add(*tuple( - coeff - * self.get_scaled_multipole(taker.diff(mi), bvec, rscale, sum(mi)) + result = sym.Add(*tuple(coeff * self.get_scaled_multipole(kernel, + taker.diff(mi), bvec, rscale, sum(mi)) for coeff, mi in zip(coeffs, self.get_coefficient_identifiers()))) - return result + return kernel.postprocess_at_target(result, bvec) - def get_kernel_derivative_taker(self, bvec): + def get_kernel_derivative_taker(self, kernel, bvec): from sumpy.tools import MiDerivativeTaker - return MiDerivativeTaker(self.kernel.get_expression(bvec), bvec) + return MiDerivativeTaker(kernel.get_expression(bvec), bvec) def translate_from(self, src_expansion, src_coeff_exprs, src_rscale, dvec, tgt_rscale, sac=None, _fast_version=True): @@ -451,7 +450,7 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): avec) for c in self.get_coefficient_identifiers()] - def evaluate(self, coeffs, bvec, rscale, sac=None): + def evaluate(self, kernel, coeffs, bvec, rscale, sac=None): if not self.use_rscale: rscale = 1 @@ -463,7 +462,7 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): arg_scale = self.get_bessel_arg_scaling() return sum(coeffs[self.get_storage_index(c)] - * self.kernel.postprocess_at_target( 
+ * kernel.postprocess_at_target( hankel_1(c, arg_scale * bvec_len) * rscale ** abs(c) * sym.exp(sym.I * c * target_angle_rel_center), bvec) diff --git a/sumpy/fmm.py b/sumpy/fmm.py index feebbf3d91d577052b6d95b44e6f7ad72c21efe8..5b61a0c0559b0c794537174761eb1fbda9f2c12e 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -60,27 +60,32 @@ class SumpyExpansionWranglerCodeContainer: def __init__(self, cl_context, multipole_expansion_factory, local_expansion_factory, - out_kernels, exclude_self=False, use_rscale=None): + target_kernels, exclude_self=False, use_rscale=None, + strength_usage=None, source_kernels=None): """ :arg multipole_expansion_factory: a callable of a single argument (order) that returns a multipole expansion. :arg local_expansion_factory: a callable of a single argument (order) that returns a local expansion. - :arg out_kernels: a list of output kernels + :arg target_kernels: a list of output kernels :arg exclude_self: whether the self contribution should be excluded + :arg strength_usage: passed unchanged to p2l, p2m and p2p. + :arg source_kernels: passed unchanged to p2l, p2m and p2p. """ self.multipole_expansion_factory = multipole_expansion_factory self.local_expansion_factory = local_expansion_factory - self.out_kernels = out_kernels + self.source_kernels = source_kernels + self.target_kernels = target_kernels self.exclude_self = exclude_self self.use_rscale = use_rscale + self.strength_usage = strength_usage self.cl_context = cl_context @memoize_method def get_base_kernel(self): from pytools import single_valued - return single_valued(k.get_base_kernel() for k in self.out_kernels) + return single_valued(k.get_base_kernel() for k in self.target_kernels) @memoize_method def multipole_expansion(self, order): @@ -93,12 +98,16 @@ class SumpyExpansionWranglerCodeContainer: @memoize_method def p2m(self, tgt_order): return P2EFromSingleBox(self.cl_context, - self.multipole_expansion(tgt_order)) + kernels=self.source_kernels, + expansion=self.multipole_expansion(tgt_order), + strength_usage=self.strength_usage) @memoize_method def p2l(self, tgt_order): return P2EFromCSR(self.cl_context, - self.local_expansion(tgt_order)) + kernels=self.source_kernels, + expansion=self.local_expansion(tgt_order), + strength_usage=self.strength_usage) @memoize_method def m2m(self, src_order, tgt_order): @@ -122,18 +131,20 @@ class SumpyExpansionWranglerCodeContainer: def m2p(self, src_order): return E2PFromCSR(self.cl_context, self.multipole_expansion(src_order), - self.out_kernels) + self.target_kernels) @memoize_method def l2p(self, src_order): return E2PFromSingleBox(self.cl_context, self.local_expansion(src_order), - self.out_kernels) + self.target_kernels) @memoize_method def p2p(self): - return P2PFromCSR(self.cl_context, self.out_kernels, - exclude_self=self.exclude_self) + return P2PFromCSR(self.cl_context, target_kernels=self.target_kernels, + source_kernels=self.source_kernels, + exclude_self=self.exclude_self, + strength_usage=self.strength_usage) def get_wrangler(self, queue, tree, dtype, fmm_level_to_order, source_extra_kwargs={}, @@ -310,7 +321,7 @@ class SumpyExpansionWrangler: self.queue, self.tree.ntargets, dtype=self.dtype) - for k in self.code.out_kernels]) + for k in self.code.target_kernels]) def reorder_sources(self, source_array): return source_array.with_queue(self.queue)[self.tree.user_source_ids] diff --git a/sumpy/kernel.py b/sumpy/kernel.py index 16e740a13c54868c4bdb8fa841c87ed7c64a3138..1820ff1c861a6037815bf65ff81687a6e11cf57b 100644 --- a/sumpy/kernel.py +++ 
b/sumpy/kernel.py @@ -116,6 +116,7 @@ class Kernel: .. attribute:: dim .. automethod:: get_base_kernel + .. automethod:: replace_base_kernel .. automethod:: prepare_loopy_kernel .. automethod:: get_code_transformer .. automethod:: get_expression @@ -172,6 +173,12 @@ """ return self + def replace_base_kernel(self, new_base_kernel): + """Return a copy of this kernel with its (innermost) base kernel + replaced by *new_base_kernel*. + """ + return new_base_kernel + def prepare_loopy_kernel(self, loopy_knl): """Apply some changes (such as registering function manglers) to the kernel. Return the new kernel. @@ -804,6 +811,10 @@ class KernelWrapper(Kernel): def get_source_args(self): return self.inner_kernel.get_source_args() + def replace_base_kernel(self, new_base_kernel): + raise NotImplementedError("replace_base_kernel is not implemented " + "for this wrapper.") + # }}} @@ -833,6 +844,10 @@ class AxisTargetDerivative(DerivativeBase): expr = self.inner_kernel.postprocess_at_target(expr, bvec) return expr.diff(bvec[self.axis]) + def replace_base_kernel(self, new_base_kernel): + return type(self)(self.axis, + self.inner_kernel.replace_base_kernel(new_base_kernel)) + def replace_inner_kernel(self, new_inner_kernel): return type(self)(self.axis, new_inner_kernel) @@ -878,6 +893,10 @@ class DirectionalDerivative(DerivativeBase): key_builder.rec(key_hash, self.inner_kernel) key_builder.rec(key_hash, self.dir_vec_name) + def replace_base_kernel(self, new_base_kernel): + return type(self)(self.inner_kernel.replace_base_kernel(new_base_kernel), + dir_vec_name=self.dir_vec_name) + def __str__(self): return r"{} . \/_{} {}".format( self.dir_vec_name, self.directional_kind[0], self.inner_kernel) diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 93692e6ec47abd85a24f35a591bec0b95ab287d5..a4a9ceb656b9f232dcbd1f0dbf5c988b4b3c6ce8 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -57,16 +57,25 @@ class P2EBase(KernelComputation, KernelCacheWrapper): number of strength arrays that need to be passed. Default: all kernels use the same strength.
""" + from sumpy.kernel import TargetDerivativeRemover, SourceDerivativeRemover + tdr = TargetDerivativeRemover() + sdr = SourceDerivativeRemover() + if kernels is None: - kernels = [expansion.kernel] + kernels = [tdr(expansion.kernel)] + else: + kernels = kernels + + expansion = expansion.with_kernel(sdr(tdr(expansion.kernel))) - KernelComputation.__init__(self, ctx=ctx, kernels=kernels, - strength_usage=strength_usage, value_dtypes=None, - name=name, device=device) + for knl in kernels: + assert tdr(knl) == knl + assert sdr(knl) == expansion.kernel - from sumpy.kernel import TargetDerivativeRemover - expansion = expansion.with_kernel( - TargetDerivativeRemover()(expansion.kernel)) + KernelComputation.__init__(self, ctx=ctx, target_kernels=[], + source_kernels=kernels, + strength_usage=strength_usage, value_dtypes=None, + name=name, device=device) self.expansion = expansion self.dim = expansion.dim @@ -82,7 +91,7 @@ class P2EBase(KernelComputation, KernelCacheWrapper): sac = SymbolicAssignmentCollection() coeff_names = [] - for knl_idx, kernel in enumerate(self.kernels): + for knl_idx, kernel in enumerate(self.source_kernels): for i, coeff_i in enumerate( self.expansion.coefficients_from_source(kernel, avec, None, rscale, sac) @@ -93,7 +102,7 @@ class P2EBase(KernelComputation, KernelCacheWrapper): sac.run_global_cse() code_transformers = [self.expansion.get_code_transformer()] \ - + [kernel.get_code_transformer() for kernel in self.kernels] + + [kernel.get_code_transformer() for kernel in self.source_kernels] from sumpy.codegen import to_loopy_insns return to_loopy_insns( @@ -105,14 +114,14 @@ class P2EBase(KernelComputation, KernelCacheWrapper): ) def get_cache_key(self): - return (type(self).__name__, self.name, self.expansion, tuple(self.kernels), - tuple(self.strength_usage)) + return (type(self).__name__, self.name, self.expansion, + tuple(self.source_kernels), tuple(self.strength_usage)) def get_result_expr(self, icoeff): isrc = pymbolic.var("isrc") return sum(pymbolic.var(f"coeff{icoeff}_{i}") * pymbolic.var("strengths")[self.strength_usage[i], isrc] - for i in range(len(self.kernels))) + for i in range(len(self.source_kernels))) # }}} @@ -153,8 +162,8 @@ class P2EFromSingleBox(P2EBase): [ lp.GlobalArg("sources", None, shape=(self.dim, "nsources"), dim_tags="sep,c"), - lp.GlobalArg("strengths", None, shape="strength_count, nsources", - dim_tags="sep,C"), + lp.GlobalArg("strengths", None, + shape="strength_count, nsources", dim_tags="sep,C"), lp.GlobalArg("box_source_starts,box_source_counts_nonchild", None, shape=None), lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), @@ -164,7 +173,8 @@ class P2EFromSingleBox(P2EBase): lp.ValueArg("nboxes,aligned_nboxes,tgt_base_ibox", np.int32), lp.ValueArg("nsources", np.int32), "..." - ] + gather_loopy_source_arguments(self.kernels + (self.expansion,)), + ] + gather_loopy_source_arguments(self.source_kernels + + (self.expansion,)), name=self.name, assumptions="nsrc_boxes>=1", silenced_warnings="write_race(write_expn*)", @@ -173,7 +183,8 @@ class P2EFromSingleBox(P2EBase): strength_count=self.strength_count), lang_version=MOST_RECENT_LANGUAGE_VERSION) - loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) + for knl in self.source_kernels: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") return loopy_knl @@ -234,7 +245,8 @@ class P2EFromCSR(P2EBase): np.int32), lp.ValueArg("nsources", np.int32), "..." 
- ] + gather_loopy_source_arguments(self.kernels + (self.expansion,))) + ] + gather_loopy_source_arguments(self.source_kernels + + (self.expansion,))) loopy_knl = lp.make_kernel( [ @@ -280,7 +292,8 @@ class P2EFromCSR(P2EBase): strength_count=self.strength_count), lang_version=MOST_RECENT_LANGUAGE_VERSION) - loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) + for knl in self.source_kernels: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") return loopy_knl diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 746a9ef7f9b6bf82881552e6380238b7b1c85008..b71bac83ddbae8fcaf23b1721213de143183951a 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -26,7 +26,6 @@ THE SOFTWARE. import numpy as np import loopy as lp from loopy.version import MOST_RECENT_LANGUAGE_VERSION -from pymbolic import var from sumpy.tools import ( KernelComputation, KernelCacheWrapper, is_obj_array_like) @@ -52,76 +51,118 @@ Particle-to-particle # {{{ p2p base class class P2PBase(KernelComputation, KernelCacheWrapper): - def __init__(self, ctx, kernels, exclude_self, strength_usage=None, - value_dtypes=None, name=None, device=None): + def __init__(self, ctx, target_kernels, exclude_self, strength_usage=None, + value_dtypes=None, name=None, device=None, source_kernels=None): """ - :arg kernels: list of :class:`sumpy.kernel.Kernel` instances + :arg target_kernels: list of :class:`sumpy.kernel.Kernel` instances + with only target derivatives. + :arg source_kernels: list of :class:`sumpy.kernel.Kernel` instances + with only source derivatives. :arg strength_usage: A list of integers indicating which expression uses which source strength indicator. This implicitly specifies the number of strength arrays that need to be passed. Default: all kernels use the same strength. 
""" - KernelComputation.__init__(self, ctx, kernels, strength_usage, - value_dtypes, - name, device) + from pytools import single_valued + from sumpy.kernel import (TargetDerivativeRemover, + SourceDerivativeRemover) + tdr = TargetDerivativeRemover() + sdr = SourceDerivativeRemover() + + if source_kernels is None: + source_kernels = [single_valued(tdr(knl) for knl in target_kernels)] + target_kernels = [sdr(knl) for knl in target_kernels] + else: + for knl in source_kernels: + assert(tdr(knl) == knl) + for knl in target_kernels: + assert(sdr(knl) == knl) + + base_source_kernel = single_valued(sdr(knl) for knl in source_kernels) + base_target_kernel = single_valued(tdr(knl) for knl in target_kernels) + assert base_source_kernel == base_target_kernel + + KernelComputation.__init__(self, ctx=ctx, target_kernels=target_kernels, + source_kernels=source_kernels, strength_usage=strength_usage, + value_dtypes=value_dtypes, name=name, device=device) self.exclude_self = exclude_self - from pytools import single_valued - self.dim = single_valued(knl.dim for knl in self.kernels) + self.dim = single_valued(knl.dim for knl in + list(self.target_kernels) + list(self.source_kernels)) def get_cache_key(self): - return (type(self).__name__, tuple(self.kernels), self.exclude_self, - tuple(self.strength_usage), tuple(self.value_dtypes)) + return (type(self).__name__, tuple(self.target_kernels), self.exclude_self, + tuple(self.strength_usage), tuple(self.value_dtypes), + tuple(self.source_kernels), + self.device.hashable_model_and_version_identifier) def get_loopy_insns_and_result_names(self): from sumpy.symbolic import make_sym_vector + from pymbolic import var + dvec = make_sym_vector("d", self.dim) from sumpy.assignment_collection import SymbolicAssignmentCollection sac = SymbolicAssignmentCollection() + isrc_sym = var("isrc") + + exprs = [] + for i, out_knl in enumerate(self.target_kernels): + expr_sum = 0 + for j, in_knl in enumerate(self.source_kernels): + expr = in_knl.postprocess_at_source( + in_knl.get_expression(dvec), + dvec) + expr *= self.get_strength_or_not(isrc_sym, j) + expr_sum += expr + expr_sum = out_knl.postprocess_at_target(expr_sum, dvec) + exprs.append(expr_sum) + + if self.exclude_self: + result_name_prefix = "pair_result_tmp" + else: + result_name_prefix = "pair_result" + result_names = [ - sac.assign_unique("knl%d" % i, - knl.postprocess_at_target( - knl.postprocess_at_source( - knl.get_expression(dvec), - dvec), - dvec) - ) - for i, knl in enumerate(self.kernels)] + sac.add_assignment(f"{result_name_prefix}_{i}", expr) + for i, expr in enumerate(exprs) + ] sac.run_global_cse() from sumpy.codegen import to_loopy_insns loopy_insns = to_loopy_insns(sac.assignments.items(), vector_names={"d"}, - pymbolic_expr_maps=[ - knl.get_code_transformer() for knl in self.kernels], + pymbolic_expr_maps=( + [knl.get_code_transformer() for knl in self.source_kernels] + + [knl.get_code_transformer() for knl in self.target_kernels]), retain_names=result_names, complex_dtype=np.complex128 # FIXME ) - return loopy_insns, result_names - - def get_strength_or_not(self, isrc, kernel_idx): - return var("strength").index((self.strength_usage[kernel_idx], isrc)) - - def get_kernel_exprs(self, result_names): from pymbolic import var - - isrc_sym = var("isrc") - exprs = [var(name) * self.get_strength_or_not(isrc_sym, i) - for i, name in enumerate(result_names)] - if self.exclude_self: + assignments = [lp.Assignment(id=None, + assignee=f"pair_result_{i}", expression=var(name), + temp_var_type=lp.Optional(None)) + 
for i, name in enumerate(result_names)] + from pymbolic.primitives import If, Variable - exprs = [If(Variable("is_self"), 0, expr) for expr in exprs] + assignments = [assign.copy(expression=If(Variable("is_self"), 0, + assign.expression)) for assign in assignments] + else: + assignments = [] - return [lp.Assignment(id=None, - assignee="pair_result_%d" % i, expression=expr, - temp_var_type=lp.Optional(None)) - for i, expr in enumerate(exprs)] + return assignments + loopy_insns, result_names + + def get_kernel_scaling_or_not(self, kernel_idx): + return f"knl_{kernel_idx}_scaling" + + def get_strength_or_not(self, isrc, kernel_idx): + from sumpy.symbolic import Symbol + return Symbol(f"strength_{self.strength_usage[kernel_idx]}") def get_default_src_tgt_arguments(self): from sumpy.tools import gather_loopy_source_arguments @@ -134,7 +175,7 @@ class P2PBase(KernelComputation, KernelCacheWrapper): lp.ValueArg("ntargets", None)] + ([lp.GlobalArg("target_to_source", None, shape=("ntargets",))] if self.exclude_self else []) - + gather_loopy_source_arguments(self.kernels)) + + gather_loopy_source_arguments(self.source_kernels)) def get_kernel(self): raise NotImplementedError @@ -164,7 +205,6 @@ class P2P(P2PBase): def get_kernel(self): loopy_insns, result_names = self.get_loopy_insns_and_result_names() - kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() + [ @@ -185,12 +225,13 @@ class P2P(P2PBase): + ["<> d[idim] = targets[idim, itgt] - sources[idim, isrc]"] + ["<> is_self = (isrc == target_to_source[itgt])" if self.exclude_self else ""] - + loopy_insns + kernel_exprs - + [""" - result[{i}, itgt] = knl_{i}_scaling * \ - simul_reduce(sum, isrc, pair_result_{i}) {{inames=itgt}} - """.format(i=iknl) - for iknl in range(len(self.kernels))] + + [f"<> strength_{i} = strength[{i}, isrc]" for + i in set(self.strength_usage)] + + loopy_insns + + [f""" + result[{iknl}, itgt] = knl_{iknl}_scaling * \ + simul_reduce(sum, isrc, pair_result_{iknl}) {{inames=itgt}} + """ for iknl in range(len(self.target_kernels))] + ["end"], arguments, assumptions="nsources>=1 and ntargets>=1", @@ -198,12 +239,12 @@ class P2P(P2PBase): fixed_parameters=dict( dim=self.dim, nstrengths=self.strength_count, - nresults=len(self.kernels)), + nresults=len(self.target_kernels)), lang_version=MOST_RECENT_LANGUAGE_VERSION) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - for knl in self.kernels: + for knl in self.target_kernels + self.source_kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) return loopy_knl @@ -231,7 +272,6 @@ class P2PMatrixGenerator(P2PBase): def get_kernel(self): loopy_insns, result_names = self.get_loopy_insns_and_result_names() - kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() + [lp.GlobalArg("result_%d" % i, dtype, @@ -249,12 +289,11 @@ class P2PMatrixGenerator(P2PBase): + ["<> d[idim] = targets[idim, itgt] - sources[idim, isrc]"] + ["<> is_self = (isrc == target_to_source[itgt])" if self.exclude_self else ""] - + loopy_insns + kernel_exprs - + [""" - result_{i}[itgt, isrc] = \ - knl_{i}_scaling * pair_result_{i} {{inames=isrc:itgt}} - """.format(i=iknl) - for iknl in range(len(self.kernels))] + + loopy_insns + + [f""" + result_{iknl}[itgt, isrc] = knl_{iknl}_scaling \ + * pair_result_{iknl} {{inames=isrc:itgt}} + """ for iknl in range(len(self.target_kernels))] + ["end"], arguments, assumptions="nsources>=1 and ntargets>=1", @@ -264,7 +303,7 @@ class P2PMatrixGenerator(P2PBase): loopy_knl = 
lp.tag_inames(loopy_knl, "idim*:unr") - for knl in self.kernels: + for knl in self.target_kernels + self.source_kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) return loopy_knl @@ -294,7 +333,6 @@ class P2PMatrixBlockGenerator(P2PBase): def get_kernel(self): loopy_insns, result_names = self.get_loopy_insns_and_result_names() - kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() + [ @@ -321,13 +359,12 @@ class P2PMatrixBlockGenerator(P2PBase): + [""" <> is_self = (isrc == target_to_source[itgt]) """ if self.exclude_self else ""] - + loopy_insns + kernel_exprs - + [""" - result_{i}[imat] = \ - knl_{i}_scaling * pair_result_{i} \ + + loopy_insns + + [f""" + result_{iknl}[imat] = \ + knl_{iknl}_scaling * pair_result_{iknl} \ {{id_prefix=write_p2p}} - """.format(i=iknl) - for iknl in range(len(self.kernels))] + """ for iknl in range(len(self.target_kernels))] + ["end"], arguments, assumptions="nresult>=1", @@ -340,7 +377,7 @@ class P2PMatrixBlockGenerator(P2PBase): loopy_knl = lp.add_dtypes(loopy_knl, dict(nsources=np.int32, ntargets=np.int32)) - for knl in self.kernels: + for knl in self.target_kernels + self.source_kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) return loopy_knl @@ -399,7 +436,6 @@ class P2PFromCSR(P2PBase): def get_kernel(self): loopy_insns, result_names = self.get_loopy_insns_and_result_names() - kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() + [ @@ -418,7 +454,7 @@ class P2PFromCSR(P2PBase): lp.GlobalArg("strength", None, shape="nstrengths, nsources", dim_tags="sep,C"), lp.GlobalArg("result", None, - shape="nkernels, ntargets", dim_tags="sep,C"), + shape="noutputs, ntargets", dim_tags="sep,C"), "..." ]) @@ -452,14 +488,16 @@ class P2PFromCSR(P2PBase): """] + [""" <> is_self = (isrc == target_to_source[itgt]) """ if self.exclude_self else ""] - + loopy_insns + kernel_exprs + + [f"<> strength_{i} = strength[{i}, isrc]" for + i in set(self.strength_usage)] + + loopy_insns + [" end"] - + [""" - result[{i}, itgt] = result[{i}, itgt] + \ - knl_{i}_scaling * simul_reduce(sum, isrc, pair_result_{i}) \ + + [f""" + result[{iknl}, itgt] = result[{iknl}, itgt] + \ + knl_{iknl}_scaling * \ + simul_reduce(sum, isrc, pair_result_{iknl}) \ {{id_prefix=write_csr}} - """.format(i=iknl) - for iknl in range(len(self.kernels))] + """ for iknl in range(len(self.target_kernels))] + [""" end end @@ -472,7 +510,7 @@ class P2PFromCSR(P2PBase): fixed_parameters=dict( dim=self.dim, nstrengths=self.strength_count, - nkernels=len(self.kernels)), + noutputs=len(self.target_kernels)), lang_version=MOST_RECENT_LANGUAGE_VERSION) loopy_knl = lp.add_dtypes(loopy_knl, @@ -482,7 +520,7 @@ class P2PFromCSR(P2PBase): loopy_knl = lp.tag_array_axes(loopy_knl, "targets", "sep,C") loopy_knl = lp.tag_array_axes(loopy_knl, "sources", "sep,C") - for knl in self.kernels: + for knl in self.target_kernels + self.source_kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) return loopy_knl diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 9e556ccc7217669c1cb375a452985d846cc78a0f..0c0438c7b6a3bc083cfedc27c28116b1e8e3ad8e 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -62,45 +62,55 @@ def stringify_expn_index(i): return str(i) -def expand(expansion_nr, sac, expansion, avec, bvec): - rscale = sym.Symbol("rscale") - - coefficients = expansion.coefficients_from_source(expansion.kernel, - avec, bvec, rscale) - - assigned_coeffs = [ - sym.Symbol( - sac.assign_unique("expn%dcoeff%s" % ( - expansion_nr, 
stringify_expn_index(i)), - coefficients[expansion.get_storage_index(i)])) - for i in expansion.get_coefficient_identifiers()] - - return sac.assign_unique("expn%d_result" % expansion_nr, - expansion.evaluate(assigned_coeffs, bvec, rscale)) - - # {{{ layer potential computation # {{{ base class class LayerPotentialBase(KernelComputation, KernelCacheWrapper): - def __init__(self, ctx, expansions, strength_usage=None, - value_dtypes=None, - name=None, device=None): - KernelComputation.__init__(self, ctx, expansions, strength_usage, - value_dtypes, - name, device) + def __init__(self, ctx, expansion, strength_usage=None, + value_dtypes=None, name=None, device=None, + source_kernels=None, target_kernels=None): from pytools import single_valued - self.dim = single_valued(knl.dim for knl in self.expansions) + + KernelComputation.__init__(self, ctx=ctx, target_kernels=target_kernels, + strength_usage=strength_usage, source_kernels=source_kernels, + value_dtypes=value_dtypes, name=name, device=device) + + self.dim = single_valued(knl.dim for knl in self.target_kernels) + self.expansion = expansion def get_cache_key(self): - return (type(self).__name__, tuple(self.kernels), - tuple(self.strength_usage), tuple(self.value_dtypes)) + return (type(self).__name__, self.expansion, tuple(self.target_kernels), + tuple(self.source_kernels), tuple(self.strength_usage), + tuple(self.value_dtypes), + self.device.hashable_model_and_version_identifier) + + def _expand(self, sac, avec, bvec, rscale, isrc): + coefficients = np.zeros(len(self.expansion), dtype=object) + from sumpy.symbolic import PymbolicToSympyMapper + conv = PymbolicToSympyMapper() + for idx, knl in enumerate(self.source_kernels): + strength = conv(self.get_strength_or_not(isrc, idx)) + coefficients += np.array([coeff * strength for + i, coeff in enumerate( + self.expansion.coefficients_from_source(knl, + avec, bvec, rscale))]) + + return list(coefficients) + + def _evaluate(self, sac, avec, bvec, rscale, expansion_nr, coefficients): + expn = self.target_kernels[expansion_nr] + assigned_coeffs = [ + sym.Symbol( + sac.assign_unique("expn%dcoeff%s" % ( + expansion_nr, stringify_expn_index(i)), + expn.postprocess_at_target( + coefficients[self.expansion.get_storage_index(i)], bvec))) + for i in self.expansion.get_coefficient_identifiers()] - @property - def expansions(self): - return self.kernels + return sac.assign_unique("expn%d_result" % expansion_nr, + self.expansion.evaluate(expn, assigned_coeffs, bvec, rscale)) def get_loopy_insns_and_result_names(self): from sumpy.symbolic import make_sym_vector @@ -112,19 +122,25 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper): logger.info("compute expansion expressions: start") - result_names = [expand(i, sac, expn, avec, bvec) - for i, expn in enumerate(self.expansions)] + rscale = sym.Symbol("rscale") + isrc_sym = var("isrc") + + coefficients = self._expand(sac, avec, bvec, rscale, isrc_sym) + result_names = [self._evaluate(sac, avec, bvec, rscale, i, coefficients) + for i in range(len(self.target_kernels))] logger.info("compute expansion expressions: done") sac.run_global_cse() + pymbolic_expr_maps = [knl.get_code_transformer() for knl in ( + self.target_kernels + self.source_kernels)] + from sumpy.codegen import to_loopy_insns loopy_insns = to_loopy_insns( sac.assignments.items(), vector_names={"a", "b"}, - pymbolic_expr_maps=[ - expn.kernel.get_code_transformer() for expn in self.expansions], + pymbolic_expr_maps=pymbolic_expr_maps, retain_names=result_names, 
complex_dtype=np.complex128 # FIXME ) @@ -132,12 +148,10 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper): return loopy_insns, result_names def get_strength_or_not(self, isrc, kernel_idx): - return var("strength_%d" % self.strength_usage[kernel_idx]).index(isrc) + return var("strength_%d_isrc" % self.strength_usage[kernel_idx]) def get_kernel_exprs(self, result_names): - isrc_sym = var("isrc") - exprs = [var(name) * self.get_strength_or_not(isrc_sym, i) - for i, name in enumerate(result_names)] + exprs = [var(name) for i, name in enumerate(result_names)] return [lp.Assignment(id=None, assignee="pair_result_%d" % i, expression=expr, @@ -157,7 +171,7 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper): None, shape="ntargets"), lp.ValueArg("nsources", None), lp.ValueArg("ntargets", None)] - + gather_loopy_source_arguments(self.kernels)) + + gather_loopy_source_arguments(self.source_kernels)) def get_kernel(self): raise NotImplementedError @@ -213,7 +227,7 @@ class LayerPotential(LayerPotentialBase): for i in range(self.strength_count)] + [lp.GlobalArg("result_%d" % i, None, shape="ntargets", order="C") - for i in range(len(self.kernels))]) + for i in range(len(self.target_kernels))]) loopy_knl = lp.make_kernel([""" {[itgt, isrc, idim]: \ @@ -226,13 +240,15 @@ class LayerPotential(LayerPotentialBase): + ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"] + ["<> b[idim] = tgt[idim, itgt] - center[idim, itgt] {dup=idim}"] + ["<> rscale = expansion_radii[itgt]"] + + [f"<> strength_{i}_isrc = strength_{i}[isrc]" for i in + range(self.strength_count)] + loopy_insns + kernel_exprs + [""" result_{i}[itgt] = knl_{i}_scaling * \ simul_reduce(sum, isrc, pair_result_{i}) \ {{id_prefix=write_lpot,inames=itgt}} """.format(i=iknl) - for iknl in range(len(self.expansions))] + for iknl in range(len(self.target_kernels))] + ["end"], arguments, name=self.name, @@ -243,8 +259,8 @@ class LayerPotential(LayerPotentialBase): lang_version=MOST_RECENT_LANGUAGE_VERSION) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - for expn in self.expansions: - loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + for knl in self.target_kernels + self.source_kernels: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) return loopy_knl @@ -306,7 +322,7 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase): knl_{i}_scaling * pair_result_{i} \ {{inames=isrc:itgt}} """.format(i=iknl) - for iknl in range(len(self.expansions))] + for iknl in range(len(self.target_kernels))] + ["end"], arguments, name=self.name, @@ -316,7 +332,7 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase): lang_version=MOST_RECENT_LANGUAGE_VERSION) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - for expn in self.expansions: + for expn in self.source_kernels + self.target_kernels: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) return loopy_knl @@ -381,7 +397,7 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): result_{i}[imat] = knl_{i}_scaling * pair_result_{i} \ {{id_prefix=write_lpot}} """.format(i=iknl) - for iknl in range(len(self.expansions))] + for iknl in range(len(self.target_kernels))] + ["end"], arguments, name=self.name, @@ -395,8 +411,8 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase): loopy_knl = lp.add_dtypes(loopy_knl, dict(nsources=np.int32, ntargets=np.int32)) - for expn in self.expansions: - loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + for knl in self.source_kernels + self.target_kernels: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) return 
loopy_knl diff --git a/sumpy/tools.py b/sumpy/tools.py index 8a1e49aecccb156e6df6d18bb6558e90ea701ea2..b68c288f9e70f4f28b3bc377f9d631858c3a8084 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -213,7 +213,7 @@ def gather_loopy_source_arguments(kernel_likes): class KernelComputation: """Common input processing for kernel computations.""" - def __init__(self, ctx, kernels, strength_usage, + def __init__(self, ctx, target_kernels, source_kernels, strength_usage, value_dtypes, name, device=None): """ :arg kernels: list of :class:`sumpy.kernel.Kernel` instances @@ -229,14 +229,14 @@ class KernelComputation: if value_dtypes is None: value_dtypes = [] - for knl in kernels: + for knl in target_kernels: if knl.is_complex_valued: value_dtypes.append(np.complex128) else: value_dtypes.append(np.float64) if not isinstance(value_dtypes, (list, tuple)): - value_dtypes = [np.dtype(value_dtypes)] * len(kernels) + value_dtypes = [np.dtype(value_dtypes)] * len(target_kernels) value_dtypes = [np.dtype(vd) for vd in value_dtypes] # }}} @@ -244,9 +244,9 @@ class KernelComputation: # {{{ process strength_usage if strength_usage is None: - strength_usage = [0] * len(kernels) + strength_usage = list(range(len(source_kernels))) - if len(kernels) != len(strength_usage): + if len(source_kernels) != len(strength_usage): raise ValueError("exprs and strength_usage must have the same length") strength_count = max(strength_usage)+1 @@ -258,7 +258,8 @@ class KernelComputation: self.context = ctx self.device = device - self.kernels = tuple(kernels) + self.source_kernels = tuple(source_kernels) + self.target_kernels = tuple(target_kernels) self.value_dtypes = value_dtypes self.strength_usage = strength_usage self.strength_count = strength_count @@ -276,7 +277,7 @@ class KernelComputation: expression=sympy_conv(kernel.get_global_scaling_const()), temp_var_type=lp.Optional(dtype)) for i, (kernel, dtype) in enumerate( - zip(self.kernels, self.value_dtypes))] + zip(self.target_kernels, self.value_dtypes))] # }}} diff --git a/test/test_fmm.py b/test/test_fmm.py index 73fd236ce29b7ff8445af9440efa17f4e7ab89ec..637eb8b83e504c6da08321af17e08ac2d5709189 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -165,14 +165,14 @@ def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class): from functools import partial for order in order_values: - out_kernels = [knl] + target_kernels = [knl] from sumpy.fmm import SumpyExpansionWranglerCodeContainer wcc = SumpyExpansionWranglerCodeContainer( ctx, partial(mpole_expn_class, knl), partial(local_expn_class, knl), - out_kernels) + target_kernels) wrangler = wcc.get_wrangler(queue, tree, dtype, fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order, kernel_extra_kwargs=extra_kwargs) @@ -182,7 +182,7 @@ def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class): pot, = drive_fmm(trav, wrangler, (weights,)) from sumpy import P2P - p2p = P2P(ctx, out_kernels, exclude_self=False) + p2p = P2P(ctx, target_kernels, exclude_self=False) evt, (ref_pot,) = p2p(queue, targets, sources, (weights,), **extra_kwargs) @@ -198,6 +198,100 @@ def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class): pconv_verifier() +def test_unified_single_and_double(ctx_factory): + """ + Test that running one FMM for single layer + double layer gives the + same result as running one FMM for each and adding the results together + at the end + """ + logging.basicConfig(level=logging.INFO) + + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + knl = LaplaceKernel(2) + 
local_expn_class = LaplaceConformingVolumeTaylorLocalExpansion + mpole_expn_class = LaplaceConformingVolumeTaylorMultipoleExpansion + + nsources = 1000 + ntargets = 300 + dtype = np.float64 + + from boxtree.tools import ( + make_normal_particle_array as p_normal) + + sources = p_normal(queue, nsources, knl.dim, dtype, seed=15) + offset = np.zeros(knl.dim) + offset[0] = 0.1 + + targets = ( + p_normal(queue, ntargets, knl.dim, dtype, seed=18) + + offset) + + del offset + + from boxtree import TreeBuilder + tb = TreeBuilder(ctx) + + tree, _ = tb(queue, sources, targets=targets, + max_particles_in_box=30, debug=True) + + from boxtree.traversal import FMMTraversalBuilder + tbuild = FMMTraversalBuilder(ctx) + trav, _ = tbuild(queue, tree, debug=True) + + from pyopencl.clrandom import PhiloxGenerator + rng = PhiloxGenerator(ctx, seed=44) + weights = ( + rng.uniform(queue, nsources, dtype=np.float64), + rng.uniform(queue, nsources, dtype=np.float64), + ) + + logger.info("computing direct (reference) result") + + dtype = np.float64 + order = 3 + + from functools import partial + from sumpy.kernel import DirectionalSourceDerivative, AxisTargetDerivative + + deriv_knl = DirectionalSourceDerivative(knl, "dir_vec") + + target_kernels = [knl, AxisTargetDerivative(0, knl)] + source_kernel_vecs = [[knl], [deriv_knl], [knl, deriv_knl]] + strength_usages = [[0], [1], [0, 1]] + + alpha = np.linspace(0, 2*np.pi, nsources, np.float64) + dir_vec = np.vstack([np.cos(alpha), np.sin(alpha)]) + + results = [] + for source_kernels, strength_usage in zip(source_kernel_vecs, strength_usages): + source_extra_kwargs = {} + if deriv_knl in source_kernels: + source_extra_kwargs["dir_vec"] = dir_vec + from sumpy.fmm import SumpyExpansionWranglerCodeContainer + wcc = SumpyExpansionWranglerCodeContainer( + ctx, + partial(mpole_expn_class, knl), + partial(local_expn_class, knl), + target_kernels=target_kernels, source_kernels=source_kernels, + strength_usage=strength_usage) + wrangler = wcc.get_wrangler(queue, tree, dtype, + fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order, + source_extra_kwargs=source_extra_kwargs) + + from boxtree.fmm import drive_fmm + + pot = drive_fmm(trav, wrangler, weights) + results.append(np.array([pot[0].get(), pot[1].get()])) + + ref_pot = results[0] + results[1] + pot = results[2] + rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf) + + assert rel_err < 1e-12 + + def test_sumpy_fmm_timing_data_collection(ctx_factory): logging.basicConfig(level=logging.INFO) @@ -233,7 +327,7 @@ def test_sumpy_fmm_timing_data_collection(ctx_factory): rng = PhiloxGenerator(ctx) weights = rng.uniform(queue, nsources, dtype=np.float64) - out_kernels = [knl] + target_kernels = [knl] from functools import partial @@ -242,7 +336,7 @@ def test_sumpy_fmm_timing_data_collection(ctx_factory): ctx, partial(mpole_expn_class, knl), partial(local_expn_class, knl), - out_kernels) + target_kernels) wrangler = wcc.get_wrangler(queue, tree, dtype, fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order) @@ -290,7 +384,7 @@ def test_sumpy_fmm_exclude_self(ctx_factory): target_to_source = np.arange(tree.ntargets, dtype=np.int32) self_extra_kwargs = {"target_to_source": target_to_source} - out_kernels = [knl] + target_kernels = [knl] from functools import partial @@ -299,7 +393,7 @@ def test_sumpy_fmm_exclude_self(ctx_factory): ctx, partial(mpole_expn_class, knl), partial(local_expn_class, knl), - out_kernels, + target_kernels, exclude_self=True) wrangler = wcc.get_wrangler(queue, tree, dtype, @@ 
-311,7 +405,7 @@ def test_sumpy_fmm_exclude_self(ctx_factory): pot, = drive_fmm(trav, wrangler, (weights,)) from sumpy import P2P - p2p = P2P(ctx, out_kernels, exclude_self=True) + p2p = P2P(ctx, target_kernels, exclude_self=True) evt, (ref_pot,) = p2p(queue, sources, sources, (weights,), **self_extra_kwargs) diff --git a/test/test_kernels.py b/test/test_kernels.py index cd81e847c359568a6dd9b2610890449f3d569870..658816a9f922fc4875ecac1801224f529554f0e5 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -131,7 +131,7 @@ def test_p2e_multiple(ctx_factory, base_knl, expn_class): if isinstance(base_knl, StokesletKernel): extra_kwargs["mu"] = 0.2 - in_kernels = [ + source_kernels = [ DirectionalSourceDerivative(base_knl, "dir_vec"), base_knl, ] @@ -169,7 +169,7 @@ def test_p2e_multiple(ctx_factory, base_knl, expn_class): rscale = 0.5 # pick something non-1 # apply p2e at the same time - p2e = P2EFromSingleBox(ctx, expn, kernels=in_kernels, strength_usage=[0, 1]) + p2e = P2EFromSingleBox(ctx, expn, kernels=source_kernels, strength_usage=[0, 1]) evt, (mpoles,) = p2e(queue, source_boxes=source_boxes, box_source_starts=box_source_starts, @@ -190,11 +190,12 @@ def test_p2e_multiple(ctx_factory, base_knl, expn_class): # apply p2e separately expected_result = np.zeros_like(actual_result) - for i, in_kernel in enumerate(in_kernels): + for i, source_kernel in enumerate(source_kernels): extra_source_kwargs = extra_kwargs.copy() - if isinstance(in_kernel, DirectionalSourceDerivative): + if isinstance(source_kernel, DirectionalSourceDerivative): extra_source_kwargs["dir_vec"] = dir_vec - p2e = P2EFromSingleBox(ctx, expn, kernels=[in_kernel], strength_usage=[i]) + p2e = P2EFromSingleBox(ctx, expn, + kernels=[source_kernel], strength_usage=[i]) evt, (mpoles,) = p2e(queue, source_boxes=source_boxes, box_source_starts=box_source_starts, @@ -275,7 +276,7 @@ def test_p2e2p(ctx_factory, base_knl, expn_class, order, with_source_derivative) else: knl = base_knl - out_kernels = [ + target_kernels = [ knl, AxisTargetDerivative(0, knl), ] @@ -283,8 +284,8 @@ def test_p2e2p(ctx_factory, base_knl, expn_class, order, with_source_derivative) from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P p2e = P2EFromSingleBox(ctx, expn, kernels=[knl]) - e2p = E2PFromSingleBox(ctx, expn, kernels=out_kernels) - p2p = P2P(ctx, out_kernels, exclude_self=False) + e2p = E2PFromSingleBox(ctx, expn, kernels=target_kernels) + p2p = P2P(ctx, target_kernels, exclude_self=False) from pytools.convergence import EOCRecorder eoc_rec_pot = EOCRecorder() @@ -480,7 +481,7 @@ def test_translations(ctx_factory, knl, local_expn_class, mpole_expn_class): res = 20 nsources = 15 - out_kernels = [knl] + target_kernels = [knl] extra_kwargs = {} if isinstance(knl, HelmholtzKernel): @@ -566,11 +567,11 @@ def test_translations(ctx_factory, knl, local_expn_class, mpole_expn_class): from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P, E2EFromCSR p2m = P2EFromSingleBox(ctx, m_expn) m2m = E2EFromCSR(ctx, m_expn, m_expn) - m2p = E2PFromSingleBox(ctx, m_expn, out_kernels) + m2p = E2PFromSingleBox(ctx, m_expn, target_kernels) m2l = E2EFromCSR(ctx, m_expn, l_expn) l2l = E2EFromCSR(ctx, l_expn, l_expn) - l2p = E2PFromSingleBox(ctx, l_expn, out_kernels) - p2p = P2P(ctx, out_kernels, exclude_self=False) + l2p = E2PFromSingleBox(ctx, l_expn, target_kernels) + p2p = P2P(ctx, target_kernels, exclude_self=False) fp = FieldPlotter(centers[:, -1], extent=0.3, npoints=res) targets = fp.points diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py 
index d56341d273aab24444cd028be98feb16f57183cd..c51fc3ce3769d3269742a29b3f113f164cd778ef 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -109,24 +109,28 @@ def test_qbx_direct(ctx_factory, factor, lpot_id): from sumpy.kernel import LaplaceKernel, DirectionalSourceDerivative if lpot_id == 1: - knl = LaplaceKernel(ndim) + base_knl = LaplaceKernel(ndim) + knl = base_knl elif lpot_id == 2: - knl = LaplaceKernel(ndim) - knl = DirectionalSourceDerivative(knl, dir_vec_name="dsource_vec") + base_knl = LaplaceKernel(ndim) + knl = DirectionalSourceDerivative(base_knl, dir_vec_name="dsource_vec") else: raise ValueError("unknow lpot_id") from sumpy.expansion.local import LineTaylorLocalExpansion - lknl = LineTaylorLocalExpansion(knl, order) + expn = LineTaylorLocalExpansion(knl, order) from sumpy.qbx import LayerPotential - lpot = LayerPotential(ctx, [lknl]) + lpot = LayerPotential(ctx, expansion=expn, source_kernels=(knl,), + target_kernels=(base_knl,)) from sumpy.qbx import LayerPotentialMatrixGenerator - mat_gen = LayerPotentialMatrixGenerator(ctx, [lknl]) + mat_gen = LayerPotentialMatrixGenerator(ctx, expansion=expn, + source_kernels=(knl,), target_kernels=(base_knl,)) from sumpy.qbx import LayerPotentialMatrixBlockGenerator - blk_gen = LayerPotentialMatrixBlockGenerator(ctx, [lknl]) + blk_gen = LayerPotentialMatrixBlockGenerator(ctx, expansion=expn, + source_kernels=(knl,), target_kernels=(base_knl,)) for n in [200, 300, 400]: targets, sources, centers, expansion_radii, sigma = \ diff --git a/test/test_qbx.py b/test/test_qbx.py index f26f7bf873001d0a298a3765d0e59cdd8b40b4fa..11f7da7b65d3dde2e63da2cf1f29e91718cddc95 100644 --- a/test/test_qbx.py +++ b/test/test_qbx.py @@ -54,7 +54,8 @@ def test_direct(ctx_factory): from sumpy.qbx import LayerPotential from sumpy.expansion.local import LineTaylorLocalExpansion - lpot = LayerPotential(ctx, [LineTaylorLocalExpansion(lknl, order)]) + lpot = LayerPotential(ctx, expansion=LineTaylorLocalExpansion(lknl, order), + target_kernels=(lknl,), source_kernels=(lknl,)) mode_nr = 25
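The new test_unified_single_and_double above is the end-to-end demonstration of what the source_kernels/strength_usage plumbing buys: one FMM that evaluates the single- and double-layer contributions together. Below is a condensed sketch of just the wrangler setup (not part of the patch). It assumes ctx, queue, tree, trav, dir_vec and the two strength arrays have been built exactly as in that test, and it uses the expansion import paths already used by the test suite.

# Sketch only -- condensed from test_unified_single_and_double above.
import numpy as np
from functools import partial

from boxtree.fmm import drive_fmm
from sumpy.fmm import SumpyExpansionWranglerCodeContainer
from sumpy.kernel import (LaplaceKernel, AxisTargetDerivative,
        DirectionalSourceDerivative)
from sumpy.expansion.multipole import (
        LaplaceConformingVolumeTaylorMultipoleExpansion)
from sumpy.expansion.local import LaplaceConformingVolumeTaylorLocalExpansion

knl = LaplaceKernel(2)

# One FMM handles S (strength slot 0) and D (strength slot 1) at once and
# returns one output per target kernel: the potential and its x-derivative.
wcc = SumpyExpansionWranglerCodeContainer(
        ctx,
        partial(LaplaceConformingVolumeTaylorMultipoleExpansion, knl),
        partial(LaplaceConformingVolumeTaylorLocalExpansion, knl),
        target_kernels=[knl, AxisTargetDerivative(0, knl)],
        source_kernels=[knl, DirectionalSourceDerivative(knl, "dir_vec")],
        strength_usage=[0, 1])

# ctx, queue, tree, trav, dir_vec, single_strength, double_strength:
# assumed to come from the tree/traversal setup shown in the test above.
wrangler = wcc.get_wrangler(queue, tree, np.float64,
        fmm_level_to_order=lambda kernel, kernel_args, tree, lev: 3,
        source_extra_kwargs={"dir_vec": dir_vec})

pot, grad_x = drive_fmm(trav, wrangler, (single_strength, double_strength))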