diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py
index 83dc6b30ddb9c3237c30ede4059455992eb7b271..afb3ff3b38b17b62cddab6658ee68e5786e93337 100644
--- a/sumpy/expansion/local.py
+++ b/sumpy/expansion/local.py
@@ -55,18 +55,21 @@ class LineTaylorLocalExpansion(LocalExpansionBase):
     def get_coefficient_identifiers(self):
         return list(range(self.order+1))
 
-    def coefficients_from_source(self, avec, bvec, rscale, sac=None):
+    def coefficients_from_source(self, avec, bvec, rscale, sac=None, kernel=None):
         # no point in heeding rscale here--just ignore it
         if bvec is None:
             raise RuntimeError("cannot use line-Taylor expansions in a setting "
                     "where the center-target vector is not known at coefficient "
                     "formation")
 
+        if kernel is None:
+            kernel = self.kernel
+
         tau = sym.Symbol("tau")
 
         avec_line = avec + tau*bvec
 
-        line_kernel = self.kernel.get_expression(avec_line)
+        line_kernel = kernel.get_expression(avec_line)
 
         from sumpy.symbolic import USE_SYMENGINE
 
@@ -75,8 +78,8 @@ class LineTaylorLocalExpansion(LocalExpansionBase):
             deriv_taker = MiDerivativeTaker(line_kernel, (tau,))
 
             return [my_syntactic_subs(
-                        self.kernel.postprocess_at_target(
-                            self.kernel.postprocess_at_source(
+                        kernel.postprocess_at_target(
+                            kernel.postprocess_at_source(
                                 deriv_taker.diff(i),
                                 avec), bvec),
                         {tau: 0})
@@ -89,8 +92,8 @@ class LineTaylorLocalExpansion(LocalExpansionBase):
             #
             # See also https://gitlab.tiker.net/inducer/pytential/merge_requests/12
 
-            return [self.kernel.postprocess_at_target(
-                        self.kernel.postprocess_at_source(
+            return [kernel.postprocess_at_target(
+                        kernel.postprocess_at_source(
                             line_kernel.diff("tau", i), avec),
                         bvec)
                     .subs("tau", 0)
@@ -113,10 +116,11 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase):
     Coefficients represent derivative values of the kernel.
     """
 
-    def coefficients_from_source(self, avec, bvec, rscale, sac=None):
+    def coefficients_from_source(self, avec, bvec, rscale, sac=None, kernel=None):
         from sumpy.tools import MiDerivativeTaker
-        ppkernel = self.kernel.postprocess_at_source(
-                self.kernel.get_expression(avec), avec)
+        if kernel is None:
+            kernel = self.kernel
+        ppkernel = kernel.postprocess_at_source(kernel.get_expression(avec), avec)
 
         taker = MiDerivativeTaker(ppkernel, avec)
         return [
@@ -372,9 +376,11 @@ class _FourierBesselLocalExpansion(LocalExpansionBase):
     def get_coefficient_identifiers(self):
         return list(range(-self.order, self.order+1))
 
-    def coefficients_from_source(self, avec, bvec, rscale, sac=None):
+    def coefficients_from_source(self, avec, bvec, rscale, sac=None, kernel=None):
         if not self.use_rscale:
             rscale = 1
+        if kernel is None:
+            kernel = self.kernel
 
         from sumpy.symbolic import sym_real_norm_2
         hankel_1 = sym.Function("hankel_1")
@@ -384,7 +390,7 @@ class _FourierBesselLocalExpansion(LocalExpansionBase):
         # The coordinates are negated since avec points from source to center.
         source_angle_rel_center = sym.atan2(-avec[1], -avec[0])
         avec_len = sym_real_norm_2(avec)
-        return [self.kernel.postprocess_at_source(
+        return [kernel.postprocess_at_source(
                     hankel_1(c, arg_scale * avec_len)
                     * rscale ** abs(c)
                     * sym.exp(sym.I * c * source_angle_rel_center), avec)
diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py
index 313b49bdffce8a3027e64904dee4426545c4b013..8dc7497d5848e3b08ea08d69417176a75ce0b7ed 100644
--- a/sumpy/expansion/multipole.py
+++ b/sumpy/expansion/multipole.py
@@ -53,9 +53,10 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase):
     Coefficients represent the terms in front of the kernel derivatives.
     """
 
-    def coefficients_from_source(self, avec, bvec, rscale, sac=None):
+    def coefficients_from_source(self, avec, bvec, rscale, sac=None, kernel=None):
         from sumpy.kernel import DirectionalSourceDerivative
-        kernel = self.kernel
+        if kernel is None:
+            kernel = self.kernel
 
         from sumpy.tools import mi_power, mi_factorial
 
@@ -284,10 +285,13 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase):
     def get_coefficient_identifiers(self):
         return list(range(-self.order, self.order+1))
 
-    def coefficients_from_source(self, avec, bvec, rscale, sac=None):
+    def coefficients_from_source(self, avec, bvec, rscale, sac=None, kernel=None):
         if not self.use_rscale:
             rscale = 1
 
+        if kernel is None:
+            kernel = self.kernel
+
         from sumpy.symbolic import sym_real_norm_2
         bessel_j = sym.Function("bessel_j")
         avec_len = sym_real_norm_2(avec)
@@ -297,7 +301,7 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase):
         # The coordinates are negated since avec points from source to center.
         source_angle_rel_center = sym.atan2(-avec[1], -avec[0])
         return [
-                self.kernel.postprocess_at_source(
+                kernel.postprocess_at_source(
                     bessel_j(c, arg_scale * avec_len)
                     / rscale ** abs(c)
                     * sym.exp(sym.I * c * -source_angle_rel_center),
diff --git a/sumpy/p2e.py b/sumpy/p2e.py
index 58c38321d0270382ad58111ccf67421c1abc4ba8..ba6f77a99097ad7f5cc0aeb42c1a8b9291ebb97c 100644
--- a/sumpy/p2e.py
+++ b/sumpy/p2e.py
@@ -23,6 +23,8 @@ THE SOFTWARE.
 import numpy as np
 import loopy as lp
 from loopy.version import MOST_RECENT_LANGUAGE_VERSION
+import pymbolic
+from pymbolic.mapper import WalkMapper
 
 from sumpy.tools import KernelCacheWrapper
 
@@ -45,8 +47,8 @@ Particle-to-expansion
 # {{{ P2E base class
 
 class P2EBase(KernelCacheWrapper):
-    def __init__(self, ctx, expansion,
-            options=[], name=None, device=None):
+    def __init__(self, ctx, expansion, kernels=None,
+            options=[], name=None, device=None, strength_expr=None, nstrengths=1):
         """
         :arg expansion: a subclass of :class:`sympy.expansion.ExpansionBase`
         :arg strength_usage: A list of integers indicating which expression
@@ -58,6 +60,13 @@ class P2EBase(KernelCacheWrapper):
         if device is None:
             device = ctx.devices[0]
 
+        if kernels is None:
+            kernels = [expansion.kernel]
+
+        if strength_expr is None:
+            import pymbolic
+            strength_expr = pymbolic.parse("strength0 * kernel0")
+
         from sumpy.kernel import TargetDerivativeRemover
         expansion = expansion.with_kernel(
                 TargetDerivativeRemover()(expansion.kernel))
@@ -67,9 +76,20 @@ class P2EBase(KernelCacheWrapper):
         self.options = options
         self.name = name or self.default_name
         self.device = device
+        self.kernels = kernels
+        self.strength_expr, self.extra_source_variables = \
+            process_strength_expr(strength_expr, nstrengths, len(kernels))
+        self.nstrengths = nstrengths
 
         self.dim = expansion.dim
 
+    def get_result_expr(self, icoeff):
+        subst_dict = dict((
+            pymbolic.var(f"kernel{iknl}"), pymbolic.var(f"coeff{icoeff}_{iknl}")
+            ) for iknl in range(len(self.kernels)))
+        res = pymbolic.substitute(self.strength_expr, subst_dict)
+        return res
+
     def get_loopy_instructions(self):
         from sumpy.symbolic import make_sym_vector
         avec = make_sym_vector("a", self.dim)
@@ -80,10 +100,15 @@ class P2EBase(KernelCacheWrapper):
         from sumpy.assignment_collection import SymbolicAssignmentCollection
         sac = SymbolicAssignmentCollection()
 
-        coeff_names = [
-            sac.assign_unique("coeff%d" % i, coeff_i)
+        coeff_names = []
+        for knl_idx, kernel in enumerate(self.kernels):
             for i, coeff_i in enumerate(
-                self.expansion.coefficients_from_source(avec, None, rscale, sac))]
+                self.expansion.coefficients_from_source(avec, None, rscale,
+                     sac, kernel=kernel)
+            ):
+                coeff_names.append(
+                    sac.assign_unique(f"coeff{i}_{knl_idx}", coeff_i)
+                )
 
         sac.run_global_cse()
 
@@ -127,20 +152,21 @@ class P2EFromSingleBox(P2EBase):
                     for isrc
                         <> a[idim] = center[idim] - sources[idim, isrc] {dup=idim}
 
-                        <> strength = strengths[isrc]
+                        <> strength = strengths[0, isrc]
                         """] + self.get_loopy_instructions() + ["""
                     end
-                    """] + ["""
+                    """] + [f"""
                     tgt_expansions[src_ibox-tgt_base_ibox, {coeffidx}] = \
-                            simul_reduce(sum, isrc, strength*coeff{coeffidx}) \
+                        simul_reduce(sum, isrc, {self.get_result_expr(coeffidx)}) \
                             {{id_prefix=write_expn}}
-                    """.format(coeffidx=i) for i in range(ncoeffs)] + ["""
+                    """ for coeffidx in range(ncoeffs)] + ["""
                 end
                 """],
                 [
                     lp.GlobalArg("sources", None, shape=(self.dim, "nsources"),
                         dim_tags="sep,c"),
-                    lp.GlobalArg("strengths", None, shape="nsources"),
+                    lp.GlobalArg("strengths", None, shape="nstrengths, nsources",
+                        dim_tags="sep,C"),
                     lp.GlobalArg("box_source_starts,box_source_counts_nonchild",
                         None, shape=None),
                     lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
@@ -155,7 +181,7 @@ class P2EFromSingleBox(P2EBase):
                 assumptions="nsrc_boxes>=1",
                 silenced_warnings="write_race(write_expn*)",
                 default_offset=lp.auto,
-                fixed_parameters=dict(dim=self.dim),
+                fixed_parameters=dict(dim=self.dim, nstrengths=self.nstrengths),
                 lang_version=MOST_RECENT_LANGUAGE_VERSION)
 
         loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl)
@@ -206,7 +232,8 @@ class P2EFromCSR(P2EBase):
                 [
                     lp.GlobalArg("sources", None, shape=(self.dim, "nsources"),
                         dim_tags="sep,c"),
-                    lp.GlobalArg("strengths", None, shape="nsources"),
+                    lp.GlobalArg("strengths", None, shape="nstrengths, nsources",
+                        dim_tags="sep,C"),
                     lp.GlobalArg("source_box_starts,source_box_lists",
                         None, shape=None, offset=lp.auto),
                     lp.GlobalArg("box_source_starts,box_source_counts_nonchild",
@@ -245,14 +272,14 @@ class P2EFromCSR(P2EBase):
                             <> a[idim] = center[idim] - sources[idim, isrc] \
                                     {dup=idim}
 
-                            <> strength = strengths[isrc]
+                            <> strength = strengths[0, isrc]
                             """] + self.get_loopy_instructions() + ["""
                         end
                     end
                     """] + ["""
                     tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \
                             simul_reduce(sum, (isrc_box, isrc),
-                                strength*coeff{coeffidx}) \
+                                strength*coeff{coeffidx}_0) \
                             {{id_prefix=write_expn}}
                     """.format(coeffidx=i) for i in range(ncoeffs)] + ["""
                 end
@@ -262,7 +289,7 @@ class P2EFromCSR(P2EBase):
                 assumptions="ntgt_boxes>=1",
                 silenced_warnings="write_race(write_expn*)",
                 default_offset=lp.auto,
-                fixed_parameters=dict(dim=self.dim),
+                fixed_parameters=dict(dim=self.dim, nstrengths=self.nstrengths),
                 lang_version=MOST_RECENT_LANGUAGE_VERSION)
 
         loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl)
@@ -298,4 +325,39 @@ class P2EFromCSR(P2EBase):
 
 # }}}
 
+
+# {{{ helper functions
+
+class CollectVariableMapper(WalkMapper):
+    def __init__(self):
+        self.variables = set()
+
+    def post_visit(self, expr, *args, **kwargs):
+        if isinstance(expr, pymbolic.primitives.Variable):
+            self.variables.add(expr)
+
+
+def process_strength_expr(expr, nstrengths, nkernels):
+    collect_variable_mapper = CollectVariableMapper()
+    collect_variable_mapper(expr)
+    variables = collect_variable_mapper.variables
+
+    # Get variables that are not strengths nor kernels
+    source_variables = [var for var in variables if
+        not (var.name.startswith("strength") or var.name.startswith("kernel"))]
+
+    # Use strengths array and index with isrc
+    subst = {}
+    for i in range(nstrengths):
+        old = pymbolic.var(f"strength{i}")
+        new = pymbolic.var("strengths")[0, pymbolic.var("isrc")]
+        subst[old] = new
+    print(subst)
+    expr = pymbolic.substitute(expr, subst)
+
+    return expr, source_variables
+
+
+#}}}
+
 # vim: foldmethod=marker
diff --git a/test/test_kernels.py b/test/test_kernels.py
index f8d5122d2ce5ecfce05b0fa85453328127781c6e..4db163f85cd79c0e01dea3fbf4c044595d11bce1 100644
--- a/test/test_kernels.py
+++ b/test/test_kernels.py
@@ -229,7 +229,7 @@ def test_p2e2p(ctx_factory, base_knl, expn_class, order, with_source_derivative)
                 box_source_counts_nonchild=box_source_counts_nonchild,
                 centers=centers,
                 sources=sources,
-                strengths=strengths,
+                strengths=(strengths,),
                 nboxes=1,
                 tgt_base_ibox=0,
                 rscale=rscale,
@@ -495,7 +495,7 @@ def test_translations(ctx_factory, knl, local_expn_class, mpole_expn_class):
                 box_source_counts_nonchild=p2m_box_source_counts_nonchild,
                 centers=centers,
                 sources=sources,
-                strengths=strengths,
+                strengths=(strengths,),
                 nboxes=nboxes,
                 rscale=m1_rscale,