diff --git a/sumpy/fmm.py b/sumpy/fmm.py
index eedebda64eb8baea0e8b370adeb88d49bae511a7..5b61a0c0559b0c794537174761eb1fbda9f2c12e 100644
--- a/sumpy/fmm.py
+++ b/sumpy/fmm.py
@@ -60,22 +60,22 @@ class SumpyExpansionWranglerCodeContainer:
     def __init__(self, cl_context,
             multipole_expansion_factory,
             local_expansion_factory,
-            out_kernels, exclude_self=False, use_rscale=None,
-            strength_usage=None, in_kernels=None):
+            target_kernels, exclude_self=False, use_rscale=None,
+            strength_usage=None, source_kernels=None):
         """
         :arg multipole_expansion_factory: a callable of a single argument (order)
             that returns a multipole expansion.
         :arg local_expansion_factory: a callable of a single argument (order)
             that returns a local expansion.
-        :arg out_kernels: a list of output kernels
+        :arg target_kernels: a list of output kernels
         :arg exclude_self: whether the self contribution should be excluded
         :arg strength_usage: passed unchanged to p2l, p2m and p2p.
-        :arg in_kernels: passed unchanged to p2l, p2m and p2p.
+        :arg source_kernels: passed unchanged to p2l, p2m and p2p.
         """
         self.multipole_expansion_factory = multipole_expansion_factory
         self.local_expansion_factory = local_expansion_factory
-        self.in_kernels = in_kernels
-        self.out_kernels = out_kernels
+        self.source_kernels = source_kernels
+        self.target_kernels = target_kernels
         self.exclude_self = exclude_self
         self.use_rscale = use_rscale
         self.strength_usage = strength_usage
@@ -85,7 +85,7 @@ class SumpyExpansionWranglerCodeContainer:
     @memoize_method
     def get_base_kernel(self):
         from pytools import single_valued
-        return single_valued(k.get_base_kernel() for k in self.out_kernels)
+        return single_valued(k.get_base_kernel() for k in self.target_kernels)
 
     @memoize_method
     def multipole_expansion(self, order):
@@ -98,14 +98,14 @@ class SumpyExpansionWranglerCodeContainer:
     @memoize_method
     def p2m(self, tgt_order):
         return P2EFromSingleBox(self.cl_context,
-                kernels=self.in_kernels,
+                kernels=self.source_kernels,
                 expansion=self.multipole_expansion(tgt_order),
                 strength_usage=self.strength_usage)
 
     @memoize_method
     def p2l(self, tgt_order):
         return P2EFromCSR(self.cl_context,
-                kernels=self.in_kernels,
+                kernels=self.source_kernels,
                 expansion=self.local_expansion(tgt_order),
                 strength_usage=self.strength_usage)
 
@@ -131,18 +131,18 @@ class SumpyExpansionWranglerCodeContainer:
     def m2p(self, src_order):
         return E2PFromCSR(self.cl_context,
                 self.multipole_expansion(src_order),
-                self.out_kernels)
+                self.target_kernels)
 
     @memoize_method
     def l2p(self, src_order):
         return E2PFromSingleBox(self.cl_context,
                 self.local_expansion(src_order),
-                self.out_kernels)
+                self.target_kernels)
 
     @memoize_method
     def p2p(self):
-        return P2PFromCSR(self.cl_context, out_kernels=self.out_kernels,
-                          in_kernels=self.in_kernels,
+        return P2PFromCSR(self.cl_context, target_kernels=self.target_kernels,
+                          source_kernels=self.source_kernels,
                           exclude_self=self.exclude_self,
                           strength_usage=self.strength_usage)
 
@@ -321,7 +321,7 @@ class SumpyExpansionWrangler:
                     self.queue,
                     self.tree.ntargets,
                     dtype=self.dtype)
-                for k in self.code.out_kernels])
+                for k in self.code.target_kernels])
 
     def reorder_sources(self, source_array):
         return source_array.with_queue(self.queue)[self.tree.user_source_ids]
diff --git a/sumpy/p2e.py b/sumpy/p2e.py
index 647827ad987b0537ec8c7c2073603a9d15e7ae44..5200bed3a0eb327c2bd234deddb7e172f3311d27 100644
--- a/sumpy/p2e.py
+++ b/sumpy/p2e.py
@@ -73,7 +73,8 @@ class P2EBase(KernelComputation, KernelCacheWrapper):
             assert tdr(knl) == knl
             assert sdr(knl) == expansion.kernel
 
-        KernelComputation.__init__(self, ctx=ctx, out_kernels=[], in_kernels=kernels,
+        KernelComputation.__init__(self, ctx=ctx, target_kernels=[],
+            source_kernels=kernels,
             strength_usage=strength_usage, value_dtypes=None,
             name=name, device=device)
 
@@ -91,7 +92,7 @@ class P2EBase(KernelComputation, KernelCacheWrapper):
         sac = SymbolicAssignmentCollection()
 
         coeff_names = []
-        for knl_idx, kernel in enumerate(self.in_kernels):
+        for knl_idx, kernel in enumerate(self.source_kernels):
             for i, coeff_i in enumerate(
                 self.expansion.coefficients_from_source(kernel, avec, None, rscale,
                      sac)
@@ -102,7 +103,7 @@ class P2EBase(KernelComputation, KernelCacheWrapper):
         sac.run_global_cse()
 
         code_transformers = [self.expansion.get_code_transformer()] \
-            + [kernel.get_code_transformer() for kernel in self.in_kernels]
+            + [kernel.get_code_transformer() for kernel in self.source_kernels]
 
         from sumpy.codegen import to_loopy_insns
         return to_loopy_insns(
@@ -115,13 +116,13 @@ class P2EBase(KernelComputation, KernelCacheWrapper):
 
     def get_cache_key(self):
         return (type(self).__name__, self.name, self.expansion,
-                tuple(self.in_kernels), tuple(self.strength_usage))
+                tuple(self.source_kernels), tuple(self.strength_usage))
 
     def get_result_expr(self, icoeff):
         isrc = pymbolic.var("isrc")
         return sum(pymbolic.var(f"coeff{icoeff}_{i}")
                     * pymbolic.var("strengths")[self.strength_usage[i], isrc]
-                for i in range(len(self.in_kernels)))
+                for i in range(len(self.source_kernels)))
 
 # }}}
 
@@ -173,7 +174,7 @@ class P2EFromSingleBox(P2EBase):
                     lp.ValueArg("nboxes,aligned_nboxes,tgt_base_ibox", np.int32),
                     lp.ValueArg("nsources", np.int32),
                     "..."
-                ] + gather_loopy_source_arguments(self.in_kernels
+                ] + gather_loopy_source_arguments(self.source_kernels
                         + (self.expansion,)),
                 name=self.name,
                 assumptions="nsrc_boxes>=1",
@@ -244,7 +245,7 @@ class P2EFromCSR(P2EBase):
                         np.int32),
                     lp.ValueArg("nsources", np.int32),
                     "..."
-                ] + gather_loopy_source_arguments(self.in_kernels
+                ] + gather_loopy_source_arguments(self.source_kernels
                         + (self.expansion,)))
 
         loopy_knl = lp.make_kernel(
diff --git a/sumpy/p2p.py b/sumpy/p2p.py
index 073e5b4d9afa42c329442369c7574049f1e7b78c..2863dd17a46701773a9c1a054b0c490cc8d72660 100644
--- a/sumpy/p2p.py
+++ b/sumpy/p2p.py
@@ -51,8 +51,8 @@ Particle-to-particle
 # {{{ p2p base class
 
 class P2PBase(KernelComputation, KernelCacheWrapper):
-    def __init__(self, ctx, out_kernels, exclude_self, strength_usage=None,
-            value_dtypes=None, name=None, device=None, in_kernels=None):
+    def __init__(self, ctx, target_kernels, exclude_self, strength_usage=None,
+            value_dtypes=None, name=None, device=None, source_kernels=None):
         """
         :arg kernels: list of :class:`sumpy.kernel.Kernel` instances
         :arg strength_usage: A list of integers indicating which expression
@@ -66,34 +66,34 @@ class P2PBase(KernelComputation, KernelCacheWrapper):
         tdr = TargetDerivativeRemover()
         sdr = SourceDerivativeRemover()
 
-        if in_kernels is None:
-            in_kernels = [single_valued(tdr(knl) for knl in out_kernels)]
-            out_kernels = [sdr(knl) for knl in out_kernels]
+        if source_kernels is None:
+            source_kernels = [single_valued(tdr(knl) for knl in target_kernels)]
+            target_kernels = [sdr(knl) for knl in target_kernels]
         else:
-            for knl in in_kernels:
+            for knl in source_kernels:
                 assert(tdr(knl) == knl)
-            for knl in out_kernels:
+            for knl in target_kernels:
                 assert(sdr(knl) == knl)
 
-        base_in_kernel = single_valued(sdr(knl) for knl in in_kernels)
-        base_out_kernel = single_valued(tdr(knl) for knl in out_kernels)
-        assert base_in_kernel == base_out_kernel
+        base_source_kernel = single_valued(sdr(knl) for knl in source_kernels)
+        base_target_kernel = single_valued(tdr(knl) for knl in target_kernels)
+        assert base_source_kernel == base_target_kernel
 
-        KernelComputation.__init__(self, ctx=ctx, out_kernels=out_kernels,
-            in_kernels=in_kernels, strength_usage=strength_usage,
+        KernelComputation.__init__(self, ctx=ctx, target_kernels=target_kernels,
+            source_kernels=source_kernels, strength_usage=strength_usage,
             value_dtypes=value_dtypes, name=name, device=device)
 
         self.exclude_self = exclude_self
-        self.in_kernels = in_kernels
-        self.out_kernels = out_kernels
+        self.source_kernels = source_kernels
+        self.target_kernels = target_kernels
 
         self.dim = single_valued(knl.dim for knl in
-            list(self.out_kernels) + list(self.in_kernels))
+            list(self.target_kernels) + list(self.source_kernels))
 
     def get_cache_key(self):
-        return (type(self).__name__, tuple(self.out_kernels), self.exclude_self,
+        return (type(self).__name__, tuple(self.target_kernels), self.exclude_self,
                 tuple(self.strength_usage), tuple(self.value_dtypes),
-                tuple(self.in_kernels))
+                tuple(self.source_kernels))
 
     def get_loopy_insns_and_result_names(self):
         from sumpy.symbolic import make_sym_vector
@@ -106,10 +106,10 @@ class P2PBase(KernelComputation, KernelCacheWrapper):
 
         isrc_sym = var("isrc")
 
-        if len(self.in_kernels) == 1:
-            in_knl = self.in_kernels[0]
+        if len(self.source_kernels) == 1:
+            in_knl = self.source_kernels[0]
             exprs = []
-            for i, out_knl in enumerate(self.out_kernels):
+            for i, out_knl in enumerate(self.target_kernels):
                 expr = out_knl.postprocess_at_target(
                         in_knl.postprocess_at_source(
                             in_knl.get_expression(dvec),
@@ -119,9 +119,9 @@ class P2PBase(KernelComputation, KernelCacheWrapper):
                 exprs.append(expr)
         else:
             exprs = []
-            for i, out_knl in enumerate(self.out_kernels):
+            for i, out_knl in enumerate(self.target_kernels):
                 expr_sum = 0
-                for j, in_knl in enumerate(self.in_kernels):
+                for j, in_knl in enumerate(self.source_kernels):
                     expr = in_knl.postprocess_at_source(
                                 in_knl.get_expression(dvec),
                                 dvec)
@@ -146,8 +146,8 @@ class P2PBase(KernelComputation, KernelCacheWrapper):
         loopy_insns = to_loopy_insns(sac.assignments.items(),
                 vector_names={"d"},
                 pymbolic_expr_maps=(
-                    [knl.get_code_transformer() for knl in self.in_kernels]
-                    + [knl.get_code_transformer() for knl in self.out_kernels]),
+                    [knl.get_code_transformer() for knl in self.source_kernels]
+                    + [knl.get_code_transformer() for knl in self.target_kernels]),
                 retain_names=result_names,
                 complex_dtype=np.complex128  # FIXME
                 )
@@ -185,7 +185,7 @@ class P2PBase(KernelComputation, KernelCacheWrapper):
                 lp.ValueArg("ntargets", None)]
                 + ([lp.GlobalArg("target_to_source", None, shape=("ntargets",))]
                     if self.exclude_self else [])
-                + gather_loopy_source_arguments(self.in_kernels))
+                + gather_loopy_source_arguments(self.source_kernels))
 
     def get_kernel(self):
         raise NotImplementedError
@@ -241,7 +241,7 @@ class P2P(P2PBase):
             + [f"""
                 result[{iknl}, itgt] = knl_{iknl}_scaling * \
                     simul_reduce(sum, isrc, pair_result_{iknl}) {{inames=itgt}}
-               """ for iknl in range(len(self.out_kernels))]
+               """ for iknl in range(len(self.target_kernels))]
             + ["end"],
             arguments,
             assumptions="nsources>=1 and ntargets>=1",
@@ -249,12 +249,12 @@ class P2P(P2PBase):
             fixed_parameters=dict(
                 dim=self.dim,
                 nstrengths=self.strength_count,
-                nresults=len(self.out_kernels)),
+                nresults=len(self.target_kernels)),
             lang_version=MOST_RECENT_LANGUAGE_VERSION)
 
         loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
 
-        for knl in self.out_kernels:
+        for knl in self.target_kernels:
             loopy_knl = knl.prepare_loopy_kernel(loopy_knl)
 
         return loopy_knl
@@ -303,7 +303,7 @@ class P2PMatrixGenerator(P2PBase):
             + [f"""
                 result_{iknl}[itgt, isrc] = knl_{iknl}_scaling \
                     * pair_result_{iknl} {{inames=isrc:itgt}}
-                """ for iknl in range(len(self.out_kernels))]
+                """ for iknl in range(len(self.target_kernels))]
             + ["end"],
             arguments,
             assumptions="nsources>=1 and ntargets>=1",
@@ -313,7 +313,7 @@ class P2PMatrixGenerator(P2PBase):
 
         loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
 
-        for knl in self.out_kernels:
+        for knl in self.target_kernels:
             loopy_knl = knl.prepare_loopy_kernel(loopy_knl)
 
         return loopy_knl
@@ -374,7 +374,7 @@ class P2PMatrixBlockGenerator(P2PBase):
                     result_{iknl}[imat] = \
                         knl_{iknl}_scaling * pair_result_{iknl} \
                             {{id_prefix=write_p2p}}
-                """ for iknl in range(len(self.out_kernels))]
+                """ for iknl in range(len(self.target_kernels))]
             + ["end"],
             arguments,
             assumptions="nresult>=1",
@@ -387,7 +387,7 @@ class P2PMatrixBlockGenerator(P2PBase):
         loopy_knl = lp.add_dtypes(loopy_knl,
             dict(nsources=np.int32, ntargets=np.int32))
 
-        for knl in self.out_kernels:
+        for knl in self.target_kernels:
             loopy_knl = knl.prepare_loopy_kernel(loopy_knl)
 
         return loopy_knl
@@ -507,7 +507,7 @@ class P2PFromCSR(P2PBase):
                         knl_{iknl}_scaling * \
                         simul_reduce(sum, isrc, pair_result_{iknl}) \
                         {{id_prefix=write_csr}}
-                """ for iknl in range(len(self.out_kernels))]
+                """ for iknl in range(len(self.target_kernels))]
             + ["""
                     end
                 end
@@ -520,7 +520,7 @@ class P2PFromCSR(P2PBase):
             fixed_parameters=dict(
                 dim=self.dim,
                 nstrengths=self.strength_count,
-                nkernels=len(self.out_kernels)),
+                nkernels=len(self.target_kernels)),
             lang_version=MOST_RECENT_LANGUAGE_VERSION)
 
         loopy_knl = lp.add_dtypes(loopy_knl,
@@ -530,7 +530,7 @@ class P2PFromCSR(P2PBase):
         loopy_knl = lp.tag_array_axes(loopy_knl, "targets", "sep,C")
         loopy_knl = lp.tag_array_axes(loopy_knl, "sources", "sep,C")
 
-        for knl in self.out_kernels:
+        for knl in self.target_kernels:
             loopy_knl = knl.prepare_loopy_kernel(loopy_knl)
 
         return loopy_knl
diff --git a/sumpy/qbx.py b/sumpy/qbx.py
index 0da725b069ca4aebb2092ecd3f7790cc7db6d0b9..0e8add77a1368ef7917e1e38c4e92ea629d8b53f 100644
--- a/sumpy/qbx.py
+++ b/sumpy/qbx.py
@@ -69,40 +69,40 @@ def stringify_expn_index(i):
 class LayerPotentialBase(KernelComputation, KernelCacheWrapper):
     def __init__(self, ctx, expansion, strength_usage=None,
             value_dtypes=None, name=None, device=None,
-            in_kernels=None, out_kernels=None):
+            source_kernels=None, target_kernels=None):
 
         from sumpy.kernel import SourceDerivativeRemover, TargetDerivativeRemover
         from pytools import single_valued
         sdr = SourceDerivativeRemover()
         tdr = TargetDerivativeRemover()
-        if in_kernels is None and out_kernels is None:
+        if source_kernels is None and target_kernels is None:
             from warnings import warn
             warn("A list for the second argument of the constructor is deprecated."
-                "Use a single expansion and pass in_kernels and out_kernels",
+                "Use a single expansion and pass source_kernels and target_kernels",
                 DeprecationWarning, stacklevel=2)
-            in_kernel = single_valued(tdr(exp.kernel) for exp in expansion)
-            base_kernel = sdr(in_kernel)
-            out_kernels = tuple(sdr(exp.kernel) for exp in expansion)
-            in_kernels = (in_kernel,)
+            source_kernel = single_valued(tdr(exp.kernel) for exp in expansion)
+            base_kernel = sdr(source_kernel)
+            target_kernels = tuple(sdr(exp.kernel) for exp in expansion)
+            source_kernels = (source_kernel,)
             expansion = expansion[0].copy(kernel=base_kernel)
 
-        KernelComputation.__init__(self, ctx=ctx, out_kernels=out_kernels,
-                strength_usage=strength_usage, in_kernels=in_kernels,
+        KernelComputation.__init__(self, ctx=ctx, target_kernels=target_kernels,
+                strength_usage=strength_usage, source_kernels=source_kernels,
                 value_dtypes=value_dtypes, name=name, device=device)
 
-        self.dim = single_valued(knl.dim for knl in self.out_kernels)
+        self.dim = single_valued(knl.dim for knl in self.target_kernels)
         self.expansion = expansion
 
     def get_cache_key(self):
-        return (type(self).__name__, self.expansion, tuple(self.out_kernels),
-                tuple(self.in_kernels), tuple(self.strength_usage),
+        return (type(self).__name__, self.expansion, tuple(self.target_kernels),
+                tuple(self.source_kernels), tuple(self.strength_usage),
                 tuple(self.value_dtypes))
 
     def _expand(self, sac, avec, bvec, rscale, isrc):
         coefficients = np.zeros(len(self.expansion), dtype=object)
         from sumpy.symbolic import PymbolicToSympyMapper
         conv = PymbolicToSympyMapper()
-        for idx, knl in enumerate(self.in_kernels):
+        for idx, knl in enumerate(self.source_kernels):
             strength = conv(self.get_strength_or_not(isrc, idx))
             coefficients += np.array([coeff * strength for
                                 i, coeff in enumerate(
@@ -112,7 +112,7 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper):
         return list(coefficients)
 
     def _evaluate(self, sac, avec, bvec, rscale, expansion_nr, coefficients):
-        expn = self.out_kernels[expansion_nr]
+        expn = self.target_kernels[expansion_nr]
         assigned_coeffs = [
             sym.Symbol(
                 sac.assign_unique("expn%dcoeff%s" % (
@@ -139,14 +139,14 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper):
 
         coefficients = self._expand(sac, avec, bvec, rscale, isrc_sym)
         result_names = [self._evaluate(sac, avec, bvec, rscale, i, coefficients)
-                        for i in range(len(self.out_kernels))]
+                        for i in range(len(self.target_kernels))]
 
         logger.info("compute expansion expressions: done")
 
         sac.run_global_cse()
 
         pymbolic_expr_maps = [knl.get_code_transformer() for knl in (
-            self.out_kernels + self.in_kernels)]
+            self.target_kernels + self.source_kernels)]
 
         from sumpy.codegen import to_loopy_insns
         loopy_insns = to_loopy_insns(
@@ -183,7 +183,8 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper):
                     None, shape="ntargets"),
                 lp.ValueArg("nsources", None),
                 lp.ValueArg("ntargets", None)]
-                + gather_loopy_source_arguments(self.out_kernels + self.in_kernels))
+                + gather_loopy_source_arguments(
+                    self.target_kernels + self.source_kernels))
 
     def get_kernel(self):
         raise NotImplementedError
@@ -239,7 +240,7 @@ class LayerPotential(LayerPotentialBase):
             for i in range(self.strength_count)]
             + [lp.GlobalArg("result_%d" % i,
                 None, shape="ntargets", order="C")
-            for i in range(len(self.out_kernels))])
+            for i in range(len(self.target_kernels))])
 
         loopy_knl = lp.make_kernel(["""
             {[itgt, isrc, idim]: \
@@ -260,7 +261,7 @@ class LayerPotential(LayerPotentialBase):
                     simul_reduce(sum, isrc, pair_result_{i}) \
                         {{id_prefix=write_lpot,inames=itgt}}
                 """.format(i=iknl)
-                for iknl in range(len(self.out_kernels))]
+                for iknl in range(len(self.target_kernels))]
             + ["end"],
             arguments,
             name=self.name,
@@ -271,7 +272,7 @@ class LayerPotential(LayerPotentialBase):
             lang_version=MOST_RECENT_LANGUAGE_VERSION)
 
         loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
-        for expn in self.out_kernels + self.in_kernels:
+        for expn in self.target_kernels + self.source_kernels:
             loopy_knl = expn.prepare_loopy_kernel(loopy_knl)
 
         return loopy_knl
@@ -334,7 +335,7 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase):
                     knl_{i}_scaling * pair_result_{i} \
                         {{inames=isrc:itgt}}
                 """.format(i=iknl)
-                for iknl in range(len(self.out_kernels))]
+                for iknl in range(len(self.target_kernels))]
             + ["end"],
             arguments,
             name=self.name,
@@ -344,7 +345,7 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase):
             lang_version=MOST_RECENT_LANGUAGE_VERSION)
 
         loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
-        for expn in self.in_kernels + self.out_kernels:
+        for expn in self.source_kernels + self.target_kernels:
             loopy_knl = expn.prepare_loopy_kernel(loopy_knl)
 
         return loopy_knl
@@ -409,7 +410,7 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
                     result_{i}[imat] = knl_{i}_scaling * pair_result_{i} \
                             {{id_prefix=write_lpot}}
                 """.format(i=iknl)
-                for iknl in range(len(self.out_kernels))]
+                for iknl in range(len(self.target_kernels))]
             + ["end"],
             arguments,
             name=self.name,
@@ -423,7 +424,7 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
         loopy_knl = lp.add_dtypes(loopy_knl,
             dict(nsources=np.int32, ntargets=np.int32))
 
-        for expn in self.in_kernels + self.out_kernels:
+        for expn in self.source_kernels + self.target_kernels:
             loopy_knl = expn.prepare_loopy_kernel(loopy_knl)
 
         return loopy_knl
diff --git a/sumpy/tools.py b/sumpy/tools.py
index 6de1643e71c7e87be2029f57c7c5f03f52e94348..97040959095b8919944d5ade52e9063805363dae 100644
--- a/sumpy/tools.py
+++ b/sumpy/tools.py
@@ -201,7 +201,7 @@ def gather_loopy_source_arguments(kernel_likes):
 class KernelComputation:
     """Common input processing for kernel computations."""
 
-    def __init__(self, ctx, out_kernels, in_kernels, strength_usage,
+    def __init__(self, ctx, target_kernels, source_kernels, strength_usage,
             value_dtypes, name, device=None):
         """
         :arg kernels: list of :class:`sumpy.kernel.Kernel` instances
@@ -217,14 +217,14 @@ class KernelComputation:
 
         if value_dtypes is None:
             value_dtypes = []
-            for knl in out_kernels:
+            for knl in target_kernels:
                 if knl.is_complex_valued:
                     value_dtypes.append(np.complex128)
                 else:
                     value_dtypes.append(np.float64)
 
         if not isinstance(value_dtypes, (list, tuple)):
-            value_dtypes = [np.dtype(value_dtypes)] * len(out_kernels)
+            value_dtypes = [np.dtype(value_dtypes)] * len(target_kernels)
         value_dtypes = [np.dtype(vd) for vd in value_dtypes]
 
         # }}}
@@ -232,9 +232,9 @@ class KernelComputation:
         # {{{ process strength_usage
 
         if strength_usage is None:
-            strength_usage = [0] * len(in_kernels)
+            strength_usage = [0] * len(source_kernels)
 
-        if len(in_kernels) != len(strength_usage):
+        if len(source_kernels) != len(strength_usage):
             raise ValueError("exprs and strength_usage must have the same length")
         strength_count = max(strength_usage)+1
 
@@ -246,8 +246,8 @@ class KernelComputation:
         self.context = ctx
         self.device = device
 
-        self.in_kernels = tuple(in_kernels)
-        self.out_kernels = tuple(out_kernels)
+        self.source_kernels = tuple(source_kernels)
+        self.target_kernels = tuple(target_kernels)
         self.value_dtypes = value_dtypes
         self.strength_usage = strength_usage
         self.strength_count = strength_count
@@ -265,7 +265,7 @@ class KernelComputation:
                     expression=sympy_conv(kernel.get_global_scaling_const()),
                     temp_var_type=lp.Optional(dtype))
                 for i, (kernel, dtype) in enumerate(
-                    zip(self.out_kernels, self.value_dtypes))]
+                    zip(self.target_kernels, self.value_dtypes))]
 
 # }}}
 
diff --git a/test/test_fmm.py b/test/test_fmm.py
index ca3b40aaaf4111710d88a3ba275c60b9d035e43b..50a0755cff8700d973c084832669185bff594ac0 100644
--- a/test/test_fmm.py
+++ b/test/test_fmm.py
@@ -165,14 +165,14 @@ def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class):
 
     from functools import partial
     for order in order_values:
-        out_kernels = [knl]
+        target_kernels = [knl]
 
         from sumpy.fmm import SumpyExpansionWranglerCodeContainer
         wcc = SumpyExpansionWranglerCodeContainer(
                 ctx,
                 partial(mpole_expn_class, knl),
                 partial(local_expn_class, knl),
-                out_kernels)
+                target_kernels)
         wrangler = wcc.get_wrangler(queue, tree, dtype,
                 fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
                 kernel_extra_kwargs=extra_kwargs)
@@ -182,7 +182,7 @@ def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class):
         pot, = drive_fmm(trav, wrangler, (weights,))
 
         from sumpy import P2P
-        p2p = P2P(ctx, out_kernels, exclude_self=False)
+        p2p = P2P(ctx, target_kernels, exclude_self=False)
         evt, (ref_pot,) = p2p(queue, targets, sources, (weights,),
                 **extra_kwargs)
 
@@ -252,24 +252,24 @@ def test_unified_single_and_double(ctx_factory):
 
     deriv_knl = DirectionalSourceDerivative(knl, "dir_vec")
 
-    out_kernels = [knl, AxisTargetDerivative(0, knl)]
-    in_kernel_vecs = [[knl], [deriv_knl], [knl, deriv_knl]]
+    target_kernels = [knl, AxisTargetDerivative(0, knl)]
+    source_kernel_vecs = [[knl], [deriv_knl], [knl, deriv_knl]]
     strength_usages = [[0], [1], [0, 1]]
 
     alpha = np.linspace(0, 2*np.pi, nsources, np.float64)
     dir_vec = np.vstack([np.cos(alpha), np.sin(alpha)])
 
     results = []
-    for in_kernels, strength_usage in zip(in_kernel_vecs, strength_usages):
+    for source_kernels, strength_usage in zip(source_kernel_vecs, strength_usages):
         source_extra_kwargs = {}
-        if deriv_knl in in_kernels:
+        if deriv_knl in source_kernels:
             source_extra_kwargs["dir_vec"] = dir_vec
         from sumpy.fmm import SumpyExpansionWranglerCodeContainer
         wcc = SumpyExpansionWranglerCodeContainer(
                 ctx,
                 partial(mpole_expn_class, knl),
                 partial(local_expn_class, knl),
-                out_kernels=out_kernels, in_kernels=in_kernels,
+                target_kernels=target_kernels, source_kernels=source_kernels,
                 strength_usage=strength_usage)
         wrangler = wcc.get_wrangler(queue, tree, dtype,
                 fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
@@ -322,7 +322,7 @@ def test_sumpy_fmm_timing_data_collection(ctx_factory):
     rng = PhiloxGenerator(ctx)
     weights = rng.uniform(queue, nsources, dtype=np.float64)
 
-    out_kernels = [knl]
+    target_kernels = [knl]
 
     from functools import partial
 
@@ -331,7 +331,7 @@ def test_sumpy_fmm_timing_data_collection(ctx_factory):
             ctx,
             partial(mpole_expn_class, knl),
             partial(local_expn_class, knl),
-            out_kernels)
+            target_kernels)
 
     wrangler = wcc.get_wrangler(queue, tree, dtype,
             fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order)
@@ -379,7 +379,7 @@ def test_sumpy_fmm_exclude_self(ctx_factory):
     target_to_source = np.arange(tree.ntargets, dtype=np.int32)
     self_extra_kwargs = {"target_to_source": target_to_source}
 
-    out_kernels = [knl]
+    target_kernels = [knl]
 
     from functools import partial
 
@@ -388,7 +388,7 @@ def test_sumpy_fmm_exclude_self(ctx_factory):
             ctx,
             partial(mpole_expn_class, knl),
             partial(local_expn_class, knl),
-            out_kernels,
+            target_kernels,
             exclude_self=True)
 
     wrangler = wcc.get_wrangler(queue, tree, dtype,
@@ -400,7 +400,7 @@ def test_sumpy_fmm_exclude_self(ctx_factory):
     pot, = drive_fmm(trav, wrangler, (weights,))
 
     from sumpy import P2P
-    p2p = P2P(ctx, out_kernels, exclude_self=True)
+    p2p = P2P(ctx, target_kernels, exclude_self=True)
     evt, (ref_pot,) = p2p(queue, sources, sources, (weights,),
             **self_extra_kwargs)
 
diff --git a/test/test_kernels.py b/test/test_kernels.py
index 3d0dea7a82091fbdf57cccbf88e3b10cd77d3390..7a80e93c9cf6f7e5d65fc122753bfde6f618d815 100644
--- a/test/test_kernels.py
+++ b/test/test_kernels.py
@@ -131,7 +131,7 @@ def test_p2e_multiple(ctx_factory, base_knl, expn_class):
     if isinstance(base_knl, StokesletKernel):
         extra_kwargs["mu"] = 0.2
 
-    in_kernels = [
+    source_kernels = [
         DirectionalSourceDerivative(base_knl, "dir_vec"),
         base_knl,
     ]
@@ -169,7 +169,7 @@ def test_p2e_multiple(ctx_factory, base_knl, expn_class):
     rscale = 0.5  # pick something non-1
 
     # apply p2e at the same time
-    p2e = P2EFromSingleBox(ctx, expn, kernels=in_kernels, strength_usage=[0, 1])
+    p2e = P2EFromSingleBox(ctx, expn, kernels=source_kernels, strength_usage=[0, 1])
     evt, (mpoles,) = p2e(queue,
             source_boxes=source_boxes,
             box_source_starts=box_source_starts,
@@ -190,11 +190,12 @@ def test_p2e_multiple(ctx_factory, base_knl, expn_class):
 
     # apply p2e separately
     expected_result = np.zeros_like(actual_result)
-    for i, in_kernel in enumerate(in_kernels):
+    for i, source_kernel in enumerate(source_kernels):
         extra_source_kwargs = extra_kwargs.copy()
-        if isinstance(in_kernel, DirectionalSourceDerivative):
+        if isinstance(source_kernel, DirectionalSourceDerivative):
             extra_source_kwargs["dir_vec"] = dir_vec
-        p2e = P2EFromSingleBox(ctx, expn, kernels=[in_kernel], strength_usage=[i])
+        p2e = P2EFromSingleBox(ctx, expn,
+            kernels=[source_kernel], strength_usage=[i])
         evt, (mpoles,) = p2e(queue,
             source_boxes=source_boxes,
             box_source_starts=box_source_starts,
@@ -275,7 +276,7 @@ def test_p2e2p(ctx_factory, base_knl, expn_class, order, with_source_derivative)
     else:
         knl = base_knl
 
-    out_kernels = [
+    target_kernels = [
             knl,
             AxisTargetDerivative(0, knl),
             ]
@@ -283,8 +284,8 @@ def test_p2e2p(ctx_factory, base_knl, expn_class, order, with_source_derivative)
 
     from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P
     p2e = P2EFromSingleBox(ctx, expn, kernels=[knl])
-    e2p = E2PFromSingleBox(ctx, expn, kernels=out_kernels)
-    p2p = P2P(ctx, out_kernels, exclude_self=False)
+    e2p = E2PFromSingleBox(ctx, expn, kernels=target_kernels)
+    p2p = P2P(ctx, target_kernels, exclude_self=False)
 
     from pytools.convergence import EOCRecorder
     eoc_rec_pot = EOCRecorder()
@@ -480,7 +481,7 @@ def test_translations(ctx_factory, knl, local_expn_class, mpole_expn_class):
     res = 20
     nsources = 15
 
-    out_kernels = [knl]
+    target_kernels = [knl]
 
     extra_kwargs = {}
     if isinstance(knl, HelmholtzKernel):
@@ -566,11 +567,11 @@ def test_translations(ctx_factory, knl, local_expn_class, mpole_expn_class):
         from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P, E2EFromCSR
         p2m = P2EFromSingleBox(ctx, m_expn)
         m2m = E2EFromCSR(ctx, m_expn, m_expn)
-        m2p = E2PFromSingleBox(ctx, m_expn, out_kernels)
+        m2p = E2PFromSingleBox(ctx, m_expn, target_kernels)
         m2l = E2EFromCSR(ctx, m_expn, l_expn)
         l2l = E2EFromCSR(ctx, l_expn, l_expn)
-        l2p = E2PFromSingleBox(ctx, l_expn, out_kernels)
-        p2p = P2P(ctx, out_kernels, exclude_self=False)
+        l2p = E2PFromSingleBox(ctx, l_expn, target_kernels)
+        p2p = P2P(ctx, target_kernels, exclude_self=False)
 
         fp = FieldPlotter(centers[:, -1], extent=0.3, npoints=res)
         targets = fp.points