diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 40517d542b3a0e942f4b78b90a9ed4b2e9360244..d72495e28a8cf0b9679f6e58f292a4503d61c385 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -17,7 +17,12 @@
 #    reports:
 #      junit: test/pytest.xml
 
+stages:
+  - test
+  - deploy
+
 Pytest POCL:
+  stage: test
   script:
   - export PY_EXE=python3
   - export PYOPENCL_TEST=portable:pthread
@@ -34,6 +39,7 @@ Pytest POCL:
       junit: test/pytest.xml
 
 Pytest Titan V:
+  stage: test
   script:
   - py_version=3
   - export PYOPENCL_TEST=nvi:titan
@@ -51,6 +57,7 @@ Pytest Titan V:
       junit: test/pytest.xml
 
 Pytest Conda:
+  stage: test
   script:
   # Disable caching to ensure SymEngine code generation is exercised.
   - export SUMPY_NO_CACHE=1
@@ -68,6 +75,7 @@ Pytest Conda:
       junit: test/pytest.xml
 
 Pytest POCL Titan V:
+  stage: test
   script:
   # Disable caching to ensure SymEngine code generation is exercised.
   - export SUMPY_NO_CACHE=1
@@ -85,6 +93,7 @@ Pytest POCL Titan V:
       junit: test/pytest.xml
 
 Examples Conda:
+  stage: test
   script: |
     grep -v symengine .test-conda-env-py3.yml > .test-conda-env.yml
     CONDA_ENVIRONMENT=.test-conda-env.yml
@@ -99,14 +108,25 @@ Examples Conda:
   - tags
 
 Documentation:
-  script:
-  - EXTRA_INSTALL="pybind11 numpy mako"
-  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/build-docs.sh
-  - ". ./build-docs.sh"
+  stage: deploy
+  script: |
+    PROJECT=sumpy
+    EXTRA_INSTALL="pybind11 numpy mako"
+    curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/ci-support.sh
+    . ./ci-support.sh
+    build_py_project_in_venv
+    build_docs
+    if [[ -d ~/.scicomp-benchmarks/asv/$PROJECT ]]; then
+      cp -rf ~/.scicomp-benchmarks/asv/$PROJECT _build/html/benchmarks
+    fi
+    maybe_upload_docs
+
   tags:
-  - python3
+  - linux
+  - benchmark
 
 Flake8:
+  stage: test
   script:
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/prepare-and-run-flake8.sh
   - . ./prepare-and-run-flake8.sh "$CI_PROJECT_NAME" test examples benchmarks
@@ -116,6 +136,7 @@ Flake8:
   - tags
 
 Benchmarks:
+  stage: test
   script:
   - CONDA_ENVIRONMENT=.test-conda-env-py3.yml
   - PROJECT=sumpy
diff --git a/README.rst b/README.rst
index 7d4041be88d22e6f3102e914b543494863cbb804..3c5bf836f21956861106c5aac58e57652849e3c1 100644
--- a/README.rst
+++ b/README.rst
@@ -35,6 +35,4 @@ Resources:
 
 * `documentation <https://documen.tician.de/sumpy>`_
 * `source code via git <https://github.com/inducer/sumpy>`_
-
-If you can see inside the UIUC firewall, you may browse
-`benchmark results <https://koelsch.d.tiker.net/benchmarks/asv/sumpy/>`_.
+* `benchmarks <https://documen.tician.de/sumpy/benchmarks>`_
diff --git a/asv.conf.json b/asv.conf.json
index 4c34933d134fb2279ff05e11498e3f16a34beed6..800817d68e353523789be7bae34551d51f9fb636 100644
--- a/asv.conf.json
+++ b/asv.conf.json
@@ -20,7 +20,7 @@
 
     // List of branches to benchmark. If not provided, defaults to "master"
     // (for git) or "default" (for mercurial).
-    // "branches": ["master"], // for git
+    "branches": ["main"], // for git
     // "branches": ["default"],    // for mercurial
 
     // The DVCS being used.  If not set, it will be automatically
diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py
index c0216da413508443e243557dae6e77c8b45d5373..79bc9cb8e79139f4b2470d4b88c69018992ca7f7 100644
--- a/sumpy/expansion/__init__.py
+++ b/sumpy/expansion/__init__.py
@@ -185,14 +185,6 @@ class ExpansionBase:
 
         return type(self)(**new_kwargs)
 
-    def get_kernel_derivative_taker(self, dvec, rscale, sac):
-        """Return a ExprDerivativeTaker instance that supports taking
-        derivatives of the kernel with respect to dvec
-        """
-        from sumpy.tools import ExprDerivativeTaker
-        return ExprDerivativeTaker(self.kernel.get_expression(dvec), dvec, rscale,
-                sac)
-
 
 # }}}
 
@@ -695,11 +687,6 @@ class LaplaceConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase):
     def __init__(self, kernel, order, use_rscale):
         self.expansion_terms_wrangler_key = (order, kernel.dim)
 
-    def get_kernel_derivative_taker(self, dvec, rscale, sac):
-        from sumpy.tools import LaplaceDerivativeTaker
-        return LaplaceDerivativeTaker(self.kernel.get_expression(dvec), dvec,
-                rscale, sac)
-
 
 class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase):
 
@@ -711,11 +698,6 @@ class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase):
         helmholtz_k_name = kernel.get_base_kernel().helmholtz_k_name
         self.expansion_terms_wrangler_key = (order, kernel.dim, helmholtz_k_name)
 
-    def get_kernel_derivative_taker(self, dvec, rscale, sac):
-        from sumpy.tools import HelmholtzDerivativeTaker
-        return HelmholtzDerivativeTaker(self.kernel.get_expression(dvec), dvec,
-                rscale, sac)
-
 
 class BiharmonicConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase):
 
@@ -726,10 +708,6 @@ class BiharmonicConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase):
     def __init__(self, kernel, order, use_rscale):
         self.expansion_terms_wrangler_key = (order, kernel.dim)
 
-    def get_kernel_derivative_taker(self, dvec, rscale, sac):
-        from sumpy.tools import RadialDerivativeTaker
-        return RadialDerivativeTaker(self.kernel.get_expression(dvec), dvec,
-                rscale, sac)
 
 # }}}
 
diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py
index 247938d74d7d7982bf1053696766a587411d873c..c1369525da1db734f756f5f65e8544cbccfd9a92 100644
--- a/sumpy/expansion/local.py
+++ b/sumpy/expansion/local.py
@@ -29,7 +29,7 @@ from sumpy.expansion import (
     BiharmonicConformingVolumeTaylorExpansion)
 
 from sumpy.tools import mi_increment_axis, matvec_toeplitz_upper_triangular
-
+from pytools import single_valued
 
 class LocalExpansionBase(ExpansionBase):
     pass
@@ -132,7 +132,8 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase):
         if not self.use_rscale:
             rscale = 1
 
-        base_taker = self.get_kernel_derivative_taker(avec, rscale, sac)
+        base_kernel = single_valued(knl.get_base_kernel() for knl in kernels)
+        base_taker = base_kernel.get_derivative_taker(avec, rscale, sac)
         result = [0]*len(self)
 
         for knl, weight in zip(kernels, weights):
@@ -241,7 +242,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase):
 
         # The vector has the kernel derivatives and depends only on the distance
         # between the two centers
-        taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale, sac)
+        taker = src_expansion.kernel.get_derivative_taker(dvec, src_rscale, sac)
         vector_stored = []
         # Calculate the kernel derivatives for the compressed set
         for term in \
@@ -564,7 +565,7 @@ class _FourierBesselLocalExpansion(LocalExpansionBase):
             for j in self.get_coefficient_identifiers():
                 translated_coeffs.append(
                     sum(
-                        (-1) ** j
+                        sym.Integer(-1) ** j
                         * hankel_1(m + j, arg_scale * dvec_len)
                         * src_rscale ** abs(m)
                         * tgt_rscale ** abs(j)
diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py
index 28369afe8ebc1a75576baf690ef5a84adb8f118f..b9b1a07700b1019d18ea06b2613cd9acca4f251c 100644
--- a/sumpy/expansion/multipole.py
+++ b/sumpy/expansion/multipole.py
@@ -91,7 +91,7 @@ class VolumeTaylorMultipoleExpansionBase(MultipoleExpansionBase):
         if not self.use_rscale:
             rscale = 1
 
-        base_taker = self.get_kernel_derivative_taker(bvec, rscale, sac)
+        base_taker = kernel.get_derivative_taker(bvec, rscale, sac)
         taker = kernel.postprocess_at_target(base_taker, bvec)
 
         result = []
diff --git a/sumpy/kernel.py b/sumpy/kernel.py
index 44daa9555276360f1e1e389617535feb4e2f632d..494a6107248004883cd70c9e79dc58474c8d74ea 100644
--- a/sumpy/kernel.py
+++ b/sumpy/kernel.py
@@ -372,6 +372,13 @@ class ExpressionKernel(Kernel):
 
     mapper_method = "map_expression_kernel"
 
+    def get_derivative_taker(self, dvec, rscale, sac):
+        """Return a :class:`sumpy.tools.ExprDerivativeTaker` instance that supports
+        taking derivatives of this kernel's expression with respect to *dvec*.
+        """
+        from sumpy.tools import ExprDerivativeTaker
+        return ExprDerivativeTaker(self.get_expression(dvec), dvec, rscale, sac)
+
 
 one_kernel_2d = ExpressionKernel(
         dim=2,
@@ -417,6 +424,13 @@ class LaplaceKernel(ExpressionKernel):
 
     mapper_method = "map_laplace_kernel"
 
+    def get_derivative_taker(self, dvec, rscale, sac):
+        """Return a :class:`sumpy.tools.LaplaceDerivativeTaker` instance that
+        supports taking derivatives of the base kernel with respect to *dvec*.
+        """
+        from sumpy.tools import LaplaceDerivativeTaker
+        return LaplaceDerivativeTaker(self.get_expression(dvec), dvec, rscale, sac)
+
 
 class BiharmonicKernel(ExpressionKernel):
     init_arg_names = ("dim",)
@@ -453,6 +467,14 @@ class BiharmonicKernel(ExpressionKernel):
 
     mapper_method = "map_biharmonic_kernel"
 
+    def get_derivative_taker(self, dvec, rscale, sac):
+        """Return a :class:`sumpy.tools.RadialDerivativeTaker` instance that
+        supports taking derivatives of the base kernel with respect to *dvec*.
+        """
+        from sumpy.tools import RadialDerivativeTaker
+        return RadialDerivativeTaker(self.get_expression(dvec), dvec, rscale,
+                sac)
+
 
 class HelmholtzKernel(ExpressionKernel):
     init_arg_names = ("dim", "helmholtz_k_name", "allow_evanescent")
@@ -523,6 +545,13 @@ class HelmholtzKernel(ExpressionKernel):
 
     mapper_method = "map_helmholtz_kernel"
 
+    def get_derivative_taker(self, dvec, rscale, sac):
+        """Return a :class:`sumpy.tools.HelmholtzDerivativeTaker` instance that
+        supports taking derivatives of the base kernel with respect to *dvec*.
+        """
+        from sumpy.tools import HelmholtzDerivativeTaker
+        return HelmholtzDerivativeTaker(self.get_expression(dvec), dvec, rscale, sac)
+
 
 class YukawaKernel(ExpressionKernel):
     init_arg_names = ("dim", "yukawa_lambda_name")
@@ -597,6 +626,13 @@ class YukawaKernel(ExpressionKernel):
 
     mapper_method = "map_yukawa_kernel"
 
+    def get_derivative_taker(self, dvec, rscale, sac):
+        """Return a :class:`sumpy.tools.HelmholtzDerivativeTaker` instance that
+        supports taking derivatives of the base kernel with respect to *dvec*.
+        """
+        from sumpy.tools import HelmholtzDerivativeTaker
+        return HelmholtzDerivativeTaker(self.get_expression(dvec), dvec, rscale, sac)
+
 
 class StokesletKernel(ExpressionKernel):
     init_arg_names = ("dim", "icomp", "jcomp", "viscosity_mu_name")
@@ -778,6 +814,9 @@ class KernelWrapper(Kernel):
         raise NotImplementedError("replace_base_kernel is not implemented "
             "for this wrapper.")
 
+    def get_derivative_taker(self, dvec, rscale, sac):  # defer to wrapped kernel
+        return self.inner_kernel.get_derivative_taker(dvec, rscale, sac)
+
 # }}}