diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py
index 9d3a917a9a16516bf6968a0446eccc0886554ed2..fffb48c8e6470e3df59abc2d2777fd1c0983317c 100644
--- a/sumpy/expansion/__init__.py
+++ b/sumpy/expansion/__init__.py
@@ -622,7 +622,8 @@ class VolumeTaylorExpansion(VolumeTaylorExpansionBase):
     expansion_terms_wrangler_cache = {}
 
     # not user-facing, be strict about having to pass use_rscale
-    def __init__(self, kernel, order, use_rscale):
+    def __init__(self, kernel, order, use_rscale, _use_fft):
+        self._use_fft = _use_fft
         self.expansion_terms_wrangler_key = (order, kernel.dim)
 
 
@@ -632,7 +633,8 @@ class LaplaceConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase):
     expansion_terms_wrangler_cache = {}
 
     # not user-facing, be strict about having to pass use_rscale
-    def __init__(self, kernel, order, use_rscale):
+    def __init__(self, kernel, order, use_rscale, _use_fft):
+        self._use_fft = _use_fft
         self.expansion_terms_wrangler_key = (order, kernel.dim)
 
 
@@ -642,7 +644,8 @@ class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase):
     expansion_terms_wrangler_cache = {}
 
     # not user-facing, be strict about having to pass use_rscale
-    def __init__(self, kernel, order, use_rscale):
+    def __init__(self, kernel, order, use_rscale, _use_fft):
+        self._use_fft = _use_fft
         helmholtz_k_name = kernel.get_base_kernel().helmholtz_k_name
         self.expansion_terms_wrangler_key = (order, kernel.dim, helmholtz_k_name)
 
@@ -653,7 +656,8 @@ class BiharmonicConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase):
     expansion_terms_wrangler_cache = {}
 
     # not user-facing, be strict about having to pass use_rscale
-    def __init__(self, kernel, order, use_rscale):
+    def __init__(self, kernel, order, use_rscale, _use_fft):
+        self._use_fft = _use_fft
         self.expansion_terms_wrangler_key = (order, kernel.dim)
 
 # }}}
diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py
index c32778ae14c525c2a7acdffbc2850ead0a160691..6edf79d481f99420c5300ae50955c45ce75a9c77 100644
--- a/sumpy/expansion/local.py
+++ b/sumpy/expansion/local.py
@@ -30,6 +30,9 @@ from sumpy.expansion import (
     HelmholtzConformingVolumeTaylorExpansion,
     BiharmonicConformingVolumeTaylorExpansion)
 
+from sumpy.tools import (matvec_toeplitz_upper_triangular,
+    fft_toeplitz_upper_triangular)
+
 
 class LocalExpansionBase(ExpansionBase):
     pass
@@ -167,7 +170,8 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase):
 
             taker = src_expansion.get_kernel_derivative_taker(dvec)
 
-            from sumpy.tools import add_mi
+            from sumpy.tools import add_mi, _fft_uneval_expr
+            from pytools import generate_nonnegative_integer_tuples_below as gnitb
 
             max_mi = [0]*self.dim
             for i in range(self.dim):
@@ -176,54 +180,74 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase):
                 max_mi[i] += max(mi[i] for mi in
                                   self.get_coefficient_identifiers())
 
+            toeplitz_matrix_coeffs = list(gnitb([m + 1 for m in max_mi]))
+            toeplitz_matrix_ident_to_index = dict((ident, i) for i, ident in
+                                enumerate(toeplitz_matrix_coeffs))
+
             # Create a expansion terms wrangler for derivatives up to order
             # (tgt order)+(src order) including a corresponding reduction matrix
-            tgtplusderiv_exp_terms_wrangler = \
+            # For eg: 2D full Taylor Laplace, this is (n, m),
+            # n+m<=2*p, n<=2*p, m<=2*p
+            srcplusderiv_terms_wrangler = \
                 src_expansion.expansion_terms_wrangler.copy(
                         order=self.order + src_expansion.order, max_mi=tuple(max_mi))
-            tgtplusderiv_coeff_ids = \
-                tgtplusderiv_exp_terms_wrangler.get_coefficient_identifiers()
-            tgtplusderiv_full_coeff_ids = \
-                tgtplusderiv_exp_terms_wrangler.get_full_coefficient_identifiers()
-
-            tgtplusderiv_ident_to_index = dict((ident, i) for i, ident in
-                                enumerate(tgtplusderiv_full_coeff_ids))
+            srcplusderiv_full_coeff_ids = \
+                srcplusderiv_terms_wrangler.get_full_coefficient_identifiers()
+            srcplusderiv_ident_to_index = dict((ident, i) for i, ident in
+                                enumerate(srcplusderiv_full_coeff_ids))
 
+            # The vector has the kernel derivatives and depends only on the distance
+            # between the two centers
+            taker = src_expansion.get_kernel_derivative_taker(dvec)
+            vector_stored = []
+            # Calculate the kernel derivatives for the compressed set
+            for term in \
+                    srcplusderiv_terms_wrangler.get_coefficient_identifiers():
+                kernel_deriv = src_expansion.get_scaled_multipole(
+                    taker.diff(term),
+                    dvec, src_rscale,
+                    nderivatives=sum(term),
+                    nderivatives_for_scaling=sum(term),
+                )
+                vector_stored.append(kernel_deriv)
+            # Calculate the kernel derivatives for the full set
+            vector_full = \
+                srcplusderiv_terms_wrangler.get_full_kernel_derivatives_from_stored(
+                            vector_stored, src_rscale)
+
+            needed_vector_terms = set([])
+            # For eg: 2D full Taylor Laplace, we only need kernel derivatives
+            # (n1+n2, m1+m2), n1+m1<=p, n2+m2<=p
+            for tgt_deriv in self.get_coefficient_identifiers():
+                for src_deriv in src_expansion.get_coefficient_identifiers():
+                    needed = add_mi(src_deriv, tgt_deriv)
+                    needed_vector_terms.add(needed)
+
+            vector = [0]*len(toeplitz_matrix_coeffs)
+            for i, term in enumerate(toeplitz_matrix_coeffs):
+                if term in srcplusderiv_ident_to_index:
+                    vector[i] = vector_full[srcplusderiv_ident_to_index[term]]
+
+            # Calculate the first row of the upper triangular Toeplitz matrix
+            toeplitz_first_row = [0] * len(toeplitz_matrix_coeffs)
+            for coeff, term in zip(
+                    src_coeff_exprs,
+                    src_expansion.get_coefficient_identifiers()):
+                toeplitz_first_row[toeplitz_matrix_ident_to_index[term]] = coeff
+
+            # Do the matvec
+            if self._use_fft:
+                output = fft_toeplitz_upper_triangular(toeplitz_first_row, vector)
+            else:
+                output = matvec_toeplitz_upper_triangular(toeplitz_first_row, vector)
+
+            # Filter out the dummy rows and scale them for target
             result = []
-            for lexp_mi in self.get_coefficient_identifiers():
-                lexp_mi_terms = []
-
-                # Embed the source coefficients in the coefficient array
-                # for the (tgt order)+(src order) wrangler, offset by lexp_mi.
-                embedded_coeffs = [0] * len(tgtplusderiv_full_coeff_ids)
-                for coeff, term in zip(
-                        src_coeff_exprs,
-                        src_expansion.get_coefficient_identifiers()):
-                    embedded_coeffs[
-                            tgtplusderiv_ident_to_index[add_mi(lexp_mi, term)]] \
-                                    = coeff
-
-                # Compress the embedded coefficient set
-                stored_coeffs = tgtplusderiv_exp_terms_wrangler \
-                        .get_stored_mpole_coefficients_from_full(
-                                embedded_coeffs, src_rscale)
-
-                # Sum the embedded coefficient set
-                for i, coeff in enumerate(stored_coeffs):
-                    if coeff == 0:
-                        continue
-                    nderivatives_for_scaling = \
-                            sum(tgtplusderiv_coeff_ids[i])-sum(lexp_mi)
-                    kernel_deriv = (
-                            src_expansion.get_scaled_multipole(
-                                taker.diff(tgtplusderiv_coeff_ids[i]),
-                                dvec, src_rscale,
-                                nderivatives=sum(tgtplusderiv_coeff_ids[i]),
-                                nderivatives_for_scaling=nderivatives_for_scaling))
-
-                    lexp_mi_terms.append(
-                            coeff * kernel_deriv * tgt_rscale**sum(lexp_mi))
-                result.append(sym.Add(*lexp_mi_terms))
+            rscale_ratio = _fft_uneval_expr(tgt_rscale/src_rscale)
+            for term in self.get_coefficient_identifiers():
+                index = toeplitz_matrix_ident_to_index[term]
+                result.append(output[index]*rscale_ratio**sum(term))
+
             logger.info("building translation operator: done")
             return result
 
@@ -311,39 +335,39 @@ class VolumeTaylorLocalExpansion(
         VolumeTaylorExpansion,
         VolumeTaylorLocalExpansionBase):
 
-    def __init__(self, kernel, order, use_rscale=None):
+    def __init__(self, kernel, order, use_rscale=None, _use_fft=False):
         VolumeTaylorLocalExpansionBase.__init__(self, kernel, order, use_rscale)
-        VolumeTaylorExpansion.__init__(self, kernel, order, use_rscale)
+        VolumeTaylorExpansion.__init__(self, kernel, order, use_rscale, _use_fft)
 
 
 class LaplaceConformingVolumeTaylorLocalExpansion(
         LaplaceConformingVolumeTaylorExpansion,
         VolumeTaylorLocalExpansionBase):
 
-    def __init__(self, kernel, order, use_rscale=None):
+    def __init__(self, kernel, order, use_rscale=None, _use_fft=False):
         VolumeTaylorLocalExpansionBase.__init__(self, kernel, order, use_rscale)
         LaplaceConformingVolumeTaylorExpansion.__init__(
-                self, kernel, order, use_rscale)
+                self, kernel, order, use_rscale, _use_fft)
 
 
 class HelmholtzConformingVolumeTaylorLocalExpansion(
         HelmholtzConformingVolumeTaylorExpansion,
         VolumeTaylorLocalExpansionBase):
 
-    def __init__(self, kernel, order, use_rscale=None):
+    def __init__(self, kernel, order, use_rscale=None, _use_fft=False):
         VolumeTaylorLocalExpansionBase.__init__(self, kernel, order, use_rscale)
         HelmholtzConformingVolumeTaylorExpansion.__init__(
-                self, kernel, order, use_rscale)
+                self, kernel, order, use_rscale, _use_fft)
 
 
 class BiharmonicConformingVolumeTaylorLocalExpansion(
         BiharmonicConformingVolumeTaylorExpansion,
         VolumeTaylorLocalExpansionBase):
 
-    def __init__(self, kernel, order, use_rscale=None):
+    def __init__(self, kernel, order, use_rscale=None, _use_fft=False):
         VolumeTaylorLocalExpansionBase.__init__(self, kernel, order, use_rscale)
         BiharmonicConformingVolumeTaylorExpansion.__init__(
-                self, kernel, order, use_rscale)
+                self, kernel, order, use_rscale, _use_fft)
 
 # }}}
 
diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py
index b7e2769ba21b470cb163515da6e70c55eb360d2b..8e217109cdb4b466cffdbbdfda66b5920f6fd876 100644
--- a/sumpy/expansion/multipole.py
+++ b/sumpy/expansion/multipole.py
@@ -301,39 +301,39 @@ class VolumeTaylorMultipoleExpansion(
         VolumeTaylorExpansion,
         VolumeTaylorMultipoleExpansionBase):
 
-    def __init__(self, kernel, order, use_rscale=None):
+    def __init__(self, kernel, order, use_rscale=None, _use_fft=False):
         VolumeTaylorMultipoleExpansionBase.__init__(self, kernel, order, use_rscale)
-        VolumeTaylorExpansion.__init__(self, kernel, order, use_rscale)
+        VolumeTaylorExpansion.__init__(self, kernel, order, use_rscale, _use_fft)
 
 
 class LaplaceConformingVolumeTaylorMultipoleExpansion(
         LaplaceConformingVolumeTaylorExpansion,
         VolumeTaylorMultipoleExpansionBase):
 
-    def __init__(self, kernel, order, use_rscale=None):
+    def __init__(self, kernel, order, use_rscale=None, _use_fft=False):
         VolumeTaylorMultipoleExpansionBase.__init__(self, kernel, order, use_rscale)
         LaplaceConformingVolumeTaylorExpansion.__init__(
-                self, kernel, order, use_rscale)
+                self, kernel, order, use_rscale, _use_fft)
 
 
 class HelmholtzConformingVolumeTaylorMultipoleExpansion(
         HelmholtzConformingVolumeTaylorExpansion,
         VolumeTaylorMultipoleExpansionBase):
 
-    def __init__(self, kernel, order, use_rscale=None):
+    def __init__(self, kernel, order, use_rscale=None, _use_fft=False):
         VolumeTaylorMultipoleExpansionBase.__init__(self, kernel, order, use_rscale)
         HelmholtzConformingVolumeTaylorExpansion.__init__(
-                self, kernel, order, use_rscale)
+                self, kernel, order, use_rscale, _use_fft)
 
 
 class BiharmonicConformingVolumeTaylorMultipoleExpansion(
         BiharmonicConformingVolumeTaylorExpansion,
         VolumeTaylorMultipoleExpansionBase):
 
-    def __init__(self, kernel, order, use_rscale=None):
+    def __init__(self, kernel, order, use_rscale=None, _use_fft=False):
         VolumeTaylorMultipoleExpansionBase.__init__(self, kernel, order, use_rscale)
         BiharmonicConformingVolumeTaylorExpansion.__init__(
-                self, kernel, order, use_rscale)
+                self, kernel, order, use_rscale, _use_fft)
 
 # }}}
 
diff --git a/sumpy/symbolic.py b/sumpy/symbolic.py
index 7a86958aebd7eea6c8054b11e61413314886ebe9..0fed239d30b04fe38360efee190de5f38b66293b 100644
--- a/sumpy/symbolic.py
+++ b/sumpy/symbolic.py
@@ -76,7 +76,7 @@ _find_symbolic_backend()
 # Before adding a function here, make sure it's present in both modules.
 SYMBOLIC_API = """
 Add Basic Mul Pow exp sqrt log symbols sympify cos sin atan2 Function Symbol
-Derivative Integer Matrix Subs I pi functions""".split()
+Derivative Integer Matrix Subs I pi functions Number Float""".split()
 
 if USE_SYMENGINE:
     import symengine as sym
diff --git a/sumpy/tools.py b/sumpy/tools.py
index 2719f43a2e128da47997f0d1a010072d9bcec1dd..d6daeb459d709cd726da9f2d32ff2e9d6e3abc9d 100644
--- a/sumpy/tools.py
+++ b/sumpy/tools.py
@@ -3,6 +3,8 @@ from __future__ import division, absolute_import
 __copyright__ = """
 Copyright (C) 2012 Andreas Kloeckner
 Copyright (C) 2018 Alexandru Fikl
+Copyright (C) 2006-2019 SymPy Development Team
+Copyright (C) 2020 Isuru Fernando
 """
 
 __license__ = """
@@ -23,6 +25,36 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
+
+=============================================================================
+Copyright (c) 2006-2019 SymPy Development Team
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+  a. Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+  b. Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+  c. Neither the name of SymPy nor the names of its contributors
+     may be used to endorse or promote products derived from this software
+     without specific prior written permission.
+
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
 """
 
 import six
@@ -30,6 +62,8 @@ from six.moves import range, zip
 from pytools import memoize_method, memoize_in
 import numpy as np
 import sumpy.symbolic as sym
+import numbers
+import math
 
 import pyopencl as cl
 import pyopencl.array  # noqa
@@ -670,6 +704,8 @@ def my_syntactic_subs(expr, subst_dict):
         return expr
 
 
+# {{{ matrices
+
 def reduced_row_echelon_form(m):
     """Calculates a reduced row echelon form of a
     matrix `m`.
@@ -760,4 +796,118 @@ def solve_symbolic(A, b):  # noqa: N803
     red = reduced_row_echelon_form(big)[0]
     return red[:, big.shape[0]:]
 
+# }}}
+
+
+# {{{ FFT
+
+def _fft_uneval_expr(expr):
+    """
+    Creates a CSE node if the expr is not a numeric type
+    """
+    if isinstance(expr, (numbers.Number, sym.Number, int, float, complex)):
+        return expr
+    # UnevaluatedExpr is not implemented in SymEngine, but that's fine
+    # as SymEngine doesn't distribute 2*(a+b) to 2*a + 2*b.
+    # SymPy distributes which destroys the complexity.
+    return sym.UnevaluatedExpr(expr)
+
+
+def _complex_tuple_mul(a, b):
+    """
+    Multiply the two complex numbers represented as a tuple
+    for real and imaginary parts
+    """
+    return (_fft_uneval_expr((a[0]*b[0])-(a[1]*b[1])),
+            _fft_uneval_expr((a[0]*b[1])+(a[1]*b[0])))
+
+
+def _binary_reverse(n, bits):
+    # Returns the reverse of the number n in binary form with bits
+    # number of bits
+    b = bin(n)[2:].rjust(bits, "0")
+    return int(b[::-1], 2)
+
+
+def fft(seq, inverse=False):
+    """
+    Return the discrete fourier transform of the sequence seq.
+    seq should be a python iterable with tuples of length 2
+    corresponding to the real part and imaginary part.
+    """
+
+    a = seq
+    n = len(a)
+    if n < 2:
+        return a
+
+    b = n.bit_length() - 1
+    if n & (n - 1):  # not a power of 2
+        b += 1
+        n = 2**b
+
+    a += [(0, 0)]*(n - len(a))
+    for i in range(1, n):
+        j = _binary_reverse(i, b)
+        if i < j:
+            a[i], a[j] = a[j], a[i]
+    ang = 2*math.pi/n
+    # Rewrite cosines and sines using cosines of angle in the first quadrant
+    # This is to reduce duplicate of floating point numbers with 1 ULP difference
+    # and also make sure quantities like cos(pi/2) - sin(pi/2) produces 0 exactly.
+    w = [(math.cos(ang*i), math.cos(ang*(n/4.0 - i))) for i in range((n + 3)//4)]
+    w[0] = (1, 0)
+    w += [(-math.cos(ang*(n/2 - i)), math.cos(ang*(i - n/4.0))) for
+            i in range((n + 3)//4, n//2)]
+    if n % 4 == 0:
+        w[n//4] = (0, 1)
+    if inverse:
+        w = [(x[0], -x[1]) for x in w]
+    h = 2
+    while h <= n:
+        hf, ut = h // 2, n // h
+        for i in range(0, n, h):
+            for j in range(hf):
+                u, v = a[i + j], _complex_tuple_mul(a[i + j + hf], w[ut * j])
+                a[i + j] = (u[0] + v[0], u[1] + v[1])
+                a[i + j + hf] = (u[0] - v[0], u[1] - v[1])
+        h *= 2
+
+    if inverse:
+        a = [(x[0]/n, x[1]/n) for x in a]
+
+    return a
+
+
+def fft_toeplitz_upper_triangular(first_row, x):
+    """
+    Returns the matvec of the Toeplitz matrix given by
+    the first row and the vector x using a Fourier transform
+    """
+    assert len(first_row) == len(x)
+    n = len(first_row)
+    v = list(first_row)
+    v += [0]*(n-1)
+
+    x = list(reversed(x))
+    x += [0]*(n-1)
+
+    v_fft = fft([(a, 0) for a in v])
+    x_fft = fft([(a, 0) for a in x])
+    res_fft = [_complex_tuple_mul(a, b) for a, b in zip(v_fft, x_fft)]
+    res = fft(res_fft, inverse=True)
+    return [a for a, _ in reversed(res[:n])]
+
+
+def matvec_toeplitz_upper_triangular(first_row, vector):
+    n = len(first_row)
+    assert len(vector) == n
+    output = [0]*n
+    for row in range(n):
+        for col in range(row, n):
+            output[row] += first_row[col-row]*vector[col]
+    return output
+
+# }}}
+
 # vim: fdm=marker
diff --git a/test/test_kernels.py b/test/test_kernels.py
index 8225723cdd4f7df2afb7c487a578cc8c9ee81d28..64c5b217097db9b8e2987602ab423340cedb5264 100644
--- a/test/test_kernels.py
+++ b/test/test_kernels.py
@@ -45,6 +45,7 @@ from sumpy.expansion.local import (
 from sumpy.kernel import (LaplaceKernel, HelmholtzKernel, AxisTargetDerivative,
         DirectionalSourceDerivative, BiharmonicKernel, StokesletKernel)
 from pytools.convergence import PConvergenceVerifier
+import sumpy.symbolic as sym
 
 import logging
 logger = logging.getLogger(__name__)
@@ -346,6 +347,77 @@ def test_p2e2p(ctx_getter, base_knl, expn_class, order, with_source_derivative):
     assert eoc_rec_grad_x.order_estimate() > tgt_order_grad - grad_slack
 
 
+# {{{ test toeplitz
+
+def _m2l_translate_simple(tgt_expansion, src_expansion, src_coeff_exprs, src_rscale,
+                           dvec, tgt_rscale):
+
+    if not tgt_expansion.use_rscale:
+        src_rscale = 1
+        tgt_rscale = 1
+
+    from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansionBase
+    if not isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase):
+        return 1
+
+    # We know the general form of the multipole expansion is:
+    #
+    #    coeff0 * diff(kernel, mi0) + coeff1 * diff(kernel, mi1) + ...
+    #
+    # To get the local expansion coefficients, we take derivatives of
+    # the multipole expansion.
+    taker = src_expansion.get_kernel_derivative_taker(dvec)
+
+    from sumpy.tools import add_mi
+
+    result = []
+    for deriv in tgt_expansion.get_coefficient_identifiers():
+        local_result = []
+        for coeff, term in zip(
+                src_coeff_exprs,
+                src_expansion.get_coefficient_identifiers()):
+
+            kernel_deriv = (
+                    src_expansion.get_scaled_multipole(
+                        taker.diff(add_mi(deriv, term)),
+                        dvec, src_rscale,
+                        nderivatives=sum(deriv) + sum(term),
+                        nderivatives_for_scaling=sum(term)))
+
+            local_result.append(
+                    coeff * kernel_deriv * tgt_rscale**sum(deriv))
+        result.append(sym.Add(*local_result))
+    return result
+
+
+def test_m2l_toeplitz(ctx_getter):
+    dim = 3
+    knl = LaplaceKernel(dim)
+    local_expn_class = LaplaceConformingVolumeTaylorLocalExpansion
+    mpole_expn_class = LaplaceConformingVolumeTaylorMultipoleExpansion
+
+    local_expn = local_expn_class(knl, order=5)
+    mpole_expn = mpole_expn_class(knl, order=5)
+
+    dvec = sym.make_sym_vector("d", dim)
+    src_coeff_exprs = list(1 + np.random.randn(len(mpole_expn)))
+    src_rscale = 2.0
+    tgt_rscale = 1.0
+
+    expected_output = _m2l_translate_simple(local_expn, mpole_expn, src_coeff_exprs,
+                                           src_rscale, dvec, tgt_rscale)
+    actual_output = local_expn.translate_from(mpole_expn, src_coeff_exprs,
+                                              src_rscale, dvec, tgt_rscale)
+
+    replace_dict = dict((d, np.random.rand(1)[0]) for d in dvec)
+    for sym_a, sym_b in zip(expected_output, actual_output):
+        num_a = sym_a.xreplace(replace_dict)
+        num_b = sym_b.xreplace(replace_dict)
+        assert abs(num_a - num_b)/abs(num_a) < 1e-10
+
+
+# }}}
+
 @pytest.mark.parametrize("knl, local_expn_class, mpole_expn_class", [
     (LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion),
     (LaplaceKernel(2), LaplaceConformingVolumeTaylorLocalExpansion,
diff --git a/test/test_tools.py b/test/test_tools.py
new file mode 100644
index 0000000000000000000000000000000000000000..743caa14f9d9e632bf32aa679c984da8a0556506
--- /dev/null
+++ b/test/test_tools.py
@@ -0,0 +1,56 @@
+from __future__ import division, absolute_import, print_function
+
+__copyright__ = "Copyright (C) 2020 Isuru Fernando"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import logging
+logger = logging.getLogger(__name__)
+
+import sumpy.symbolic as sym
+from sumpy.tools import (fft_toeplitz_upper_triangular,
+    matvec_toeplitz_upper_triangular)
+import numpy as np
+
+
+def test_fft():
+    k = 5
+    v = np.random.rand(k)
+    x = np.random.rand(k)
+
+    fft = fft_toeplitz_upper_triangular(v, x)
+    matvec = matvec_toeplitz_upper_triangular(v, x)
+
+    for i in range(k):
+        assert abs(fft[i] - matvec[i]) < 1e-15
+
+
+def test_fft_small_floats():
+    k = 5
+    v = sym.make_sym_vector("v", k)
+    x = sym.make_sym_vector("x", k)
+
+    fft = fft_toeplitz_upper_triangular(v, x)
+    for expr in fft:
+        for f in expr.atoms(sym.Float):
+            if f == 0:
+                continue
+            assert abs(f) > 1e-10