diff --git a/pymbolic/algorithm.py b/pymbolic/algorithm.py
index 1d8d4ec454c3042f0292dd2c0caf1a97c7326667..a63aea94b727da44d61a0dcfd58287df1573da0c 100644
--- a/pymbolic/algorithm.py
+++ b/pymbolic/algorithm.py
@@ -1,4 +1,6 @@
-import pymbolic.traits as traits
+from __future__ import division
+import cmath
+from pytools import memoize
 
 
 
@@ -32,6 +34,7 @@ def extended_euclidean(q, r):
     """Return a tuple (p, a, b) such that p = aq + br,
     where p is the greatest common divisor.
     """
+    import pymbolic.traits as traits
 
     # see [Davenport], Appendix, p. 214
 
@@ -55,6 +58,88 @@ def extended_euclidean(q, r):
 
 
 
+
+@memoize
+def find_factors(N):
+    from math import sqrt
+
+    N1 = 2
+    max_N1 = int(sqrt(N))+1
+    while N % N1 != 0 and N1 <= max_N1:
+        N1 += 1
+
+    if N1 > max_N1:
+        N1 = N
+
+    N2 = N // N1
+
+    return N1, N2
+
+
+
+
+def fft(x, sign=1, wrap_intermediate=lambda x: x):
+    """Computes the Fourier transform of x:
+
+    F[x]_i = \sum_{j=0}^{n-1} z^{ij} x_j
+
+    where z = exp(sign*-2j*pi/n) and n = len(x).
+    """
+
+    # http://en.wikipedia.org/wiki/Cooley-Tukey_FFT_algorithm
+    # revision 293076305, http://is.gd/1c7PI
+
+    from math import pi
+    import numpy
+
+    N = len(x)
+
+    if N == 1:
+        return x
+
+    N1, N2 = find_factors(N)
+
+    # do the transform
+    sub_ffts = [
+            wrap_intermediate(
+                fft(x[n1::N1], sign, wrap_intermediate)
+                * numpy.exp(numpy.linspace(0, sign*(-2j)*pi*n1/N1, N2,
+                    endpoint=False)))
+            for n1 in range(N1)]
+
+    return numpy.hstack([
+        sum(subvec * cmath.exp(sign*(-2j)*pi*n1*k1/N1)
+            for n1, subvec in enumerate(sub_ffts))
+        for k1 in range(N1)
+        ])
+
+
+
+
+def ifft(x, wrap_intermediate=lambda x:x):
+    return (1/len(x))*fft(x, -1, wrap_intermediate)
+
+
+
+
+
+def csr_matrix_multiply(S,x):
+    """Multiplies a scipy.sparse.csr_matrix S by an object-array vector x.
+    """
+    h, w = S.shape
+
+    import numpy
+    result = numpy.empty_like(x)
+
+    for i in xrange(h):
+        result[i] = sum(S.data[idx]*x[S.indices[idx]] 
+                for idx in range(S.indptr[i], S.indptr[i+1]))
+
+    return result
+
+
+
+
 if __name__ == "__main__":
     import integer
     q = integer.Integer(14)
diff --git a/test/test_pymbolic.py b/test/test_pymbolic.py
index 1638ae1e6bbf1b07feae59817fc9b494a1e604ce..ff79551b68ad3b9bd49e61ea02389bf5976c3c69 100644
--- a/test/test_pymbolic.py
+++ b/test/test_pymbolic.py
@@ -1,3 +1,8 @@
+from __future__ import division
+
+
+
+
 def test_expand():
     from pymbolic import var, expand
 
@@ -5,8 +10,107 @@ def test_expand():
     u = (x+1)**5
     expand(u)
 
+
+
+
 def test_substitute():
     from pymbolic import parse, substitute, evaluate
     u = parse("5+x.min**2")
     xmin = parse("x.min")
     assert evaluate(substitute(u, {xmin:25})) == 630
+
+
+
+
+def test_fft_with_floats():
+    import py.test
+    numpy = py.test.importorskip("numpy")
+    import numpy.linalg as la
+
+    from pymbolic.algorithm import fft, ifft
+
+    for n in [2**i for i in range(4, 10)]+[17, 12, 948]:
+        a = numpy.random.rand(n) + 1j*numpy.random.rand(n)
+        f_a = fft(a)
+        a2 = ifft(f_a)
+        assert la.norm(a-a2) < 1e-10
+
+        f_a_numpy = numpy.fft.fft(a)
+        assert la.norm(f_a-f_a_numpy) < 1e-10
+
+
+
+
+from pymbolic.mapper import IdentityMapper
+class NearZeroKiller(IdentityMapper):
+    def map_constant(self, expr):
+        if isinstance(expr, complex):
+            r = expr.real
+            i = expr.imag
+            if abs(r) < 1e-15:
+                r = 0
+            if abs(i) < 1e-15:
+                i = 0
+            return complex(r, i)
+        else:
+            return expr
+
+
+
+
+
+def test_fft():
+    import py.test
+    numpy = py.test.importorskip("numpy")
+
+    from pymbolic import var
+    from pymbolic.algorithm import fft
+
+    vars = numpy.array([var(chr(97+i)) for i in range(16)], dtype=object)
+    print vars
+
+    def wrap_intermediate(x):
+        if len(x) > 1:
+            from hedge.optemplate import make_common_subexpression
+            return make_common_subexpression(x)
+        else:
+            return x
+
+    nzk = NearZeroKiller()
+    print nzk(fft(vars))
+    traced_fft = nzk(fft(vars, wrap_intermediate=wrap_intermediate))
+
+    from pymbolic.mapper.stringifier import PREC_NONE
+    from pymbolic.mapper.c_code import CCodeMapper
+    ccm = CCodeMapper()
+
+    code = [ccm(tfi, PREC_NONE) for tfi in traced_fft]
+
+    for i, cse in enumerate(ccm.cses):
+        print "_cse%d = %s" % (i, cse)
+
+    for i, line in enumerate(code):
+        print "result[%d] = %s" % (i, line)
+
+
+
+
+def test_sparse_multiply():
+    import py.test
+    numpy = py.test.importorskip("numpy")
+    py.test.importorskip("scipy")
+    import scipy.sparse as ss
+    import scipy.sparse.linalg as sla
+
+    la = numpy.linalg
+
+    mat = numpy.random.randn(10, 10)
+    s_mat = ss.csr_matrix(mat)
+
+    vec = numpy.random.randn(10)
+    mat_vec = s_mat*vec
+
+    from pymbolic.algorithm import csr_matrix_multiply
+    mat_vec_2 = csr_matrix_multiply(s_mat, vec)
+
+    assert la.norm(mat_vec-mat_vec_2) < 1e-14