From 37fac646c37e89ccb2e5a01d7d5c574448d515db Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 1 Jun 2017 15:49:40 -0400 Subject: [PATCH 01/30] Switch Cahn-Hilliard over to Yukawa kernel, towards PDE verification --- examples/cahn-hilliard.py | 54 ++++++++++++++++++------- pytential/symbolic/pde/cahn_hilliard.py | 40 ++++++++++-------- 2 files changed, 63 insertions(+), 31 deletions(-) diff --git a/examples/cahn-hilliard.py b/examples/cahn-hilliard.py index f7cd6559..5b794f65 100644 --- a/examples/cahn-hilliard.py +++ b/examples/cahn-hilliard.py @@ -1,4 +1,5 @@ import numpy as np +import numpy.linalg as la import pyopencl as cl import pyopencl.clmath # noqa @@ -50,11 +51,7 @@ def main(): density_discr = qbx.density_discr from pytential.symbolic.pde.cahn_hilliard import CahnHilliardOperator - chop = CahnHilliardOperator( - # FIXME: Constants? - lambda1=1.5, - lambda2=1.25, - c=1) + chop = CahnHilliardOperator(b=5, c=1) unk = chop.make_unknown("sigma") bound_op = bind(qbx, chop.operator(unk)) @@ -65,12 +62,12 @@ def main(): def g(xvec): x, y = xvec - return cl.clmath.atan2(y, x) + return cl.clmath.cos(5*cl.clmath.atan2(y, x)) bc = sym.make_obj_array([ # FIXME: Realistic BC - g(nodes), - -g(nodes), + 5+g(nodes), + 3-g(nodes), ]) from pytential.solve import gmres @@ -80,19 +77,43 @@ def main(): stall_iterations=0, hard_failure=True) + sigma = gmres_result.solution + # }}} - # {{{ postprocess/visualize + # {{{ check pde - sigma = gmres_result.solution + def check_pde(): + from sumpy.point_calculus import CalculusPatch + cp = CalculusPatch(np.zeros(2)) + targets = cl.array.to_device(queue, cp.points) + + u, v = bind( + (qbx, PointsTarget(targets)), + chop.representation(unk))(queue, sigma=sigma) + + u = u.get().real + v = v.get().real + + print(la.norm(u), la.norm(v)) + print(la.norm( + cp.laplace(cp.laplace(u)) - chop.b * cp.laplace(u) + chop.c*u)) + + print(la.norm( + v + cp.laplace(u) - chop.b*u)) + 1/0 + + check_pde() + + # }}} + + # {{{ postprocess/visualize from sumpy.visualization import FieldPlotter fplot = FieldPlotter(np.zeros(2), extent=5, npoints=500) targets = cl.array.to_device(queue, fplot.points) - qbx_stick_out = qbx.copy(target_stick_out_factor=0.05) - indicator_qbx = qbx_stick_out.copy(qbx_order=2) from sumpy.kernel import LaplaceKernel @@ -104,9 +125,9 @@ def main(): queue, sigma=ones_density).get() try: - fld_in_vol = bind( + u, v = bind( (qbx_stick_out, PointsTarget(targets)), - chop.representation(unk))(queue, sigma=sigma).get() + chop.representation(unk))(queue, sigma=sigma) except QBXTargetAssociationFailedException as e: fplot.write_vtk_file( "failed-targets.vts", @@ -115,12 +136,15 @@ def main(): ] ) raise + u = u.get().real + v = v.get().real #fplot.show_scalar_in_mayavi(fld_in_vol.real, max_val=5) fplot.write_vtk_file( "potential.vts", [ - ("potential", fld_in_vol), + ("u", u), + ("v", v), ("indicator", indicator), ] ) diff --git a/pytential/symbolic/pde/cahn_hilliard.py b/pytential/symbolic/pde/cahn_hilliard.py index fd6ec482..62faa4c8 100644 --- a/pytential/symbolic/pde/cahn_hilliard.py +++ b/pytential/symbolic/pde/cahn_hilliard.py @@ -34,10 +34,15 @@ from functools import partial class CahnHilliardOperator(L2WeightedPDEOperator): - def __init__(self, lambda1, lambda2, c): - self.lambdas = (lambda1, lambda2) + def __init__(self, b, c): + self.b = b self.c = c + lam1 = -(np.sqrt(b**2-4*c)-b)/2 + lam2 = (np.sqrt(b**2-4*c)+b)/2 + + self.lambdas = sorted([lam1, lam2], key=abs, reverse=True) # biggest first + def make_unknown(self, name): return 
sym.make_sym_vector(name, 2) @@ -45,35 +50,38 @@ class CahnHilliardOperator(L2WeightedPDEOperator): if op_map is None: op_map = lambda x: x # noqa: E731 - from sumpy.kernel import HelmholtzKernel - hhk = HelmholtzKernel(2, allow_evanescent=True) - hhk_scaling = 1j/4 + from sumpy.kernel import YukawaKernel + knl = YukawaKernel(2) if i == 0: lam1, lam2 = self.lambdas return ( - # FIXME: Verify scaling - -1/(2*np.pi*(lam1**2-lam2**2)) / hhk_scaling - * - ( - op_map(sym.S(hhk, density, k=1j*lam1, + 1/(lam1**2-lam2**2) + * ( + op_map(sym.S(knl, density, lam=lam1, qbx_forced_limit=qbx_forced_limit)) - - op_map(sym.S(hhk, density, k=1j*lam2, + op_map(sym.S(knl, density, lam=lam2, qbx_forced_limit=qbx_forced_limit)))) else: return ( - # FIXME: Verify scaling - - -1/(2*np.pi) / hhk_scaling - * op_map(sym.S(hhk, density, k=1j*self.lambdas[i-1], + op_map(sym.S(knl, density, lam=self.lambdas[i-1], qbx_forced_limit=qbx_forced_limit))) def representation(self, unknown): + """Return (u, v) in a :mod:`numpy` object array. + """ sig1, sig2 = unknown S_G = partial(self.S_G, qbx_forced_limit=None) # noqa: N806 + laplacian = partial(sym.laplace, 2) - return S_G(1, sig1) + S_G(0, sig2) + def u(op_map=None): + return S_G(1, sig1, op_map=op_map) + S_G(0, sig2, op_map=op_map) + + return sym.make_obj_array([ + u(), + -u(op_map=laplacian) + self.b*u() + ]) def operator(self, unknown): sig1, sig2 = unknown -- GitLab From 4638de23e20903e8c5540b0d8e12abdc7008a56d Mon Sep 17 00:00:00 2001 From: xywei Date: Thu, 1 Jun 2017 18:32:07 -0400 Subject: [PATCH 02/30] adding volume potential --- examples/cahn-hilliard.py | 297 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 283 insertions(+), 14 deletions(-) diff --git a/examples/cahn-hilliard.py b/examples/cahn-hilliard.py index f7cd6559..1f589405 100644 --- a/examples/cahn-hilliard.py +++ b/examples/cahn-hilliard.py @@ -18,6 +18,9 @@ qbx_order = bdry_quad_order bdry_ovsmp_quad_order = 4*bdry_quad_order fmm_order = 8 +vol_quad_order = 5 +vol_qbx_order = 2 + # }}} @@ -28,37 +31,303 @@ def main(): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) - from meshmode.mesh.generation import ellipse, make_curve_mesh - from functools import partial +# {{{ volume mesh generation + + from enum import Enum + class Geometry(Enum): + RegularRectangle = 1 + Circle = 2 + + shape = Geometry.Circle + + if shape == Geometry.RegularRectangle: + from meshmode.mesh.generation import generate_regular_rect_mesh + ext = 1. 
+ h = 0.05 + mesh = generate_regular_rect_mesh( + a=(-ext/2., -ext/2.), + b=( ext/2., ext/2.), + n=(int(ext/h), int(ext/h)) + ) + elif shape == Geometry.Circle: + from meshmode.mesh.io import import generate_gmsh + h = 0.05 + mesh = generate_emsh( + FileSource("circle.step"), + 2, + order=mesh_order, + force_ambient_dim=2, + orther_options=["-string", + "Mesh.CharacteristicLengthMax = %g;" % h] + ) + else: + 1/0 + + logger.info("%d elements" % mesh.nelements) + +# }}} + +# {{{ discretization and connections - mesh = make_curve_mesh( - partial(ellipse, 2), - np.linspace(0, 1, nelements+1), - mesh_order) + vol_discr = Discretization(ctx, mesh, + InterpolatoryQuadratureSimplexGroupFactory(vol_quad_order)) - pre_density_discr = Discretization( - cl_ctx, mesh, - InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order)) + from meshmode.mesh import BTAG_ALL + from meshmode.discretization.connection import make_face_restriction + pre_density_connection = make_face_restriction( + vol_discr, + InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order), + BTAG_ALL) + pre_density_discr = pre_density_connection.to_discr from pytential.qbx import ( QBXLayerPotentialSource, QBXTargetAssociationFailedException) + qbx, _ = QBXLayerPotentialSource( - pre_density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, + pre_density_discr, + fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, fmm_order=fmm_order, expansion_disks_in_tree_have_extent=True, ).with_refinement() + density_discr = qbx.density_discr + # composition of connetions + # vol_discr --> pre_density_discr --> density_discr + # via ChainedDiscretizationConnection + + +# from meshmode.mesh.generation import ellipse, make_curve_mesh +# from functools import partial +# +# mesh = make_curve_mesh( +# partial(ellipse, 2), +# np.linspace(0, 1, nelements+1), +# mesh_order) +# +# pre_density_discr = Discretization( +# cl_ctx, mesh, +# InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order)) +# +# from pytential.qbx import ( +# QBXLayerPotentialSource, QBXTargetAssociationFailedException) +# qbx, _ = QBXLayerPotentialSource( +# pre_density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, +# fmm_order=fmm_order, +# expansion_disks_in_tree_have_extent=True, +# ).with_refinement() +# density_discr = qbx.density_discr + +# }}} + +# {{{ a kernel class for G0 +# FIXME: will the expressions work when lambda is complex? +# (may need conversion from 1j to var("I")) + class ShidongKernel(ExpressionKernel): + init_arg_names = ("dim", "lambda1", "lambda2") + + def __init__(self, dim=None, lambda1=0., lambda2=1.): + """ + :arg lambda1,lambda2: The roots of the quadratic equation w.r.t + laplacian. + """ + # Assert against repeated roots. + if abs(lambda1**2 - lambda2**2) < 1e-9: + raise RuntimeError("illposed since input roots are too close") + +# Based on http://mathworld.wolfram.com/ModifiedBesselFunctionoftheSecondKind.html + if dim == 2: + r = pymbolic_real_norm_2(make_sym_vector("d", dim)) + expr = var("hankel_1")(0, var("I") * lambda1 * r) - \ + var("hankel_1")(0, var("I") * lambda2 * r) + scaling = 1. / ( 4. 
* var("I") * (lambda1**2 - lambda2**2) ) + else: + raise RuntimeError("unsupported dimensionality") + + ExpressionKernel.__init__( + self, + dim, + expression=expr, + scaling = scaling, + is_complex_valued=True) + + self.lambda1 = lambda1 + self.lambda2 = lambda2 + + def __getinitargs__(self): + return(self._dim, self.lambda1, self.lambda2) + + def update_persistent_hash(self, key_hash, key_builder): + key_hash.update(type(self).__name__.encode("utf8")) + key_builder.rec(key_hash, + (self._dim, self.lambda1, self.lambda2) + ) + + def __repr__(self): + if self._dim is not None: + return "ShdgKnl%dD(%f, %f)" % ( + self._dim, self.lambda1, self.lambda2) + else: + return "ShdgKnl(%f, %f)" % (self.lambda1, self.lambda2) + + def prepare_loopy_kernel(self, loopy_knl): + from sumpy.codegen import (bessel_preamble_generator, bessel_mangler) + loopy_knl = lp.register_function_manglers(loopy_knl, + [bessel_mangler]) + loopy_knl = lp.register_preamble_generators(loopy_knl, + [bessel_preamble_generator]) + return loopy_knl + + def get_args(self): + k_dtype = np.complex128 + return [ + KernelArgument( + loopy_arg=lp.ValueArg("shidong_kernel", k_dtype), + )] + + mapper_method = "map_shidong_kernel" + +# }}} + +# {{{ extended kernel getters + def get_extkernel_for_G0(lambda1, lambda2): + from sumpy.symbolic import pymbolic_real_norm_2 + from pymbolic.primitives import make_sym_vector + from pymbolic import var + + d = make_sym_vector("d", 3) + r2 = pymbolic_real_norm_2(d[:-1]) + expr = var("hankel_1")(0, var("I") * lambda1 * r2 + + var("I") * d[-1]**2) \ + - var("hankel_1")(0, var("I") * lambda2 * r2 + + var("I") * d[-1]**2) + scaling = 1. / ( 4. * var("I") * (lambda1**2 - lambda2**2) ) + + from sumpy.kernel import ExpressionKernel + return ExpressionKernel( + dim=3, + expression=expr, + scaling=scaling, + is_complex_valued=True) + + def get_extkernel_for_G1(lamb): + from sumpy.symbolic import pymbolic_real_norm_2 + from pymbolic.primitives import make_sym_vector + from pymbolic import var + + d = make_sym_vector("d", 3) + r2 = pymbolic_real_norm_2(d[:-1]) + expr = var("hankel_1")(0, var("I") * lamb * r2 + + var("I") * d[-1]**2) + scaling = - var("I") / 4. + + from sumpy.kernel import ExpressionKernel + return ExpressionKernel( + dim=3, + expression=expr, + scaling=scaling, + is_complex_valued=True) + + +# }}} + +# {{{ equation info + + s = 1.5 + epsilon = 0.01 + delta_t = 0.05 + + b = s / (epsilon**2) + c = 1. / (epsilon * delta_t) + + sqdet = np.sqrt( b**2 - 4. * c ) + assert np.abs(sqdet) > 1e-6 + + lambda1 = ( b + sqdet ) / 2. + lambda2 = ( b - sqdet ) / 2. + from pytential.symbolic.pde.cahn_hilliard import CahnHilliardOperator chop = CahnHilliardOperator( # FIXME: Constants? 
- lambda1=1.5, - lambda2=1.25, - c=1) + lambda1=lambda1, + lambda2=lambda2, + c=c) unk = chop.make_unknown("sigma") bound_op = bind(qbx, chop.operator(unk)) + yukawa_2d_in_3d_kernel_1 = get_extkernel_for_G1(lambda1) + shidong_2d_in_3d_kernel = get_extkernel_for_G0(lambda1, lambda2) + + from sumpy.qbx import LayerPotential + from sumpy.qbx import LineTaylorLocalExpansion + layer_pot_v0f1 = LayerPotential(ctx, [ + LineTaylorLocalExpansion(shidong_2d_in_3d_kernel, + order=vol_qbx_order)]) + layer_pot_v1f1 = LayerPotential(ctx, [ + LineTaylorLocalExpansion(yukawa_2d_in_3d_kernel_1, + order=vol_qbx_order)]) +# }}} + +# {{{ volume integral + + vol_x = vol_discr.nodes(),with_queue(queue) + + # target points + targets = cl.array.zeros(queue, (3,) + vol_x.shape[1:], vol_x.dtype) + targets[:2] = vol_x + + # expansion centers + center_dist = 0.125*np.min( + cl.clmath.sqrt( + bind(vol_discr, + p.area_element(mesh.ambient_dim, mesh.dim)) + (queue)).get()) + centers = make_obj_array([ci.copy().reshape(vol_discr.nnodes) for ci in targets]) + centers[2][:] = center_dist + print(center_dist) + + # source points + # FIXME: use over sampled source points? + sources = cl.array.zeros(queue, (3,) + vol_x.shape[1:], vol_x.dtype) + sources[:2] = vol_x + + # a manufactured f1 + x_sin_factor = 30 + y_sin_factor = 10 + def f1_func(x, y) + return 0.1 * cl.clmath.sin(x_sin_factor*x) * cl.clmath.sin(y_sin_factor*y) + + # strengths (with quadrature weights) + f1 = f1_func(vol_x[0], vol_x[1]) + vol_weights = bind(vol_discr, + p.area_element(mesh.ambient_dim, mesh.dim) * p.QWeight() + )(queue) + + print("volume: %d source nodes, %d target nodes" % ( + vol_discr.nnodes, vol_discr.nnodes)) + + evt, (vol_pot_v0f1,) = layer_pot_v0f1( + queue, + targets=targets.reshape(3, vol_discr.nnodes), + centers=centers, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=( + (vol_weights * f1).reshape(vol_discr.nnodes),) + ) + + evt, (vol_pot_v1f1,) = layer_pot_v1f1( + queue, + targets=targets.reshape(3, vol_discr.nnodes), + centers=centers, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=( + (vol_weights * f1).reshape(vol_discr.nnodes),) + ) + +# }}} + + # {{{ fix rhs and solve nodes = density_discr.nodes().with_queue(queue) @@ -101,7 +370,7 @@ def main(): indicator = bind( (indicator_qbx, PointsTarget(targets)), sym.D(LaplaceKernel(2), sym.var("sigma")))( - queue, sigma=ones_density).get() + queue, sigma=ones_density).get() try: fld_in_vol = bind( -- GitLab From bf15b399975e72be0fdf28c82dc6bd07c34df95e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 1 Jun 2017 22:05:25 -0400 Subject: [PATCH 03/30] Fix determination of lambda1/2 in Cahn-Hilliard --- pytential/symbolic/pde/cahn_hilliard.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pytential/symbolic/pde/cahn_hilliard.py b/pytential/symbolic/pde/cahn_hilliard.py index 62faa4c8..459f1993 100644 --- a/pytential/symbolic/pde/cahn_hilliard.py +++ b/pytential/symbolic/pde/cahn_hilliard.py @@ -38,8 +38,14 @@ class CahnHilliardOperator(L2WeightedPDEOperator): self.b = b self.c = c - lam1 = -(np.sqrt(b**2-4*c)-b)/2 - lam2 = (np.sqrt(b**2-4*c)+b)/2 + lam1 = np.sqrt(-(np.sqrt(b**2-4*c)-b)/2) + lam2 = np.sqrt((np.sqrt(b**2-4*c)+b)/2) + + def f(x): + return x**2 - b*x + c + + assert np.abs(f(lam1**2)) < 1e-12 + assert np.abs(f(lam2**2)) < 1e-12 self.lambdas = sorted([lam1, lam2], key=abs, reverse=True) # biggest first -- GitLab From 5b6af6d66fc607196573033c11ee295b40813755 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 1 
Jun 2017 22:06:09 -0400 Subject: [PATCH 04/30] Cahn-Hilliard PDE test working --- examples/cahn-hilliard.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/cahn-hilliard.py b/examples/cahn-hilliard.py index 5b794f65..f0b869a6 100644 --- a/examples/cahn-hilliard.py +++ b/examples/cahn-hilliard.py @@ -85,7 +85,7 @@ def main(): def check_pde(): from sumpy.point_calculus import CalculusPatch - cp = CalculusPatch(np.zeros(2)) + cp = CalculusPatch(np.zeros(2), order=4, h=0.1) targets = cl.array.to_device(queue, cp.points) u, v = bind( @@ -95,9 +95,12 @@ def main(): u = u.get().real v = v.get().real + lap_u = -(v - chop.b*u) + print(la.norm(u), la.norm(v)) + print(la.norm( - cp.laplace(cp.laplace(u)) - chop.b * cp.laplace(u) + chop.c*u)) + cp.laplace(lap_u) - chop.b * cp.laplace(u) + chop.c*u)) print(la.norm( v + cp.laplace(u) - chop.b*u)) @@ -114,6 +117,7 @@ def main(): targets = cl.array.to_device(queue, fplot.points) + qbx_stick_out = qbx.copy(target_stick_out_factor=0.05) indicator_qbx = qbx_stick_out.copy(qbx_order=2) from sumpy.kernel import LaplaceKernel -- GitLab From 1128669a22ef31f8a54c03a5641af3adbaddc28c Mon Sep 17 00:00:00 2001 From: xywei Date: Thu, 1 Jun 2017 23:08:35 -0400 Subject: [PATCH 05/30] minor fixes --- examples/cahn-hilliard.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/cahn-hilliard.py b/examples/cahn-hilliard.py index 16f6091a..1495bddb 100644 --- a/examples/cahn-hilliard.py +++ b/examples/cahn-hilliard.py @@ -50,9 +50,9 @@ def main(): n=(int(ext/h), int(ext/h)) ) elif shape == Geometry.Circle: - from meshmode.mesh.io import import generate_gmsh + from meshmode.mesh.io import generate_gmsh, FileSource h = 0.05 - mesh = generate_emsh( + mesh = generate_gmsh( FileSource("circle.step"), 2, order=mesh_order, @@ -291,7 +291,7 @@ def main(): # a manufactured f1 x_sin_factor = 30 y_sin_factor = 10 - def f1_func(x, y) + def f1_func(x, y): return 0.1 * cl.clmath.sin(x_sin_factor*x) * cl.clmath.sin(y_sin_factor*y) # strengths (with quadrature weights) -- GitLab From d02d76fcdc3d2862cb41d7ec9082347605b7f539 Mon Sep 17 00:00:00 2001 From: xywei Date: Fri, 2 Jun 2017 10:17:02 -0400 Subject: [PATCH 06/30] improved root finding algorithm --- pytential/symbolic/pde/cahn_hilliard.py | 33 +++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/pytential/symbolic/pde/cahn_hilliard.py b/pytential/symbolic/pde/cahn_hilliard.py index 459f1993..3bbf1bf0 100644 --- a/pytential/symbolic/pde/cahn_hilliard.py +++ b/pytential/symbolic/pde/cahn_hilliard.py @@ -38,12 +38,41 @@ class CahnHilliardOperator(L2WeightedPDEOperator): self.b = b self.c = c - lam1 = np.sqrt(-(np.sqrt(b**2-4*c)-b)/2) - lam2 = np.sqrt((np.sqrt(b**2-4*c)+b)/2) + # Issue: + # - let crat = np.abs(4.*c) / ( b**2 + 1e-12 ) + # - when crat is close to zero, sqrt(b**2-4*c) is close to abs(b), then for b>=0, + # sqrt(b**2-4*c) - b is inaccurate. + # - similarly, when crat is close to one, sqrt(b**2-4*c) is close to zero, then for b>0, + # sqrt(b**2-4*c) + b is inaccurate. 
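# A quick numeric sketch of the cancellation described above (an
# illustrative aside, not part of the patch; the values are the ones the
# example driver later uses, b = s/epsilon**2 = 1.5e4 and
# c = 1/(epsilon*delta_t) = 2e3):

import numpy as np

b, c = 1.5e4, 2e3        # 4*c << b**2, so sqrt(b**2 - 4*c) is close to b
small_naive = (b - np.sqrt(b**2 - 4*c)) / 2         # subtractive cancellation
small_citardauq = 2*c / (b + np.sqrt(b**2 - 4*c))   # cancellation-free

def resid(x):
    return x**2 - b*x + c

# The naive evaluation loses about five of its ~16 significant digits
# here; its residual comes out orders of magnitude larger.
print(resid(small_naive), resid(small_citardauq))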
+        # Solution:
+        #   - find a criteria for crat to choose from formulae, or
+        #   - use the computed root with smaller residual
+        def quadratic_formula_1(a, b, c):
+            return ( -b + np.sqrt(b**2 - 4*a*c) ) / (2*a)
+        def quadratic_formula_2(a, b, c):
+            return ( -b - np.sqrt(b**2 - 4*a*c) ) / (2*a)
+        def citardauq_formula_1(a, b, c):
+            return 2*c / (-b - np.sqrt(b**2-4*a*c))
+        def citardauq_formula_2(a, b, c):
+            return 2*c / (-b + np.sqrt(b**2-4*a*c))
 
         def f(x):
             return x**2 - b*x + c
 
+        root11 = quadratic_formula_1(1, -b, c)
+        root12 = citardauq_formula_1(1, -b, c)
+        if np.abs(f(root11)) < np.abs(f(root12)):
+            lam1 = np.sqrt(root11)
+        else:
+            lam1 = np.sqrt(root12)
+
+        root21 = quadratic_formula_2(1, -b, c)
+        root22 = citardauq_formula_2(1, -b, c)
+        if np.abs(f(root21)) < np.abs(f(root22)):
+            lam2 = np.sqrt(root21)
+        else:
+            lam2 = np.sqrt(root22)
+
         assert np.abs(f(lam1**2)) < 1e-12
         assert np.abs(f(lam2**2)) < 1e-12
 
-- 
GitLab


From 495deb6d75f292ae290561a31f0cef1af731082f Mon Sep 17 00:00:00 2001
From: xywei
Date: Fri, 2 Jun 2017 10:17:23 -0400
Subject: [PATCH 07/30] more fixes

---
 examples/cahn-hilliard.py | 237 +++++++++++++++++++------------------
 1 file changed, 119 insertions(+), 118 deletions(-)

diff --git a/examples/cahn-hilliard.py b/examples/cahn-hilliard.py
index 1495bddb..7c8e9ff8 100644
--- a/examples/cahn-hilliard.py
+++ b/examples/cahn-hilliard.py
@@ -9,6 +9,7 @@ from meshmode.discretization.poly_element import \
 from pytential import bind, sym, norm  # noqa
 from pytential.target import PointsTarget
 
+
 # {{{ set some constants for use below
 
 nelements = 20
@@ -23,10 +24,121 @@ vol_qbx_order = 2
 
 # }}}
 
+# {{{ a kernel class for G0
+# FIXME: will the expressions work when lambda is complex?
+# (may need conversion from 1j to var("I"))
+from sumpy.kernel import ExpressionKernel
+class ShidongKernel(ExpressionKernel):
+    init_arg_names = ("dim", "lambda1", "lambda2")
+
+    def __init__(self, dim=None, lambda1=0., lambda2=1.):
+        """
+        :arg lambda1,lambda2: The roots of the quadratic equation w.r.t
+             laplacian.
+        """
+        # Assert against repeated roots.
+        if abs(lambda1**2 - lambda2**2) < 1e-9:
+            raise RuntimeError("illposed since input roots are too close")
+
+# Based on http://mathworld.wolfram.com/ModifiedBesselFunctionoftheSecondKind.html
+        if dim == 2:
+            r = pymbolic_real_norm_2(make_sym_vector("d", dim))
+            expr = var("hankel_1")(0, var("I") * lambda1 * r) - \
+                   var("hankel_1")(0, var("I") * lambda2 * r)
+            scaling = 1. / ( 4.
* var("I") * (lambda1**2 - lambda2**2) ) + else: + raise RuntimeError("unsupported dimensionality") + + ExpressionKernel.__init__( + self, + dim, + expression=expr, + scaling = scaling, + is_complex_valued=True) + + self.lambda1 = lambda1 + self.lambda2 = lambda2 + + def __getinitargs__(self): + return(self._dim, self.lambda1, self.lambda2) + + def update_persistent_hash(self, key_hash, key_builder): + key_hash.update(type(self).__name__.encode("utf8")) + key_builder.rec(key_hash, + (self._dim, self.lambda1, self.lambda2) + ) + + def __repr__(self): + if self._dim is not None: + return "ShdgKnl%dD(%f, %f)" % ( + self._dim, self.lambda1, self.lambda2) + else: + return "ShdgKnl(%f, %f)" % (self.lambda1, self.lambda2) + + def prepare_loopy_kernel(self, loopy_knl): + from sumpy.codegen import (bessel_preamble_generator, bessel_mangler) + loopy_knl = lp.register_function_manglers(loopy_knl, + [bessel_mangler]) + loopy_knl = lp.register_preamble_generators(loopy_knl, + [bessel_preamble_generator]) + return loopy_knl + + def get_args(self): + k_dtype = np.complex128 + return [ + KernelArgument( + loopy_arg=lp.ValueArg("shidong_kernel", k_dtype), + )] + + mapper_method = "map_shidong_kernel" + +# }}} + +# {{{ extended kernel getters +def get_extkernel_for_G0(lambda1, lambda2): + from sumpy.symbolic import pymbolic_real_norm_2 + from pymbolic.primitives import make_sym_vector + from pymbolic import var + + d = make_sym_vector("d", 3) + r2 = pymbolic_real_norm_2(d[:-1]) + expr = var("hankel_1")(0, var("I") * lambda1 * r2 + + var("I") * d[-1]**2) \ + - var("hankel_1")(0, var("I") * lambda2 * r2 + + var("I") * d[-1]**2) + scaling = 1. / ( 4. * var("I") * (lambda1**2 - lambda2**2) ) + + from sumpy.kernel import ExpressionKernel + return ExpressionKernel( + dim=3, + expression=expr, + scaling=scaling, + is_complex_valued=True) + +def get_extkernel_for_G1(lamb): + from sumpy.symbolic import pymbolic_real_norm_2 + from pymbolic.primitives import make_sym_vector + from pymbolic import var + + d = make_sym_vector("d", 3) + r2 = pymbolic_real_norm_2(d[:-1]) + expr = var("hankel_1")(0, var("I") * lamb * r2 + + var("I") * d[-1]**2) + scaling = - var("I") / 4. + + from sumpy.kernel import ExpressionKernel + return ExpressionKernel( + dim=3, + expression=expr, + scaling=scaling, + is_complex_valued=True) + +# }}} def main(): import logging logging.basicConfig(level=logging.INFO) + logger = logging.getLogger(__name__) cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) @@ -57,11 +169,11 @@ def main(): 2, order=mesh_order, force_ambient_dim=2, - orther_options=["-string", + other_options=["-string", "Mesh.CharacteristicLengthMax = %g;" % h] ) else: - 1/0 + RuntimeError("unsupported geometry") logger.info("%d elements" % mesh.nelements) @@ -69,7 +181,7 @@ def main(): # {{{ discretization and connections - vol_discr = Discretization(ctx, mesh, + vol_discr = Discretization(cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(vol_quad_order)) from meshmode.mesh import BTAG_ALL @@ -118,117 +230,6 @@ def main(): # ).with_refinement() # density_discr = qbx.density_discr -# }}} - -# {{{ a kernel class for G0 -# FIXME: will the expressions work when lambda is complex? -# (may need conversion from 1j to var("I")) - class ShidongKernel(ExpressionKernel): - init_arg_names = ("dim", "lambda1", "lambda2") - - def __init__(self, dim=None, lambda1=0., lambda2=1.): - """ - :arg lambda1,lambda2: The roots of the quadratic equation w.r.t - laplacian. - """ - # Assert against repeated roots. 
- if abs(lambda1**2 - lambda2**2) < 1e-9: - raise RuntimeError("illposed since input roots are too close") - -# Based on http://mathworld.wolfram.com/ModifiedBesselFunctionoftheSecondKind.html - if dim == 2: - r = pymbolic_real_norm_2(make_sym_vector("d", dim)) - expr = var("hankel_1")(0, var("I") * lambda1 * r) - \ - var("hankel_1")(0, var("I") * lambda2 * r) - scaling = 1. / ( 4. * var("I") * (lambda1**2 - lambda2**2) ) - else: - raise RuntimeError("unsupported dimensionality") - - ExpressionKernel.__init__( - self, - dim, - expression=expr, - scaling = scaling, - is_complex_valued=True) - - self.lambda1 = lambda1 - self.lambda2 = lambda2 - - def __getinitargs__(self): - return(self._dim, self.lambda1, self.lambda2) - - def update_persistent_hash(self, key_hash, key_builder): - key_hash.update(type(self).__name__.encode("utf8")) - key_builder.rec(key_hash, - (self._dim, self.lambda1, self.lambda2) - ) - - def __repr__(self): - if self._dim is not None: - return "ShdgKnl%dD(%f, %f)" % ( - self._dim, self.lambda1, self.lambda2) - else: - return "ShdgKnl(%f, %f)" % (self.lambda1, self.lambda2) - - def prepare_loopy_kernel(self, loopy_knl): - from sumpy.codegen import (bessel_preamble_generator, bessel_mangler) - loopy_knl = lp.register_function_manglers(loopy_knl, - [bessel_mangler]) - loopy_knl = lp.register_preamble_generators(loopy_knl, - [bessel_preamble_generator]) - return loopy_knl - - def get_args(self): - k_dtype = np.complex128 - return [ - KernelArgument( - loopy_arg=lp.ValueArg("shidong_kernel", k_dtype), - )] - - mapper_method = "map_shidong_kernel" - -# }}} - -# {{{ extended kernel getters - def get_extkernel_for_G0(lambda1, lambda2): - from sumpy.symbolic import pymbolic_real_norm_2 - from pymbolic.primitives import make_sym_vector - from pymbolic import var - - d = make_sym_vector("d", 3) - r2 = pymbolic_real_norm_2(d[:-1]) - expr = var("hankel_1")(0, var("I") * lambda1 * r2 - + var("I") * d[-1]**2) \ - - var("hankel_1")(0, var("I") * lambda2 * r2 - + var("I") * d[-1]**2) - scaling = 1. / ( 4. * var("I") * (lambda1**2 - lambda2**2) ) - - from sumpy.kernel import ExpressionKernel - return ExpressionKernel( - dim=3, - expression=expr, - scaling=scaling, - is_complex_valued=True) - - def get_extkernel_for_G1(lamb): - from sumpy.symbolic import pymbolic_real_norm_2 - from pymbolic.primitives import make_sym_vector - from pymbolic import var - - d = make_sym_vector("d", 3) - r2 = pymbolic_real_norm_2(d[:-1]) - expr = var("hankel_1")(0, var("I") * lamb * r2 - + var("I") * d[-1]**2) - scaling = - var("I") / 4. - - from sumpy.kernel import ExpressionKernel - return ExpressionKernel( - dim=3, - expression=expr, - scaling=scaling, - is_complex_valued=True) - - # }}} # {{{ equation info @@ -239,6 +240,7 @@ def main(): b = s / (epsilon**2) c = 1. / (epsilon * delta_t) + print ( ["CH operator b = ", b, ", c = ", c] ) sqdet = np.sqrt( b**2 - 4. * c ) assert np.abs(sqdet) > 1e-6 @@ -247,7 +249,7 @@ def main(): lambda2 = ( b - sqdet ) / 2. 
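# An aside for orientation (a sketch, not part of the patch): lambda1 and
# lambda2 above are the two roots of z**2 - b*z + c, so by Vieta's formulas
# lambda1 + lambda2 = b and lambda1*lambda2 = c, and the linearized
# Cahn-Hilliard operator factors as
#
#     laplace(laplace(u)) - b*laplace(u) + c*u
#         = (laplace - lambda1)(laplace - lambda2) u,
#
# which is the identity check_pde() tests above. A symbolic confirmation,
# assuming sympy is available:

import sympy as sp

b, c, z = sp.symbols("b c z", positive=True)
root1, root2 = sp.solve(z**2 - b*z + c, z)
assert sp.simplify(root1 + root2 - b) == 0    # Vieta: sum of the roots
assert sp.simplify(root1*root2 - c) == 0      # Vieta: product of the roots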
from pytential.symbolic.pde.cahn_hilliard import CahnHilliardOperator - chop = CahnHilliardOperator(b=5, c=1) + chop = CahnHilliardOperator(b=b, c=c) unk = chop.make_unknown("sigma") bound_op = bind(qbx, chop.operator(unk)) @@ -257,10 +259,10 @@ def main(): from sumpy.qbx import LayerPotential from sumpy.qbx import LineTaylorLocalExpansion - layer_pot_v0f1 = LayerPotential(ctx, [ + layer_pot_v0f1 = LayerPotential(cl_ctx, [ LineTaylorLocalExpansion(shidong_2d_in_3d_kernel, order=vol_qbx_order)]) - layer_pot_v1f1 = LayerPotential(ctx, [ + layer_pot_v1f1 = LayerPotential(cl_ctx, [ LineTaylorLocalExpansion(yukawa_2d_in_3d_kernel_1, order=vol_qbx_order)]) # }}} @@ -323,7 +325,6 @@ def main(): # }}} - # {{{ fix rhs and solve nodes = density_discr.nodes().with_queue(queue) -- GitLab From ccdbc506c8e28e540da4ab0d4204562e42ec0528 Mon Sep 17 00:00:00 2001 From: xywei Date: Fri, 2 Jun 2017 15:08:21 -0400 Subject: [PATCH 08/30] style fixes --- examples/cahn-hilliard.py | 85 ++++++++++++++++--------- pytential/symbolic/pde/cahn_hilliard.py | 15 +++-- 2 files changed, 64 insertions(+), 36 deletions(-) diff --git a/examples/cahn-hilliard.py b/examples/cahn-hilliard.py index 7c8e9ff8..6c531e1c 100644 --- a/examples/cahn-hilliard.py +++ b/examples/cahn-hilliard.py @@ -10,6 +10,12 @@ from meshmode.discretization.poly_element import \ from pytential import bind, sym, norm # noqa from pytential.target import PointsTarget +from pytools.obj_array import make_obj_array +import pytential.symbolic.primitives as p + +from sumpy.kernel import ExpressionKernel +import loopy as lp + # {{{ set some constants for use below nelements = 20 @@ -95,6 +101,17 @@ class ShidongKernel(ExpressionKernel): # }}} # {{{ extended kernel getters +class HankelBasedKernel(ExpressionKernel): + def prepare_loopy_kernel(self, loopy_knl): + from sumpy.codegen import (bessel_preamble_generator, bessel_mangler) + loopy_knl = lp.register_function_manglers(loopy_knl, + [bessel_mangler]) + loopy_knl = lp.register_preamble_generators(loopy_knl, + [bessel_preamble_generator]) + + return loopy_knl + + def get_extkernel_for_G0(lambda1, lambda2): from sumpy.symbolic import pymbolic_real_norm_2 from pymbolic.primitives import make_sym_vector @@ -108,8 +125,7 @@ def get_extkernel_for_G0(lambda1, lambda2): + var("I") * d[-1]**2) scaling = 1. / ( 4. * var("I") * (lambda1**2 - lambda2**2) ) - from sumpy.kernel import ExpressionKernel - return ExpressionKernel( + return HankelBasedKernel( dim=3, expression=expr, scaling=scaling, @@ -126,8 +142,7 @@ def get_extkernel_for_G1(lamb): + var("I") * d[-1]**2) scaling = - var("I") / 4. - from sumpy.kernel import ExpressionKernel - return ExpressionKernel( + return HankelBasedKernel( dim=3, expression=expr, scaling=scaling, @@ -240,25 +255,22 @@ def main(): b = s / (epsilon**2) c = 1. / (epsilon * delta_t) - print ( ["CH operator b = ", b, ", c = ", c] ) - - sqdet = np.sqrt( b**2 - 4. * c ) - assert np.abs(sqdet) > 1e-6 - - lambda1 = ( b + sqdet ) / 2. - lambda2 = ( b - sqdet ) / 2. 
+ print("-- setup Cahn-Hilliard operator") from pytential.symbolic.pde.cahn_hilliard import CahnHilliardOperator - chop = CahnHilliardOperator(b=b, c=c) + chop = CahnHilliardOperator(b=b, c=c) unk = chop.make_unknown("sigma") bound_op = bind(qbx, chop.operator(unk)) - yukawa_2d_in_3d_kernel_1 = get_extkernel_for_G1(lambda1) - shidong_2d_in_3d_kernel = get_extkernel_for_G0(lambda1, lambda2) + print ("-- construct kernels") + yukawa_2d_in_3d_kernel_1 = get_extkernel_for_G1(chop.lambdas[0]) + shidong_2d_in_3d_kernel = get_extkernel_for_G0(chop.lambdas[0], + chop.lambdas[1]) + print("-- construct layer potentials") from sumpy.qbx import LayerPotential - from sumpy.qbx import LineTaylorLocalExpansion + from sumpy.expansion.local import LineTaylorLocalExpansion layer_pot_v0f1 = LayerPotential(cl_ctx, [ LineTaylorLocalExpansion(shidong_2d_in_3d_kernel, order=vol_qbx_order)]) @@ -269,7 +281,8 @@ def main(): # {{{ volume integral - vol_x = vol_discr.nodes(),with_queue(queue) + print("-- perform volume integrals") + vol_x = vol_discr.nodes().with_queue(queue) # target points targets = cl.array.zeros(queue, (3,) + vol_x.shape[1:], vol_x.dtype) @@ -354,26 +367,38 @@ def main(): def check_pde(): from sumpy.point_calculus import CalculusPatch - cp = CalculusPatch(np.zeros(2), order=4, h=0.1) - targets = cl.array.to_device(queue, cp.points) + vec_h = [1e-1, 1e-2, 1e-3, 1e-4] + vec_ru = [] + vec_rv = [] + for dx in vec_h: + cp = CalculusPatch(np.zeros(2), order=4, h=dx) + targets = cl.array.to_device(queue, cp.points) - u, v = bind( - (qbx, PointsTarget(targets)), - chop.representation(unk))(queue, sigma=sigma) + u, v = bind( + (qbx, PointsTarget(targets)), + chop.representation(unk))(queue, sigma=sigma) + + u = u.get().real + v = v.get().real + + lap_u = -(v - chop.b*u) - u = u.get().real - v = v.get().real + vec_ru.append(la.norm( + cp.laplace(lap_u) - chop.b * cp.laplace(u) + chop.c*u)) + vec_rv.append(la.norm( + v + cp.laplace(u) - chop.b*u)) - lap_u = -(v - chop.b*u) + # print(la.norm(u), la.norm(v)) - print(la.norm(u), la.norm(v)) + #print(la.norm( + # cp.laplace(lap_u) - chop.b * cp.laplace(u) + chop.c*u)) - print(la.norm( - cp.laplace(lap_u) - chop.b * cp.laplace(u) + chop.c*u)) + #print(la.norm( + # v + cp.laplace(u) - chop.b*u)) - print(la.norm( - v + cp.laplace(u) - chop.b*u)) - 1/0 + from tabulate import tabulate + print(tabulate([vec_h, vec_ru, vec_rv], + headers=['h', 'resid_u', 'resid_v'])) check_pde() diff --git a/pytential/symbolic/pde/cahn_hilliard.py b/pytential/symbolic/pde/cahn_hilliard.py index 3bbf1bf0..2ac6167c 100644 --- a/pytential/symbolic/pde/cahn_hilliard.py +++ b/pytential/symbolic/pde/cahn_hilliard.py @@ -40,19 +40,22 @@ class CahnHilliardOperator(L2WeightedPDEOperator): # Issue: # - let crat = np.abs(4.*c) / ( b**2 + 1e-12 ) - # - when crat is close to zero, sqrt(b**2-4*c) is close to abs(b), then for b>=0, - # sqrt(b**2-4*c) - b is inaccurate. - # - similarly, when crat is close to one, sqrt(b**2-4*c) is close to zero, then for b>0, - # sqrt(b**2-4*c) + b is inaccurate. + # - when crat is close to zero, sqrt(b**2-4*c) is close to abs(b), + # then for b>=0, sqrt(b**2-4*c) - b is inaccurate. + # - similarly, when crat is close to one, sqrt(b**2-4*c) is close to zero, + # then for b>0, sqrt(b**2-4*c) + b is inaccurate. 
# Solution: # - find a criteria for crat to choose from formulae, or # - use the computed root with smaller residual def quadratic_formula_1(a, b, c): - return ( -b + np.sqrt(b**2 - 4*a*c) ) / (2*a) + return (-b + np.sqrt(b**2 - 4*a*c)) / (2*a) + def quadratic_formula_2(a, b, c): - return ( -b - np.sqrt(b**2 - 4*a*c) ) / (2*a) + return (-b - np.sqrt(b**2 - 4*a*c)) / (2*a) + def citardauq_formula_1(a, b, c): return 2*c / (-b - np.sqrt(b**2-4*a*c)) + def citardauq_formula_2(a, b, c): return 2*c / (-b + np.sqrt(b**2-4*a*c)) -- GitLab From dc48b69231cfc8e29008aec70b11151d5f6fa66b Mon Sep 17 00:00:00 2001 From: xywei Date: Fri, 2 Jun 2017 15:59:49 -0400 Subject: [PATCH 09/30] pass check_pde --- examples/cahn-hilliard.py | 20 ++++++++++++++++++-- examples/check_pde.txt.cache | 5 +++++ 2 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 examples/check_pde.txt.cache diff --git a/examples/cahn-hilliard.py b/examples/cahn-hilliard.py index 6c531e1c..6dfbcf71 100644 --- a/examples/cahn-hilliard.py +++ b/examples/cahn-hilliard.py @@ -304,6 +304,7 @@ def main(): sources[:2] = vol_x # a manufactured f1 + # FIXME: use f1 from the previous solution x_sin_factor = 30 y_sin_factor = 10 def f1_func(x, y): @@ -397,8 +398,13 @@ def main(): # v + cp.laplace(u) - chop.b*u)) from tabulate import tabulate - print(tabulate([vec_h, vec_ru, vec_rv], - headers=['h', 'resid_u', 'resid_v'])) + # overwrite if file exists + # using .cache simply because it is in .gitignore + with open('check_pde.txt.cache', 'w') as f: + print(tabulate([["h"] + vec_h, + ["residual_u"] + vec_ru, + ["residual_v"] +, vec_rv] + ), file=f) check_pde() @@ -422,6 +428,16 @@ def main(): sym.D(LaplaceKernel(2), sym.var("sigma")))( queue, sigma=ones_density).get() + # clean up the mess + def clean_file(filename): + import os + try: + os.remove(filename) + except OSError: + pass + clean_file("failed-targets.vts") + clean_file("potential.vts") + try: u, v = bind( (qbx_stick_out, PointsTarget(targets)), diff --git a/examples/check_pde.txt.cache b/examples/check_pde.txt.cache new file mode 100644 index 00000000..9ae95c22 --- /dev/null +++ b/examples/check_pde.txt.cache @@ -0,0 +1,5 @@ +---------- ----------- ----------- ----------- ----------- +h 0.1 0.01 0.001 0.0001 +residual_u 0.264407 0.000264403 4.94253e-07 3.97963e-05 +residual_v 1.76263e-05 1.76271e-08 3.29687e-11 2.55261e-09 +---------- ----------- ----------- ----------- ----------- -- GitLab From effc070b300f32aa5e1a7f93b27afb1c8f9b0c39 Mon Sep 17 00:00:00 2001 From: xywei Date: Fri, 2 Jun 2017 16:00:49 -0400 Subject: [PATCH 10/30] deleted redundant file --- examples/check_pde.txt.cache | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 examples/check_pde.txt.cache diff --git a/examples/check_pde.txt.cache b/examples/check_pde.txt.cache deleted file mode 100644 index 9ae95c22..00000000 --- a/examples/check_pde.txt.cache +++ /dev/null @@ -1,5 +0,0 @@ ----------- ----------- ----------- ----------- ----------- -h 0.1 0.01 0.001 0.0001 -residual_u 0.264407 0.000264403 4.94253e-07 3.97963e-05 -residual_v 1.76263e-05 1.76271e-08 3.29687e-11 2.55261e-09 ----------- ----------- ----------- ----------- ----------- -- GitLab From 34e411d14beb9f85c941797e6c1861bf36d44001 Mon Sep 17 00:00:00 2001 From: xywei Date: Fri, 2 Jun 2017 16:03:58 -0400 Subject: [PATCH 11/30] output ascii to .dat file --- .gitignore | 1 + examples/cahn-hilliard.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 3d5b8042..43a3d720 100644 --- 
a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ examples/*.pdf *.vtu *.vts +*.dat .cache diff --git a/examples/cahn-hilliard.py b/examples/cahn-hilliard.py index 6dfbcf71..f628f743 100644 --- a/examples/cahn-hilliard.py +++ b/examples/cahn-hilliard.py @@ -399,11 +399,10 @@ def main(): from tabulate import tabulate # overwrite if file exists - # using .cache simply because it is in .gitignore - with open('check_pde.txt.cache', 'w') as f: + with open('check_pde.dat', 'w') as f: print(tabulate([["h"] + vec_h, ["residual_u"] + vec_ru, - ["residual_v"] +, vec_rv] + ["residual_v"] + vec_rv] ), file=f) check_pde() -- GitLab From 1094a8e59b91c4eebe9d83f41d79310958916148 Mon Sep 17 00:00:00 2001 From: xywei Date: Sat, 3 Jun 2017 12:25:02 -0400 Subject: [PATCH 12/30] still struggling with dtype --- examples/cahn-hilliard.py | 213 +++++++++++++++++++++++++++++--------- 1 file changed, 164 insertions(+), 49 deletions(-) diff --git a/examples/cahn-hilliard.py b/examples/cahn-hilliard.py index f628f743..7373acf4 100644 --- a/examples/cahn-hilliard.py +++ b/examples/cahn-hilliard.py @@ -18,7 +18,7 @@ import loopy as lp # {{{ set some constants for use below -nelements = 20 +# {{{ all kinds of orders bdry_quad_order = 4 mesh_order = bdry_quad_order qbx_order = bdry_quad_order @@ -27,6 +27,57 @@ fmm_order = 8 vol_quad_order = 5 vol_qbx_order = 2 +# }}} +# {{{ mesh generation +nelements = 20 + +from enum import Enum +class Geometry(Enum): + RegularRectangle = 1 + Circle = 2 + +shape = Geometry.Circle +# }}} +# {{{ physical parameters +s = 1.5 +epsilon = 0.01 +delta_t = 0.05 +final_t = delta_t * 1 +theta_y = 60. / 180. * np.pi + +b = s / (epsilon**2) +c = 1. / (epsilon * delta_t) +# }}} +# {{{ initial phi + +# This phi function is also used to do PDE check +import pymbolic as pmbl +x = pmbl.var("x") +y = pmbl.var("y") + +# FIXME: modify pymbolic to use tanh function +# phi = tanh(x / sqrt (2 * epsilon)) +phi = x**2 + x*y + 2*y**2 +phi3 = phi**3 +laphi = pmbl.differentiate(pmbl.differentiate(phi, 'x'), 'x') + \ + pmbl.differentiate(pmbl.differentiate(phi, 'y'), 'y') +laphi3 = pmbl.differentiate(pmbl.differentiate(phi3, 'x'), 'x') + \ + pmbl.differentiate(pmbl.differentiate(phi3, 'y'), 'y') +f1_expr = c * phi - (1+s) / epsilon**2 * laphi + 1 / epsilon**2 * laphi3 +f2_expr = (phi3 - (1+s) * phi) / epsilon**2 + +def f1_func(x,y): + return pmbl.evaluate(f1_expr, {"x": x, "y": y}) + +def f2_func(x,y): + return pmbl.evaluate(f2_expr, {"x": x, "y": y}) + +def initial_phi(x,y): + return pmbl.evaluate(phi, {"x": x, "y": y}) + +#def initial_phi(x, y): +# return np.tanh(x / np.sqrt(2. * initial_epsilon)) +# }}} # }}} @@ -160,13 +211,6 @@ def main(): # {{{ volume mesh generation - from enum import Enum - class Geometry(Enum): - RegularRectangle = 1 - Circle = 2 - - shape = Geometry.Circle - if shape == Geometry.RegularRectangle: from meshmode.mesh.generation import generate_regular_rect_mesh ext = 1. @@ -247,28 +291,21 @@ def main(): # }}} -# {{{ equation info +# {{{ setup operator and potentials - s = 1.5 - epsilon = 0.01 - delta_t = 0.05 - - b = s / (epsilon**2) - c = 1. 
/ (epsilon * delta_t) - - print("-- setup Cahn-Hilliard operator") + #print("-- setup Cahn-Hilliard operator") from pytential.symbolic.pde.cahn_hilliard import CahnHilliardOperator chop = CahnHilliardOperator(b=b, c=c) unk = chop.make_unknown("sigma") bound_op = bind(qbx, chop.operator(unk)) - print ("-- construct kernels") + #print ("-- construct kernels") yukawa_2d_in_3d_kernel_1 = get_extkernel_for_G1(chop.lambdas[0]) shidong_2d_in_3d_kernel = get_extkernel_for_G0(chop.lambdas[0], chop.lambdas[1]) - print("-- construct layer potentials") + #print("-- construct layer potentials") from sumpy.qbx import LayerPotential from sumpy.expansion.local import LineTaylorLocalExpansion layer_pot_v0f1 = LayerPotential(cl_ctx, [ @@ -279,9 +316,8 @@ def main(): order=vol_qbx_order)]) # }}} -# {{{ volume integral +# {{{ setup for volume integral - print("-- perform volume integrals") vol_x = vol_discr.nodes().with_queue(queue) # target points @@ -299,19 +335,10 @@ def main(): print(center_dist) # source points - # FIXME: use over sampled source points? + # TODO: use over sampled source points? sources = cl.array.zeros(queue, (3,) + vol_x.shape[1:], vol_x.dtype) sources[:2] = vol_x - # a manufactured f1 - # FIXME: use f1 from the previous solution - x_sin_factor = 30 - y_sin_factor = 10 - def f1_func(x, y): - return 0.1 * cl.clmath.sin(x_sin_factor*x) * cl.clmath.sin(y_sin_factor*y) - - # strengths (with quadrature weights) - f1 = f1_func(vol_x[0], vol_x[1]) vol_weights = bind(vol_discr, p.area_element(mesh.ambient_dim, mesh.dim) * p.QWeight() )(queue) @@ -319,26 +346,53 @@ def main(): print("volume: %d source nodes, %d target nodes" % ( vol_discr.nnodes, vol_discr.nnodes)) - evt, (vol_pot_v0f1,) = layer_pot_v0f1( - queue, - targets=targets.reshape(3, vol_discr.nnodes), - centers=centers, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=( - (vol_weights * f1).reshape(vol_discr.nnodes),) - ) - evt, (vol_pot_v1f1,) = layer_pot_v1f1( - queue, - targets=targets.reshape(3, vol_discr.nnodes), - centers=centers, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=( - (vol_weights * f1).reshape(vol_discr.nnodes),) - ) +# }}} +# {{{ prepare for time stepping + timestep_number = 0 + time = 0 + def get_vts_filename(tmstp_num): + return "solution-" + '{0:03}'.format(tmstp_num) + ".vts" + output_vts_filename = get_vts_filename(timestep_number) # }}} +# {{{ [[TIME STEPPING]] + while time < final_t: + timestep_number += 1 + time += delta_t + output_vts_filename = get_vts_filename(timestep_number) + + # a manufactured f1 function + #x_sin_factor = 30 + #y_sin_factor = 10 + #def f1_func(x, y): + # return 0.1 * cl.clmath.sin(x_sin_factor*x) \ + # * cl.clmath.sin(y_sin_factor*y) + + # get f1 to compute strengths + f1 = f1_func(vol_x[0], vol_x[1]) + f2 = f2_func(vol_x[0], vol_x[1]) + + evt, (vol_pot_v0f1,) = layer_pot_v0f1( + queue, + targets=targets.reshape(3, vol_discr.nnodes), + centers=centers, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=( + (vol_weights * f1).reshape(vol_discr.nnodes),) + ) + + evt, (vol_pot_v1f1,) = layer_pot_v1f1( + queue, + targets=targets.reshape(3, vol_discr.nnodes), + centers=centers, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=( + (vol_weights * f1).reshape(vol_discr.nnodes),) + ) + + # {{{ fix rhs and solve nodes = density_discr.nodes().with_queue(queue) @@ -371,6 +425,8 @@ def main(): vec_h = [1e-1, 1e-2, 1e-3, 1e-4] vec_ru = [] vec_rv = [] + vec_rp = [] + vec_rm = [] for dx in vec_h: cp = CalculusPatch(np.zeros(2), order=4, h=dx) targets = 
cl.array.to_device(queue, cp.points) @@ -384,11 +440,65 @@ def main(): lap_u = -(v - chop.b*u) + # Check for homogeneous PDEs for u and v vec_ru.append(la.norm( cp.laplace(lap_u) - chop.b * cp.laplace(u) + chop.c*u)) vec_rv.append(la.norm( v + cp.laplace(u) - chop.b*u)) + # Check for inhomogeneous PDEs for phi and mu + + targets_in_3d = cl.array.zeros( + queue, + (3,) + cp.points.shape[1:], + vol_x.dtype) + targets_in_3d[:2] = cp.points + + #center_dist = 0.125*np.min( + # cl.clmath.sqrt( + # bind(vol_discr, + # p.area_element(mesh.ambient_dim, mesh.dim)) + # (queue)).get()) + + centers_in_3d = make_obj_array( + [ci.copy() + for ci in targets_in_3d]) + centers_in_3d[2][:] = center_dist + + evt, (v0f1,) = layer_pot_v0f1( + queue, + targets=targets_in_3d, + centers=centers_in_3d, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=( + (vol_weights * f1).reshape(vol_discr.nnodes),) + ) + + evt, (v1f1,) = layer_pot_v1f1( + queue, + targets=targets_in_3d, + centers=centers_in_3d, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=( + (vol_weights * f1).reshape(vol_discr.nnodes),) + ) + + f14ck = f1_func(cp.points[0], cp.points[1]) + f24ck = f2_func(cp.points[0], cp.points[1]) + utild = v0f1 + vtild = f24ck - v1f1 + lambda1**2 * v0f1 + + ph = utild + u + mu = epsilon * (vtild + v) + lap_ph = f24ck + chop.b * ph - mu / epsilon + + vec_rp.append(la.norm( + cp.laplace(lap_ph) - chop.b * cp.laplace(ph) + chop.c*ph + - f14ck)) + vec_rm.append(la.norm( + mu / epsilon + cp.laplace(ph) - chop.b*ph + - f24ck)) + # print(la.norm(u), la.norm(v)) #print(la.norm( @@ -402,7 +512,10 @@ def main(): with open('check_pde.dat', 'w') as f: print(tabulate([["h"] + vec_h, ["residual_u"] + vec_ru, - ["residual_v"] + vec_rv] + ["residual_v"] + vec_rv, + ["residual_phi"] + vec_rp, + ["residual_mu"] + vec_rm, + ] ), file=f) check_pde() @@ -412,7 +525,7 @@ def main(): # {{{ postprocess/visualize from sumpy.visualization import FieldPlotter - fplot = FieldPlotter(np.zeros(2), extent=5, npoints=500) + fplot = FieldPlotter(np.zeros(2), extent=1.5, npoints=500) targets = cl.array.to_device(queue, fplot.points) @@ -464,6 +577,8 @@ def main(): # }}} +# }}} + if __name__ == "__main__": main() -- GitLab From adaa8e65888ff264f1de55df48440eb5ad33f37b Mon Sep 17 00:00:00 2001 From: xywei Date: Sun, 4 Jun 2017 16:23:35 -0400 Subject: [PATCH 13/30] debugging volume integral --- examples/cahn-hilliard.py | 487 ++++++++++++++++++++------------------ 1 file changed, 256 insertions(+), 231 deletions(-) diff --git a/examples/cahn-hilliard.py b/examples/cahn-hilliard.py index 7373acf4..4cbd1680 100644 --- a/examples/cahn-hilliard.py +++ b/examples/cahn-hilliard.py @@ -22,19 +22,22 @@ import loopy as lp bdry_quad_order = 4 mesh_order = bdry_quad_order qbx_order = bdry_quad_order -bdry_ovsmp_quad_order = 4*bdry_quad_order +bdry_ovsmp_quad_order = 4 * bdry_quad_order fmm_order = 8 vol_quad_order = 5 -vol_qbx_order = 2 +vol_qbx_order = 2 # }}} # {{{ mesh generation nelements = 20 from enum import Enum + + class Geometry(Enum): RegularRectangle = 1 - Circle = 2 + Circle = 2 + shape = Geometry.Circle # }}} @@ -57,24 +60,28 @@ y = pmbl.var("y") # FIXME: modify pymbolic to use tanh function # phi = tanh(x / sqrt (2 * epsilon)) -phi = x**2 + x*y + 2*y**2 +phi = x**2 + x * y + 2 * y**2 phi3 = phi**3 laphi = pmbl.differentiate(pmbl.differentiate(phi, 'x'), 'x') + \ pmbl.differentiate(pmbl.differentiate(phi, 'y'), 'y') laphi3 = pmbl.differentiate(pmbl.differentiate(phi3, 'x'), 'x') + \ pmbl.differentiate(pmbl.differentiate(phi3, 'y'), 
'y') -f1_expr = c * phi - (1+s) / epsilon**2 * laphi + 1 / epsilon**2 * laphi3 -f2_expr = (phi3 - (1+s) * phi) / epsilon**2 +f1_expr = c * phi - (1 + s) / epsilon**2 * laphi + 1 / epsilon**2 * laphi3 +f2_expr = (phi3 - (1 + s) * phi) / epsilon**2 + -def f1_func(x,y): +def f1_func(x, y): return pmbl.evaluate(f1_expr, {"x": x, "y": y}) -def f2_func(x,y): + +def f2_func(x, y): return pmbl.evaluate(f2_expr, {"x": x, "y": y}) -def initial_phi(x,y): + +def initial_phi(x, y): return pmbl.evaluate(phi, {"x": x, "y": y}) + #def initial_phi(x, y): # return np.tanh(x / np.sqrt(2. * initial_epsilon)) # }}} @@ -85,6 +92,8 @@ def initial_phi(x,y): # FIXME: will the expressions work when lambda is complex? # (may need conversion from 1j to var("I")) from sumpy.kernel import ExpressionKernel + + class ShidongKernel(ExpressionKernel): init_arg_names = ("dim", "lambda1", "lambda2") @@ -99,66 +108,64 @@ class ShidongKernel(ExpressionKernel): # Based on http://mathworld.wolfram.com/ModifiedBesselFunctionoftheSecondKind.html if dim == 2: - r = pymbolic_real_norm_2(make_sym_vector("d", dim)) + r = pymbolic_real_norm_2(make_sym_vector("d", dim)) expr = var("hankel_1")(0, var("I") * lambda1 * r) - \ var("hankel_1")(0, var("I") * lambda2 * r) - scaling = 1. / ( 4. * var("I") * (lambda1**2 - lambda2**2) ) + scaling = 1. / (4. * var("I") * (lambda1**2 - lambda2**2)) else: raise RuntimeError("unsupported dimensionality") ExpressionKernel.__init__( - self, - dim, - expression=expr, - scaling = scaling, - is_complex_valued=True) + self, + dim, + expression=expr, + scaling=scaling, + is_complex_valued=True) self.lambda1 = lambda1 self.lambda2 = lambda2 def __getinitargs__(self): - return(self._dim, self.lambda1, self.lambda2) + return (self._dim, self.lambda1, self.lambda2) def update_persistent_hash(self, key_hash, key_builder): key_hash.update(type(self).__name__.encode("utf8")) - key_builder.rec(key_hash, - (self._dim, self.lambda1, self.lambda2) - ) + key_builder.rec(key_hash, (self._dim, self.lambda1, self.lambda2)) def __repr__(self): if self._dim is not None: - return "ShdgKnl%dD(%f, %f)" % ( - self._dim, self.lambda1, self.lambda2) + return "ShdgKnl%dD(%f, %f)" % (self._dim, self.lambda1, + self.lambda2) else: return "ShdgKnl(%f, %f)" % (self.lambda1, self.lambda2) def prepare_loopy_kernel(self, loopy_knl): from sumpy.codegen import (bessel_preamble_generator, bessel_mangler) - loopy_knl = lp.register_function_manglers(loopy_knl, - [bessel_mangler]) - loopy_knl = lp.register_preamble_generators(loopy_knl, - [bessel_preamble_generator]) + loopy_knl = lp.register_function_manglers(loopy_knl, [bessel_mangler]) + loopy_knl = lp.register_preamble_generators( + loopy_knl, [bessel_preamble_generator]) return loopy_knl def get_args(self): k_dtype = np.complex128 return [ - KernelArgument( - loopy_arg=lp.ValueArg("shidong_kernel", k_dtype), - )] + KernelArgument( + loopy_arg=lp.ValueArg("shidong_kernel", k_dtype), ) + ] mapper_method = "map_shidong_kernel" + # }}} + # {{{ extended kernel getters class HankelBasedKernel(ExpressionKernel): def prepare_loopy_kernel(self, loopy_knl): from sumpy.codegen import (bessel_preamble_generator, bessel_mangler) - loopy_knl = lp.register_function_manglers(loopy_knl, - [bessel_mangler]) - loopy_knl = lp.register_preamble_generators(loopy_knl, - [bessel_preamble_generator]) + loopy_knl = lp.register_function_manglers(loopy_knl, [bessel_mangler]) + loopy_knl = lp.register_preamble_generators( + loopy_knl, [bessel_preamble_generator]) return loopy_knl @@ -174,13 +181,11 @@ def 
get_extkernel_for_G0(lambda1, lambda2): + var("I") * d[-1]**2) \ - var("hankel_1")(0, var("I") * lambda2 * r2 + var("I") * d[-1]**2) - scaling = 1. / ( 4. * var("I") * (lambda1**2 - lambda2**2) ) + scaling = 1. / (4. * var("I") * (lambda1**2 - lambda2**2)) return HankelBasedKernel( - dim=3, - expression=expr, - scaling=scaling, - is_complex_valued=True) + dim=3, expression=expr, scaling=scaling, is_complex_valued=True) + def get_extkernel_for_G1(lamb): from sumpy.symbolic import pymbolic_real_norm_2 @@ -189,18 +194,16 @@ def get_extkernel_for_G1(lamb): d = make_sym_vector("d", 3) r2 = pymbolic_real_norm_2(d[:-1]) - expr = var("hankel_1")(0, var("I") * lamb * r2 - + var("I") * d[-1]**2) - scaling = - var("I") / 4. + expr = var("hankel_1")(0, var("I") * lamb * r2 + var("I") * d[-1]**2) + scaling = -var("I") / 4. return HankelBasedKernel( - dim=3, - expression=expr, - scaling=scaling, - is_complex_valued=True) + dim=3, expression=expr, scaling=scaling, is_complex_valued=True) + # }}} + def main(): import logging logging.basicConfig(level=logging.INFO) @@ -209,57 +212,56 @@ def main(): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) -# {{{ volume mesh generation + # {{{ volume mesh generation if shape == Geometry.RegularRectangle: - from meshmode.mesh.generation import generate_regular_rect_mesh - ext = 1. - h = 0.05 - mesh = generate_regular_rect_mesh( - a=(-ext/2., -ext/2.), - b=( ext/2., ext/2.), - n=(int(ext/h), int(ext/h)) - ) + from meshmode.mesh.generation import generate_regular_rect_mesh + ext = 1. + h = 0.05 + mesh = generate_regular_rect_mesh( + a=(-ext / 2., -ext / 2.), + b=(ext / 2., ext / 2.), + n=(int(ext / h), int(ext / h))) elif shape == Geometry.Circle: from meshmode.mesh.io import generate_gmsh, FileSource - h = 0.05 + h = 0.05 mesh = generate_gmsh( - FileSource("circle.step"), - 2, - order=mesh_order, - force_ambient_dim=2, - other_options=["-string", - "Mesh.CharacteristicLengthMax = %g;" % h] - ) + FileSource("circle.step"), + 2, + order=mesh_order, + force_ambient_dim=2, + other_options=[ + "-string", "Mesh.CharacteristicLengthMax = %g;" % h + ]) else: RuntimeError("unsupported geometry") logger.info("%d elements" % mesh.nelements) -# }}} + # }}} -# {{{ discretization and connections + # {{{ discretization and connections - vol_discr = Discretization(cl_ctx, mesh, - InterpolatoryQuadratureSimplexGroupFactory(vol_quad_order)) + vol_discr = Discretization( + cl_ctx, mesh, + InterpolatoryQuadratureSimplexGroupFactory(vol_quad_order)) from meshmode.mesh import BTAG_ALL from meshmode.discretization.connection import make_face_restriction pre_density_connection = make_face_restriction( - vol_discr, - InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order), - BTAG_ALL) + vol_discr, + InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order), BTAG_ALL) pre_density_discr = pre_density_connection.to_discr - from pytential.qbx import ( - QBXLayerPotentialSource, QBXTargetAssociationFailedException) + from pytential.qbx import (QBXLayerPotentialSource, + QBXTargetAssociationFailedException) qbx, _ = QBXLayerPotentialSource( - pre_density_discr, - fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, - fmm_order=fmm_order, - expansion_disks_in_tree_have_extent=True, - ).with_refinement() + pre_density_discr, + fine_order=bdry_ovsmp_quad_order, + qbx_order=qbx_order, + fmm_order=fmm_order, + expansion_disks_in_tree_have_extent=True, ).with_refinement() density_discr = qbx.density_discr @@ -267,97 +269,98 @@ def main(): # vol_discr --> pre_density_discr --> 
density_discr # via ChainedDiscretizationConnection + # from meshmode.mesh.generation import ellipse, make_curve_mesh + # from functools import partial + # + # mesh = make_curve_mesh( + # partial(ellipse, 2), + # np.linspace(0, 1, nelements+1), + # mesh_order) + # + # pre_density_discr = Discretization( + # cl_ctx, mesh, + # InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order)) + # + # from pytential.qbx import ( + # QBXLayerPotentialSource, QBXTargetAssociationFailedException) + # qbx, _ = QBXLayerPotentialSource( + # pre_density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, + # fmm_order=fmm_order, + # expansion_disks_in_tree_have_extent=True, + # ).with_refinement() + # density_discr = qbx.density_discr -# from meshmode.mesh.generation import ellipse, make_curve_mesh -# from functools import partial -# -# mesh = make_curve_mesh( -# partial(ellipse, 2), -# np.linspace(0, 1, nelements+1), -# mesh_order) -# -# pre_density_discr = Discretization( -# cl_ctx, mesh, -# InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order)) -# -# from pytential.qbx import ( -# QBXLayerPotentialSource, QBXTargetAssociationFailedException) -# qbx, _ = QBXLayerPotentialSource( -# pre_density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, -# fmm_order=fmm_order, -# expansion_disks_in_tree_have_extent=True, -# ).with_refinement() -# density_discr = qbx.density_discr - -# }}} + # }}} -# {{{ setup operator and potentials + # {{{ setup operator and potentials #print("-- setup Cahn-Hilliard operator") from pytential.symbolic.pde.cahn_hilliard import CahnHilliardOperator - chop = CahnHilliardOperator(b=b, c=c) + chop = CahnHilliardOperator(b=b, c=c) unk = chop.make_unknown("sigma") bound_op = bind(qbx, chop.operator(unk)) #print ("-- construct kernels") yukawa_2d_in_3d_kernel_1 = get_extkernel_for_G1(chop.lambdas[0]) - shidong_2d_in_3d_kernel = get_extkernel_for_G0(chop.lambdas[0], - chop.lambdas[1]) + shidong_2d_in_3d_kernel = get_extkernel_for_G0(chop.lambdas[0], + chop.lambdas[1]) #print("-- construct layer potentials") from sumpy.qbx import LayerPotential from sumpy.expansion.local import LineTaylorLocalExpansion layer_pot_v0f1 = LayerPotential(cl_ctx, [ - LineTaylorLocalExpansion(shidong_2d_in_3d_kernel, - order=vol_qbx_order)]) + LineTaylorLocalExpansion(shidong_2d_in_3d_kernel, order=vol_qbx_order) + ]) layer_pot_v1f1 = LayerPotential(cl_ctx, [ - LineTaylorLocalExpansion(yukawa_2d_in_3d_kernel_1, - order=vol_qbx_order)]) -# }}} + LineTaylorLocalExpansion( + yukawa_2d_in_3d_kernel_1, order=vol_qbx_order) + ]) + # }}} -# {{{ setup for volume integral + # {{{ setup for volume integral vol_x = vol_discr.nodes().with_queue(queue) # target points - targets = cl.array.zeros(queue, (3,) + vol_x.shape[1:], vol_x.dtype) + targets = cl.array.zeros(queue, (3, ) + vol_x.shape[1:], vol_x.dtype) targets[:2] = vol_x # expansion centers - center_dist = 0.125*np.min( - cl.clmath.sqrt( - bind(vol_discr, - p.area_element(mesh.ambient_dim, mesh.dim)) - (queue)).get()) - centers = make_obj_array([ci.copy().reshape(vol_discr.nnodes) for ci in targets]) + center_dist = 0.125 * np.min( + cl.clmath.sqrt( + bind(vol_discr, p.area_element(mesh.ambient_dim, mesh.dim))(queue)) + .get()) + centers = make_obj_array( + [ci.copy().reshape(vol_discr.nnodes) for ci in targets]) centers[2][:] = center_dist print(center_dist) # source points # TODO: use over sampled source points? 
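# For orientation (a sketch, not part of the patch): the LayerPotential
# objects above are repurposed to evaluate the volume potential
#
#     V[f](x) = integral over Omega of G(x - y) f(y) dy,
#
# by passing strengths = vol_weights * f1 below, which turns the integral
# into the quadrature sum sum_j w_j G(x_i - y_j) f(y_j).  The expansion
# centers are lifted off the plane (third coordinate set to center_dist)
# so that QBX can evaluate this sum even though every target coincides
# with a source.  A naive, QBX-free version of the same sum, with a
# hypothetical vectorized kernel g(r) and numpy arrays:

import numpy as np

def naive_volume_potential(g, targets, sources, weights, density):
    # targets: (2, m), sources: (2, n), weights/density: (n,)
    d = targets[:, :, None] - sources[:, None, :]    # (2, m, n) displacements
    r = np.sqrt((d**2).sum(axis=0))                  # (m, n) pairwise distances
    return (g(r) * (weights * density)).sum(axis=-1)

# This breaks down when a target lies on a source point (the Hankel-based
# kernels here are singular at r = 0), which is exactly what the off-plane
# centers above work around.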
- sources = cl.array.zeros(queue, (3,) + vol_x.shape[1:], vol_x.dtype) + sources = cl.array.zeros(queue, (3, ) + vol_x.shape[1:], vol_x.dtype) sources[:2] = vol_x - vol_weights = bind(vol_discr, - p.area_element(mesh.ambient_dim, mesh.dim) * p.QWeight() - )(queue) + vol_weights = bind( + vol_discr, + p.area_element(mesh.ambient_dim, mesh.dim) * p.QWeight())(queue) - print("volume: %d source nodes, %d target nodes" % ( - vol_discr.nnodes, vol_discr.nnodes)) + print("volume: %d source nodes, %d target nodes" % (vol_discr.nnodes, + vol_discr.nnodes)) + # }}} -# }}} - -# {{{ prepare for time stepping + # {{{ prepare for time stepping timestep_number = 0 - time = 0 + time = 0 + def get_vts_filename(tmstp_num): return "solution-" + '{0:03}'.format(tmstp_num) + ".vts" + output_vts_filename = get_vts_filename(timestep_number) -# }}} + # }}} -# {{{ [[TIME STEPPING]] + # {{{ [[TIME STEPPING]] while time < final_t: timestep_number += 1 time += delta_t @@ -374,24 +377,19 @@ def main(): f1 = f1_func(vol_x[0], vol_x[1]) f2 = f2_func(vol_x[0], vol_x[1]) - evt, (vol_pot_v0f1,) = layer_pot_v0f1( - queue, - targets=targets.reshape(3, vol_discr.nnodes), - centers=centers, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=( - (vol_weights * f1).reshape(vol_discr.nnodes),) - ) - - evt, (vol_pot_v1f1,) = layer_pot_v1f1( - queue, - targets=targets.reshape(3, vol_discr.nnodes), - centers=centers, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=( - (vol_weights * f1).reshape(vol_discr.nnodes),) - ) + evt, (vol_pot_v0f1, ) = layer_pot_v0f1( + queue, + targets=targets.reshape(3, vol_discr.nnodes), + centers=centers, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=((vol_weights * f1).reshape(vol_discr.nnodes), )) + evt, (vol_pot_v1f1, ) = layer_pot_v1f1( + queue, + targets=targets.reshape(3, vol_discr.nnodes), + centers=centers, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=((vol_weights * f1).reshape(vol_discr.nnodes), )) # {{{ fix rhs and solve @@ -399,20 +397,22 @@ def main(): def g(xvec): x, y = xvec - return cl.clmath.cos(5*cl.clmath.atan2(y, x)) + return cl.clmath.cos(5 * cl.clmath.atan2(y, x)) bc = sym.make_obj_array([ # FIXME: Realistic BC - 5+g(nodes), - 3-g(nodes), - ]) + 5 + g(nodes), + 3 - g(nodes), + ]) from pytential.solve import gmres gmres_result = gmres( - bound_op.scipy_op(queue, "sigma", dtype=np.complex128), - bc, tol=1e-8, progress=True, - stall_iterations=0, - hard_failure=True) + bound_op.scipy_op(queue, "sigma", dtype=np.complex128), + bc, + tol=1e-8, + progress=True, + stall_iterations=0, + hard_failure=True) sigma = gmres_result.solution @@ -422,36 +422,38 @@ def main(): def check_pde(): from sumpy.point_calculus import CalculusPatch - vec_h = [1e-1, 1e-2, 1e-3, 1e-4] + vec_h = [1e-1, 1e-2, 1e-3, 1e-4] vec_ru = [] vec_rv = [] vec_rp = [] vec_rm = [] + vec_rf1 = [] + vec_rf2 = [] + vec_rutld = [] + vec_rvtld = [] for dx in vec_h: cp = CalculusPatch(np.zeros(2), order=4, h=dx) targets = cl.array.to_device(queue, cp.points) - u, v = bind( - (qbx, PointsTarget(targets)), - chop.representation(unk))(queue, sigma=sigma) + u, v = bind((qbx, PointsTarget(targets)), + chop.representation(unk))( + queue, sigma=sigma) u = u.get().real v = v.get().real - lap_u = -(v - chop.b*u) + lap_u = -(v - chop.b * u) # Check for homogeneous PDEs for u and v - vec_ru.append(la.norm( - cp.laplace(lap_u) - chop.b * cp.laplace(u) + chop.c*u)) - vec_rv.append(la.norm( - v + cp.laplace(u) - chop.b*u)) + vec_ru.append( + la.norm( + cp.laplace(lap_u) - chop.b * cp.laplace(u) 
+ chop.c * u)) + vec_rv.append(la.norm(v + cp.laplace(u) - chop.b * u)) # Check for inhomogeneous PDEs for phi and mu - targets_in_3d = cl.array.zeros( - queue, - (3,) + cp.points.shape[1:], - vol_x.dtype) + targets_in_3d = cl.array.zeros(queue, (3, ) + cp.points.shape[1:], + vol_x.dtype) targets_in_3d[:2] = cp.points #center_dist = 0.125*np.min( @@ -460,63 +462,93 @@ def main(): # p.area_element(mesh.ambient_dim, mesh.dim)) # (queue)).get()) - centers_in_3d = make_obj_array( - [ci.copy() - for ci in targets_in_3d]) + centers_in_3d = make_obj_array([ci.copy() for ci in targets_in_3d]) centers_in_3d[2][:] = center_dist - evt, (v0f1,) = layer_pot_v0f1( - queue, - targets=targets_in_3d, - centers=centers_in_3d, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=( - (vol_weights * f1).reshape(vol_discr.nnodes),) - ) - - evt, (v1f1,) = layer_pot_v1f1( - queue, - targets=targets_in_3d, - centers=centers_in_3d, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=( - (vol_weights * f1).reshape(vol_discr.nnodes),) - ) - - f14ck = f1_func(cp.points[0], cp.points[1]) - f24ck = f2_func(cp.points[0], cp.points[1]) + evt, (v0f1, ) = layer_pot_v0f1( + queue, + targets=targets_in_3d, + centers=centers_in_3d, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=((vol_weights * f1).reshape(vol_discr.nnodes), )) + v0f1 = v0f1.get().real + + evt, (v1f1, ) = layer_pot_v1f1( + queue, + targets=targets_in_3d, + centers=centers_in_3d, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=((vol_weights * f1).reshape(vol_discr.nnodes), )) + v1f1 = v1f1.get().real + + f14ck = cl.array.to_device(queue, + f1_func(cp.points[0], cp.points[1])) + f24ck = cl.array.to_device(queue, + f2_func(cp.points[0], cp.points[1])) + phi_init = cl.array.to_device(queue, + initial_phi(cp.points[0], + cp.points[1])) + + f14ck = f14ck.get().real + f24ck = f24ck.get().real + phi_init = phi_init.get().real + vec_rf1.append( + la.norm(chop.c * phi_init - + (1 + s) / epsilon**2 * cp.laplace(phi_init) + + 1. / epsilon**2 * cp.laplace(phi_init**3) - f14ck)) + vec_rf2.append( + la.norm((phi_init**3 - (1 + s) * phi_init) / epsilon**2 - + f24ck)) + utild = v0f1 - vtild = f24ck - v1f1 + lambda1**2 * v0f1 + vtild = f24ck - v1f1 + chop.lambdas[0]**2 * v0f1 + lap_utild = -vtild + chop.b * utild + f24ck + # FIXME: Not passing this check. (bugs in volume integral?) 
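All residual checks in check_pde follow the same pattern: sample the candidate solution on a sumpy CalculusPatch and apply the PDE operator by patch-local numerical differentiation, so the residual should be dominated by the finite-difference error. A minimal, self-contained instance of the pattern (the test function is made up; it satisfies lap(u) = lam**2 * u exactly, so the printed residual should shrink with h):

    import numpy as np
    from sumpy.point_calculus import CalculusPatch

    lam = 2.0
    for h in [1e-1, 1e-2, 1e-3]:
        cp = CalculusPatch(np.zeros(2), order=4, h=h)
        x, y = cp.points
        u = np.exp(lam * x)
        print(h, np.linalg.norm(cp.laplace(u) - lam**2 * u))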
+ vec_rutld.append( + la.norm( + cp.laplace(lap_utild) - chop.b * lap_utild + + chop.c * utild - f14ck)) + print(f14ck) + print(f1[0:10]) + print(cp.laplace(lap_utild)) + print((lap_utild)) + print((utild)) + print(chop.b) + print(chop.c) + vec_rvtld.append( + la.norm(vtild + cp.laplace(utild) - chop.b * utild - f24ck)) ph = utild + u mu = epsilon * (vtild + v) lap_ph = f24ck + chop.b * ph - mu / epsilon - vec_rp.append(la.norm( - cp.laplace(lap_ph) - chop.b * cp.laplace(ph) + chop.c*ph - - f14ck)) - vec_rm.append(la.norm( - mu / epsilon + cp.laplace(ph) - chop.b*ph - - f24ck)) - - # print(la.norm(u), la.norm(v)) - - #print(la.norm( - # cp.laplace(lap_u) - chop.b * cp.laplace(u) + chop.c*u)) - - #print(la.norm( - # v + cp.laplace(u) - chop.b*u)) + vec_rp.append( + la.norm( + cp.laplace(lap_ph) - chop.b * cp.laplace(ph) + chop.c * ph + - f14ck)) + vec_rm.append( + la.norm(mu / epsilon + cp.laplace(ph) - chop.b * ph - f24ck)) from tabulate import tabulate # overwrite if file exists with open('check_pde.dat', 'w') as f: - print(tabulate([["h"] + vec_h, - ["residual_u"] + vec_ru, - ["residual_v"] + vec_rv, - ["residual_phi"] + vec_rp, - ["residual_mu"] + vec_rm, - ] - ), file=f) + print( + "Residuals of PDE and numerical vs. symbolic differentiation:", + file=f) + print( + tabulate([ + ["h"] + vec_h, + ["residual_u"] + vec_ru, + ["residual_v"] + vec_rv, + ["residual_f1"] + vec_rf1, + ["residual_f2"] + vec_rf2, + ["residual_u_tld"] + vec_rutld, + ["residual_v_tld"] + vec_rvtld, + ["residual_phi"] + vec_rp, + ["residual_mu"] + vec_rm, + ]), + file=f) + 1 / 0 check_pde() @@ -535,10 +567,9 @@ def main(): from sumpy.kernel import LaplaceKernel ones_density = density_discr.zeros(queue) ones_density.fill(1) - indicator = bind( - (indicator_qbx, PointsTarget(targets)), - sym.D(LaplaceKernel(2), sym.var("sigma")))( - queue, sigma=ones_density).get() + indicator = bind((indicator_qbx, PointsTarget(targets)), + sym.D(LaplaceKernel(2), sym.var("sigma")))( + queue, sigma=ones_density).get() # clean up the mess def clean_file(filename): @@ -547,33 +578,27 @@ def main(): os.remove(filename) except OSError: pass + clean_file("failed-targets.vts") clean_file("potential.vts") try: - u, v = bind( - (qbx_stick_out, PointsTarget(targets)), - chop.representation(unk))(queue, sigma=sigma) + u, v = bind((qbx_stick_out, PointsTarget(targets)), + chop.representation(unk))( + queue, sigma=sigma) except QBXTargetAssociationFailedException as e: - fplot.write_vtk_file( - "failed-targets.vts", - [ - ("failed", e.failed_target_flags.get(queue)) - ] - ) + fplot.write_vtk_file("failed-targets.vts", + [("failed", e.failed_target_flags.get(queue))]) raise u = u.get().real v = v.get().real #fplot.show_scalar_in_mayavi(fld_in_vol.real, max_val=5) - fplot.write_vtk_file( - "potential.vts", - [ - ("u", u), - ("v", v), - ("indicator", indicator), - ] - ) + fplot.write_vtk_file("potential.vts", [ + ("u", u), + ("v", v), + ("indicator", indicator), + ]) # }}} -- GitLab From 0a249aa5825d705e79a2a0ec8a6da5f2f2485622 Mon Sep 17 00:00:00 2001 From: xywei Date: Fri, 9 Jun 2017 15:42:58 -0400 Subject: [PATCH 14/30] current state for merge --- examples/cahn-hilliard.py | 258 ++++++++++++++++++++++++-------------- 1 file changed, 167 insertions(+), 91 deletions(-) diff --git a/examples/cahn-hilliard.py b/examples/cahn-hilliard.py index 4cbd1680..2308ce8e 100644 --- a/examples/cahn-hilliard.py +++ b/examples/cahn-hilliard.py @@ -16,6 +16,15 @@ import pytential.symbolic.primitives as p from sumpy.kernel import ExpressionKernel import loopy as 
lp + +# clean up the mess +def clean_file(filename): + import os + try: + os.remove(filename) + except OSError: + pass + # {{{ set some constants for use below # {{{ all kinds of orders @@ -58,7 +67,9 @@ import pymbolic as pmbl x = pmbl.var("x") y = pmbl.var("y") -# FIXME: modify pymbolic to use tanh function +# b, c are known constants + +# TODO: modify pymbolic to use tanh function # phi = tanh(x / sqrt (2 * epsilon)) phi = x**2 + x * y + 2 * y**2 phi3 = phi**3 @@ -69,27 +80,35 @@ laphi3 = pmbl.differentiate(pmbl.differentiate(phi3, 'x'), 'x') + \ f1_expr = c * phi - (1 + s) / epsilon**2 * laphi + 1 / epsilon**2 * laphi3 f2_expr = (phi3 - (1 + s) * phi) / epsilon**2 - def f1_func(x, y): return pmbl.evaluate(f1_expr, {"x": x, "y": y}) - def f2_func(x, y): return pmbl.evaluate(f2_expr, {"x": x, "y": y}) - def initial_phi(x, y): return pmbl.evaluate(phi, {"x": x, "y": y}) +# }}} +# {{{ manufactured rhs such that the solution is phi given above + +lalaphi = pmbl.differentiate(pmbl.differentiate(laphi, 'x'), 'x') + \ + pmbl.differentiate(pmbl.differentiate(laphi, 'y'), 'y') +mu = - epsilon * laphi - phi / epsilon + phi3 / epsilon + +rhs1_expr = lalaphi - b * laphi + c * phi +rhs2_expr = mu / epsilon + laphi - b * phi +def rhs1_func(x, y): + return pmbl.evaluate(rhs1_expr, {"x": x, "y": y}) -#def initial_phi(x, y): -# return np.tanh(x / np.sqrt(2. * initial_epsilon)) +def rhs2_func(x, y): + return pmbl.evaluate(rhs2_expr, {"x": x, "y": y}) # }}} # }}} # {{{ a kernel class for G0 -# FIXME: will the expressions work when lambda is complex? +# TODO: will the expressions work when lambda is complex? # (may need conversion from 1j to var("I")) from sumpy.kernel import ExpressionKernel @@ -158,7 +177,6 @@ class ShidongKernel(ExpressionKernel): # }}} - # {{{ extended kernel getters class HankelBasedKernel(ExpressionKernel): def prepare_loopy_kernel(self, loopy_knl): @@ -176,11 +194,14 @@ def get_extkernel_for_G0(lambda1, lambda2): from pymbolic import var d = make_sym_vector("d", 3) - r2 = pymbolic_real_norm_2(d[:-1]) - expr = var("hankel_1")(0, var("I") * lambda1 * r2 - + var("I") * d[-1]**2) \ - - var("hankel_1")(0, var("I") * lambda2 * r2 - + var("I") * d[-1]**2) + # r2 = pymbolic_real_norm_2(d[:-1]) + r3 = pymbolic_real_norm_2(d) + #expr = var("hankel_1")(0, var("I") * lambda1 * r2 + # + var("I") * d[-1]**2) \ + # - var("hankel_1")(0, var("I") * lambda2 * r2 + # + var("I") * d[-1]**2) + expr = var("hankel_1")(0, var("I") * lambda1 * r3) \ + - var("hankel_1")(0, var("I") * lambda2 * r3) scaling = 1. / (4. * var("I") * (lambda1**2 - lambda2**2)) return HankelBasedKernel( @@ -193,8 +214,10 @@ def get_extkernel_for_G1(lamb): from pymbolic import var d = make_sym_vector("d", 3) - r2 = pymbolic_real_norm_2(d[:-1]) - expr = var("hankel_1")(0, var("I") * lamb * r2 + var("I") * d[-1]**2) + #r2 = pymbolic_real_norm_2(d[:-1]) + r3 = pymbolic_real_norm_2(d) + #expr = var("hankel_1")(0, var("I") * lamb * r2 + var("I") * d[-1]**2) + expr = var("hankel_1")(0, var("I") * lamb * r3) scaling = -var("I") / 4. 
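For reference, hankel_1(0, I*lam*r) with the -I/4 scaling is, modulo the sign convention chosen for the Green's function, the familiar 2d Yukawa kernel -K0(lam*r)/(2*pi), by the identity K0(z) = (pi/2) * I * H1_0(I*z). A quick scipy check of that scaling:

    import numpy as np
    from scipy.special import hankel1, k0

    lam, r = 1.5, 0.7
    lhs = -1j/4 * hankel1(0, 1j * lam * r)
    rhs = -1/(2*np.pi) * k0(lam * r)
    print(abs(lhs - rhs))   # ~ machine epsilon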
return HankelBasedKernel( @@ -292,30 +315,39 @@ def main(): # }}} + # {{{ visualizers + + from meshmode.discretization.visualization import make_visualizer + vol_vis = make_visualizer(queue, vol_discr, 20) + bdry_vis = make_visualizer(queue, density_discr, 20) + + # }}} + # {{{ setup operator and potentials - #print("-- setup Cahn-Hilliard operator") from pytential.symbolic.pde.cahn_hilliard import CahnHilliardOperator chop = CahnHilliardOperator(b=b, c=c) unk = chop.make_unknown("sigma") bound_op = bind(qbx, chop.operator(unk)) - #print ("-- construct kernels") yukawa_2d_in_3d_kernel_1 = get_extkernel_for_G1(chop.lambdas[0]) shidong_2d_in_3d_kernel = get_extkernel_for_G0(chop.lambdas[0], chop.lambdas[1]) - #print("-- construct layer potentials") from sumpy.qbx import LayerPotential from sumpy.expansion.local import LineTaylorLocalExpansion + layer_pot_v0f1 = LayerPotential(cl_ctx, [ LineTaylorLocalExpansion(shidong_2d_in_3d_kernel, order=vol_qbx_order) ]) layer_pot_v1f1 = LayerPotential(cl_ctx, [ LineTaylorLocalExpansion( yukawa_2d_in_3d_kernel_1, order=vol_qbx_order) - ]) + ]) + + #print(layer_pot_v0f1.get_kernel()) + # print(layer_pot_v1f1.get_kernel()) # }}} # {{{ setup for volume integral @@ -341,6 +373,9 @@ def main(): sources = cl.array.zeros(queue, (3, ) + vol_x.shape[1:], vol_x.dtype) sources[:2] = vol_x + #np.set_printoptions(threshold=np.inf) + #print(sources) + vol_weights = bind( vol_discr, p.area_element(mesh.ambient_dim, mesh.dim) * p.QWeight())(queue) @@ -366,16 +401,9 @@ def main(): time += delta_t output_vts_filename = get_vts_filename(timestep_number) - # a manufactured f1 function - #x_sin_factor = 30 - #y_sin_factor = 10 - #def f1_func(x, y): - # return 0.1 * cl.clmath.sin(x_sin_factor*x) \ - # * cl.clmath.sin(y_sin_factor*y) - # get f1 to compute strengths - f1 = f1_func(vol_x[0], vol_x[1]) - f2 = f2_func(vol_x[0], vol_x[1]) + f1 = rhs1_func(vol_x[0], vol_x[1]) + f2 = rhs2_func(vol_x[0], vol_x[1]) evt, (vol_pot_v0f1, ) = layer_pot_v0f1( queue, @@ -391,6 +419,34 @@ def main(): sources=sources.reshape(3, vol_discr.nnodes), strengths=((vol_weights * f1).reshape(vol_discr.nnodes), )) + plot_volume_potentials = True + if plot_volume_potentials: + + evt, (vol_pot_v0id, ) = layer_pot_v0f1( + queue, + targets=targets.reshape(3, vol_discr.nnodes), + centers=centers, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=((vol_weights * 1).reshape(vol_discr.nnodes), )) + + evt, (vol_pot_v1id, ) = layer_pot_v1f1( + queue, + targets=targets.reshape(3, vol_discr.nnodes), + centers=centers, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=((vol_weights * 1).reshape(vol_discr.nnodes), )) + + #print(vol_pot_v1f1.get().real[0:20]) + #print(vol_pot_v1f1.get().real[0:20]) + #print(vol_pot_v0id.get().real[0:20]) + #print(vol_pot_v1id.get().real[0:20]) + + clean_file("ch-volume.vtu") + vol_vis.write_vtk_file("ch-volume.vtu", [ + ("vol_pot_v0f1", vol_pot_v0f1), ("vol_pot_v1f1", vol_pot_v1f1), + ("vol_pot_v0id", vol_pot_v0id), ("vol_pot_v1id", vol_pot_v1id) + ]) + # {{{ fix rhs and solve nodes = density_discr.nodes().with_queue(queue) @@ -400,7 +456,7 @@ def main(): return cl.clmath.cos(5 * cl.clmath.atan2(y, x)) bc = sym.make_obj_array([ - # FIXME: Realistic BC + # TODO: Realistic BC 5 + g(nodes), 3 - g(nodes), ]) @@ -418,11 +474,56 @@ def main(): # }}} + # }}} + + # {{{ postprocess/visualize + + immersed_visualization = False + if immersed_visualization == True: + from sumpy.visualization import FieldPlotter + fplot = FieldPlotter(np.zeros(2), extent=1.5, npoints=500) + + 
targets = cl.array.to_device(queue, fplot.points) + + qbx_stick_out = qbx.copy(target_stick_out_factor=0.05) + indicator_qbx = qbx_stick_out.copy(qbx_order=2) + + from sumpy.kernel import LaplaceKernel + ones_density = density_discr.zeros(queue) + ones_density.fill(1) + indicator = bind((indicator_qbx, PointsTarget(targets)), + sym.D(LaplaceKernel(2), sym.var("sigma")))( + queue, sigma=ones_density).get() + + clean_file("failed-targets.vts") + clean_file("potential.vts") + + try: + u, v = bind((qbx_stick_out, PointsTarget(targets)), + chop.representation(unk))( + queue, sigma=sigma) + except QBXTargetAssociationFailedException as e: + fplot.write_vtk_file("failed-targets.vts", + [("failed", e.failed_target_flags.get(queue))]) + raise + u = u.get().real + v = v.get().real + + #fplot.show_scalar_in_mayavi(fld_in_vol.real, max_val=5) + fplot.write_vtk_file("potential.vts", [ + ("u", u), + ("v", v), + ("indicator", indicator), + ]) + + # }}} + # {{{ check pde def check_pde(): from sumpy.point_calculus import CalculusPatch - vec_h = [1e-1, 1e-2, 1e-3, 1e-4] + vec_h = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7] + #vec_h = [1e-1] vec_ru = [] vec_rv = [] vec_rp = [] @@ -431,6 +532,8 @@ def main(): vec_rf2 = [] vec_rutld = [] vec_rvtld = [] + vec_rv1id = [] # V1[id] + vec_rv0id = [] # V0[id] for dx in vec_h: cp = CalculusPatch(np.zeros(2), order=4, h=dx) targets = cl.array.to_device(queue, cp.points) @@ -447,7 +550,8 @@ def main(): # Check for homogeneous PDEs for u and v vec_ru.append( la.norm( - cp.laplace(lap_u) - chop.b * cp.laplace(u) + chop.c * u)) + cp.laplace(lap_u) - chop.b * cp.laplace(u) + + chop.c * u)) vec_rv.append(la.norm(v + cp.laplace(u) - chop.b * u)) # Check for inhomogeneous PDEs for phi and mu @@ -472,6 +576,16 @@ def main(): sources=sources.reshape(3, vol_discr.nnodes), strengths=((vol_weights * f1).reshape(vol_discr.nnodes), )) v0f1 = v0f1.get().real + print(v0f1) + + evt, (v0id, ) = layer_pot_v0f1( + queue, + targets=targets_in_3d, + centers=centers_in_3d, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=((vol_weights * 1).reshape(vol_discr.nnodes), )) + v0id = v0id.get().real + print(v0id) evt, (v1f1, ) = layer_pot_v1f1( queue, @@ -480,6 +594,23 @@ def main(): sources=sources.reshape(3, vol_discr.nnodes), strengths=((vol_weights * f1).reshape(vol_discr.nnodes), )) v1f1 = v1f1.get().real + print(v1f1) + + evt, (v1id, ) = layer_pot_v1f1( + queue, + targets=targets_in_3d, + centers=centers_in_3d, + sources=sources.reshape(3, vol_discr.nnodes), + strengths=((vol_weights * 1).reshape(vol_discr.nnodes), )) + v1id = v1id.get().real + print(v1id) + + # FIXME: Why this is equal to 0 (instead of 1) + vec_rv1id.append( + la.norm(cp.laplace(v1id) - chop.lambdas[1]**2 * v1id + - 1)) + vec_rv0id.append( + la.norm(cp.laplace(v0id) - chop.lambdas[1]**2 * v0id - v1id)) f14ck = cl.array.to_device(queue, f1_func(cp.points[0], cp.points[1])) @@ -506,15 +637,8 @@ def main(): # FIXME: Not passing this check. (bugs in volume integral?) 
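One way to sanity-check the FIXME above ("equal to 0 instead of 1"): for the unit density, the free-space Yukawa volume potential tends to a known constant in the interior, away from the boundary. Under the convention (lap - lam**2) G = delta, G(r) = -k0(lam*r)/(2*pi), and its integral over the whole plane is -1/lam**2, which a one-line radial quadrature confirms (a sketch; sumpy's sign convention may differ):

    import numpy as np
    from scipy.special import k0

    lam = 2.0
    r = np.linspace(1e-6, 40.0, 400000)
    # angular factor 2*pi cancels the 1/(2*pi) in the kernel
    print(-np.trapz(k0(lam * r) * r, r), -1.0 / lam**2)   # both ~ -0.25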
vec_rutld.append( la.norm( - cp.laplace(lap_utild) - chop.b * lap_utild + - chop.c * utild - f14ck)) - print(f14ck) - print(f1[0:10]) - print(cp.laplace(lap_utild)) - print((lap_utild)) - print((utild)) - print(chop.b) - print(chop.c) + cp.laplace(lap_utild) - chop.b * lap_utild + chop.c * utild + - f14ck)) vec_rvtld.append( la.norm(vtild + cp.laplace(utild) - chop.b * utild - f24ck)) @@ -542,6 +666,8 @@ def main(): ["residual_v"] + vec_rv, ["residual_f1"] + vec_rf1, ["residual_f2"] + vec_rf2, + ["residual_v1id"] + vec_rv1id, + ["residual_v0id"] + vec_rv0id, ["residual_u_tld"] + vec_rutld, ["residual_v_tld"] + vec_rvtld, ["residual_phi"] + vec_rp, @@ -550,60 +676,10 @@ def main(): file=f) 1 / 0 - check_pde() - - # }}} - - # {{{ postprocess/visualize - - from sumpy.visualization import FieldPlotter - fplot = FieldPlotter(np.zeros(2), extent=1.5, npoints=500) - - targets = cl.array.to_device(queue, fplot.points) - - qbx_stick_out = qbx.copy(target_stick_out_factor=0.05) - indicator_qbx = qbx_stick_out.copy(qbx_order=2) - - from sumpy.kernel import LaplaceKernel - ones_density = density_discr.zeros(queue) - ones_density.fill(1) - indicator = bind((indicator_qbx, PointsTarget(targets)), - sym.D(LaplaceKernel(2), sym.var("sigma")))( - queue, sigma=ones_density).get() - - # clean up the mess - def clean_file(filename): - import os - try: - os.remove(filename) - except OSError: - pass - - clean_file("failed-targets.vts") - clean_file("potential.vts") - - try: - u, v = bind((qbx_stick_out, PointsTarget(targets)), - chop.representation(unk))( - queue, sigma=sigma) - except QBXTargetAssociationFailedException as e: - fplot.write_vtk_file("failed-targets.vts", - [("failed", e.failed_target_flags.get(queue))]) - raise - u = u.get().real - v = v.get().real - - #fplot.show_scalar_in_mayavi(fld_in_vol.real, max_val=5) - fplot.write_vtk_file("potential.vts", [ - ("u", u), - ("v", v), - ("indicator", indicator), - ]) + # check_pde() # }}} -# }}} - if __name__ == "__main__": main() -- GitLab From ffec81cca246aa4f0214c0699080f95dba83dc09 Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 5 Mar 2018 20:51:25 -0600 Subject: [PATCH 15/30] Add representations needed for time marching --- pytential/symbolic/pde/cahn_hilliard.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pytential/symbolic/pde/cahn_hilliard.py b/pytential/symbolic/pde/cahn_hilliard.py index 2ac6167c..0cdceabe 100644 --- a/pytential/symbolic/pde/cahn_hilliard.py +++ b/pytential/symbolic/pde/cahn_hilliard.py @@ -112,13 +112,18 @@ class CahnHilliardOperator(L2WeightedPDEOperator): sig1, sig2 = unknown S_G = partial(self.S_G, qbx_forced_limit=None) # noqa: N806 laplacian = partial(sym.laplace, 2) + d_dx = partial(sym.d_dx, 2) + d_dy = partial(sym.d_dy, 2) def u(op_map=None): return S_G(1, sig1, op_map=op_map) + S_G(0, sig2, op_map=op_map) return sym.make_obj_array([ u(), - -u(op_map=laplacian) + self.b*u() + -u(op_map=laplacian) + self.b*u(), + u(op_map=d_dx), + u(op_map=d_dy), + u(op_map=laplacian) ]) def operator(self, unknown): -- GitLab From 4e1346c3c059dc785dc8922b4b4ff196735251fe Mon Sep 17 00:00:00 2001 From: xywei Date: Sun, 11 Mar 2018 18:59:42 -0500 Subject: [PATCH 16/30] Checkpoint before going wild --- pytential/symbolic/pde/cahn_hilliard.py | 65 +++++++++++++++++++++---- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/pytential/symbolic/pde/cahn_hilliard.py b/pytential/symbolic/pde/cahn_hilliard.py index 0cdceabe..60055e4a 100644 --- a/pytential/symbolic/pde/cahn_hilliard.py +++ 
b/pytential/symbolic/pde/cahn_hilliard.py
@@ -76,8 +76,14 @@ class CahnHilliardOperator(L2WeightedPDEOperator):
         else:
             lam2 = np.sqrt(root22)
 
-        assert np.abs(f(lam1**2)) < 1e-12
-        assert np.abs(f(lam2**2)) < 1e-12
+        if not np.abs(f(lam1**2)) < 1e-12:
+            import warnings
+            warnings.warn("Root lam1's residual exceeds 1e-12.")
+            print(np.abs(f(lam1**2)))
+        if not np.abs(f(lam2**2)) < 1e-12:
+            import warnings
+            warnings.warn("Root lam2's residual exceeds 1e-12.")
+            print(np.abs(f(lam2**2)))
 
         self.lambdas = sorted([lam1, lam2], key=abs, reverse=True)  # biggest first
 
@@ -106,11 +112,36 @@ class CahnHilliardOperator(L2WeightedPDEOperator):
                     op_map(sym.S(knl, density, lam=self.lambdas[i-1],
                         qbx_forced_limit=qbx_forced_limit)))
 
-    def representation(self, unknown):
+    def debug_representation(self, unknown, qbx_forced_limit=None):
+        """Return some layer potentials used for debugging.
+        """
+        sig1, sig2 = unknown
+        lam1, lam2 = self.lambdas
+        S_G = partial(self.S_G, qbx_forced_limit=qbx_forced_limit)  # noqa: N806
+        laplacian = partial(sym.laplace, 2)
+        d_dx = partial(sym.d_dx, 2)
+        d_dy = partial(sym.d_dy, 2)
+
+        def Sn_G(i, density):  # noqa
+            return self.S_G(i, density,
+                    qbx_forced_limit=None,
+                    op_map=partial(sym.normal_derivative, 2))
+
+        return sym.make_obj_array([
+            S_G(0, sig1),
+            #Sn_G(0, sig1),
+            S_G(1, sig1),
+            #Sn_G(1, sig1),
+            S_G(2, sig1),
+            #Sn_G(2, sig1)
+            ])
+
+    def representation(self, unknown, qbx_forced_limit=None):
         """Return (u, v) in a :mod:`numpy` object array.
         """
         sig1, sig2 = unknown
-        S_G = partial(self.S_G, qbx_forced_limit=None)  # noqa: N806
+        lam1, lam2 = self.lambdas
+        S_G = partial(self.S_G, qbx_forced_limit=qbx_forced_limit)  # noqa: N806
         laplacian = partial(sym.laplace, 2)
         d_dx = partial(sym.d_dx, 2)
         d_dy = partial(sym.d_dy, 2)
@@ -118,20 +149,32 @@ class CahnHilliardOperator(L2WeightedPDEOperator):
         def u(op_map=None):
             return S_G(1, sig1, op_map=op_map) + S_G(0, sig2, op_map=op_map)
 
+        def v(op_map=None):
+            return lam2**2 * S_G(1, sig1, op_map=op_map) \
+                    - S_G(1, sig2, op_map=op_map) \
+                    + lam1**2 * S_G(0, sig2, op_map=op_map)
+
+        # v = - lap_u + b*u
         return sym.make_obj_array([
             u(),
             -u(op_map=laplacian) + self.b*u(),
             u(op_map=d_dx),
             u(op_map=d_dy),
-            u(op_map=laplacian)
+            u(op_map=laplacian),
+            v(),
+            v(op_map=d_dx),
+            v(op_map=d_dy)
            ])
 
-    def operator(self, unknown):
+    def operator(self, unknown, bdry_splitting_parameter=0):
         sig1, sig2 = unknown
         lam1, lam2 = self.lambdas
-        S_G = partial(self.S_G, qbx_forced_limit=1)  # noqa: N806
+        S_G = partial(self.S_G, qbx_forced_limit=-1)  # noqa: N806
+        # c = 1 / (epsilon * delta_t)
         c = self.c
+        # bdry_splitting_parameter = alpha_tild / epsilon
+        bs = bdry_splitting_parameter
 
         def Sn_G(i, density):  # noqa
             return self.S_G(i, density,
@@ -142,11 +185,13 @@ class CahnHilliardOperator(L2WeightedPDEOperator):
                 0.5*sig1,
                 0.5*lam2**2*sig1 - 0.5*sig2
                 ])
+
+        # Relaxational BC
         a = sym.make_obj_array([
-                # A11
-                Sn_G(1, sig1) + c*S_G(1, sig1)
+                # A10
+                Sn_G(1, sig1) + (c-bs)*S_G(1, sig1)
                 # A12
-                + Sn_G(0, sig2) + c*S_G(0, sig2),
+                + Sn_G(0, sig2) + (c-bs)*S_G(0, sig2),
 
                 # A21
                 lam2**2*Sn_G(1, sig1)
-- 
GitLab


From 842a9a5f979568d5a86aa1a51e1a07bbf9cc41a5 Mon Sep 17 00:00:00 2001
From: xywei 
Date: Fri, 4 May 2018 21:35:56 -0500
Subject: [PATCH 17/30] Stage

---
 pytential/symbolic/pde/cahn_hilliard.py | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/pytential/symbolic/pde/cahn_hilliard.py b/pytential/symbolic/pde/cahn_hilliard.py
index 60055e4a..448cef79 100644
--- a/pytential/symbolic/pde/cahn_hilliard.py
+++ b/pytential/symbolic/pde/cahn_hilliard.py @@ -116,24 +116,12 @@ class CahnHilliardOperator(L2WeightedPDEOperator): """Return some layer potentials used for debugging """ sig1, sig2 = unknown - lam1, lam2 = self.lambdas S_G = partial(self.S_G, qbx_forced_limit=qbx_forced_limit) # noqa: N806 - laplacian = partial(sym.laplace, 2) - d_dx = partial(sym.d_dx, 2) - d_dy = partial(sym.d_dy, 2) - - def Sn_G(i, density): # noqa - return self.S_G(i, density, - qbx_forced_limit=None, - op_map=partial(sym.normal_derivative, 2)) return sym.make_obj_array([ S_G(0, sig1), - #Sn_G(0, sig1), S_G(1, sig1), - #Sn_G(1, sig1), S_G(2, sig1), - #Sn_G(2, sig1) ]) def representation(self, unknown, qbx_forced_limit=None): @@ -161,12 +149,13 @@ class CahnHilliardOperator(L2WeightedPDEOperator): u(op_map=d_dx), u(op_map=d_dy), u(op_map=laplacian), + # self.b*u() - v(), v(), v(op_map=d_dx), v(op_map=d_dy) ]) - def operator(self, unknown, bdry_splitting_parameter=0): + def operator(self, unknown, rscale=(1,1), bdry_splitting_parameter=0): sig1, sig2 = unknown lam1, lam2 = self.lambdas S_G = partial(self.S_G, qbx_forced_limit=-1) # noqa: N806 @@ -181,12 +170,14 @@ class CahnHilliardOperator(L2WeightedPDEOperator): qbx_forced_limit="avg", op_map=partial(sym.normal_derivative, 2)) - d = sym.make_obj_array([ + # flip signs + d = - sym.make_obj_array([ 0.5*sig1, 0.5*lam2**2*sig1 - 0.5*sig2 ]) # Relaxational BC + rs2 = rscale[1] / rscale[0] a = sym.make_obj_array([ # A10 Sn_G(1, sig1) + (c-bs)*S_G(1, sig1) @@ -194,9 +185,9 @@ class CahnHilliardOperator(L2WeightedPDEOperator): + Sn_G(0, sig2) + (c-bs)*S_G(0, sig2), # A21 - lam2**2*Sn_G(1, sig1) + rs2 * lam2**2*Sn_G(1, sig1) # A22 - - Sn_G(1, sig2) + lam1**2*Sn_G(0, sig2) + - rs2 * Sn_G(1, sig2) + lam1**2*Sn_G(0, sig2) ]) return d+a -- GitLab From 5738d0dd37e750914524ba18864406e19a64b27a Mon Sep 17 00:00:00 2001 From: xywei Date: Tue, 19 Jun 2018 17:02:32 +0800 Subject: [PATCH 18/30] Adding biharmonic eqn --- pytential/symbolic/pde/biharmonic.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 pytential/symbolic/pde/biharmonic.py diff --git a/pytential/symbolic/pde/biharmonic.py b/pytential/symbolic/pde/biharmonic.py new file mode 100644 index 00000000..6e06f0ca --- /dev/null +++ b/pytential/symbolic/pde/biharmonic.py @@ -0,0 +1,26 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2018 Xiaoyu Wei" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +__doc__ = """ +.. 
autoclass:: BiharmonicOperator -- GitLab From dbbe2db5602045991056e92bb131a5e6f46de6c2 Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 2 Jul 2018 11:55:57 +0800 Subject: [PATCH 19/30] Fixes a formula in the doc --- pytential/symbolic/primitives.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytential/symbolic/primitives.py b/pytential/symbolic/primitives.py index 3a297fd1..b16c6d60 100644 --- a/pytential/symbolic/primitives.py +++ b/pytential/symbolic/primitives.py @@ -1168,8 +1168,8 @@ def int_g_dsource(ambient_dim, dsource, kernel, density, r""" .. math:: - \int_\Gamma \operatorname{dsource} \dot \nabla_y - \dot g(x-y) \sigma(y) dS_y + \int_\Gamma \operatorname{dsource} \cdot \nabla_y + \cdot g(x-y) \sigma(y) dS_y where :math:`\sigma` is *density*, and *dsource*, a multivector. -- GitLab From 0227f4c94489a838829c1e29fdd013997ab64143 Mon Sep 17 00:00:00 2001 From: xywei Date: Tue, 3 Jul 2018 22:38:22 +0800 Subject: [PATCH 20/30] Adding biharmonic eqn --- experiments/biharmonic.py | 223 +++++++++++++++++++++++++++ pytential/symbolic/pde/biharmonic.py | 106 +++++++++++++ 2 files changed, 329 insertions(+) create mode 100644 experiments/biharmonic.py diff --git a/experiments/biharmonic.py b/experiments/biharmonic.py new file mode 100644 index 00000000..083c2dee --- /dev/null +++ b/experiments/biharmonic.py @@ -0,0 +1,223 @@ +__copyright__ = "Copyright (C) 2018 Xiaoyu Wei" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
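The module added here builds on the Almansi representation: rho**2 times a harmonic function is biharmonic, as is any harmonic function itself. A short sympy check of that fact with a sample harmonic polynomial:

    import sympy as sp

    x, y = sp.symbols("x y")
    psi = x**3 - 3*x*y**2                      # a harmonic polynomial
    u = (x**2 + y**2) * psi

    lap = lambda f: sp.diff(f, x, 2) + sp.diff(f, y, 2)
    print(sp.simplify(lap(psi)))               # 0: psi is harmonic
    print(sp.simplify(lap(lap(u))))            # 0: rho**2 * psi is biharmonic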
+""" + +import numpy as np + +# {{{ set some constants for use below + +dim = 2 +nelements = 20 +bdry_quad_order = 4 +mesh_order = bdry_quad_order +qbx_order = bdry_quad_order +bdry_ovsmp_quad_order = 4*bdry_quad_order +fmm_order = 10 + +loc_sign = -1 +x_0 = np.zeros(dim) +# x_0 = np.array([0.5, 0]) + +# }}} + +def main(): + import logging + logging.basicConfig(level=logging.WARNING) # INFO for more progress info + logger = logging.getLogger(__name__) + + import numpy as np + import pyopencl as cl + import pyopencl.clmath + from pytential import sym + + cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx) + + # {{{ make mesh + + from meshmode.mesh.generation import ellipse, make_curve_mesh + from functools import partial + + if 1 and dim == 2: + # one circle + mesh = make_curve_mesh( + partial(ellipse, 1), + np.linspace(0, 1, nelements+1), + mesh_order) + elif dim == 2: + # array of circles + base_mesh = make_curve_mesh( + partial(ellipse, 1), + np.linspace(0, 1, nelements+1), + mesh_order) + + from meshmode.mesh.processing import affine_map, merge_disjoint_meshes + nx = 2 + ny = 2 + dx = 2 / nx + meshes = [ + affine_map( + base_mesh, + A=np.diag([dx*0.25, dx*0.25]), + b=np.array([dx*(ix-nx/2), dx*(iy-ny/2)])) + for ix in range(nx) + for iy in range(ny)] + + mesh = merge_disjoint_meshes(meshes, single_group=True) + else: + raise NotImplementedError("Could not find a way to get the mesh.") + + if 0: + from meshmode.mesh.visualization import draw_curve + draw_curve(mesh) + import matplotlib.pyplot as plt + plt.show() + + # }}} End make mesh + + from meshmode.discretization import Discretization + from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory + pre_density_discr = Discretization( + cl_ctx, mesh, + InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order)) + + from pytential.qbx import ( + QBXLayerPotentialSource, QBXTargetAssociationFailedException) + qbx, _ = QBXLayerPotentialSource( + pre_density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, + fmm_order=fmm_order + ).with_refinement() + density_discr = qbx.density_discr + + # {{{ describe bvp + + from pytential.symbolic.pde.biharmonic import BiharmonicOperator + biharmonic_operator = BiharmonicOperator(dim, loc_sign, x_0) + + from pytential import bind + unk = biharmonic_operator.make_unknown("sigma") + bdry_op_sym = biharmonic_operator.operator(unk) + + # }}} + + bound_op = bind(qbx, bdry_op_sym) + + # {{{ fix rhs and solve + + nodes = density_discr.nodes().with_queue(queue) + + def bdry_conditions_func(x): + if len(x) == 2: + return sym.make_obj_array([ + x[0] * 0 + 3, + x[0] * 0 + 0 + ]) + else: + raise NotImplementedError + + bc = bdry_conditions_func(nodes) + + from pytential.solve import gmres + gmres_result = gmres( + bound_op.scipy_op(queue, "sigma", dtype=np.float64), + bc, tol=1e-8, progress=True, + stall_iterations=0, + hard_failure=False) + + # }}} + + # {{{ postprocess/visualize + + from datetime import datetime + def file_id(): + # return str(datetime.now()).replace(' ', '::') + return 'latest' + + sigma = gmres_result.solution + + if 0: + logger.warn("Overwriting solutions for debugging") + # test out layer potentials + sigma[0] *= 0 + sigma[1] *= 0 + + sigma[0] += 0 + sigma[1] += 1 + + representation_sym = biharmonic_operator.representation(unk, qbx_forced_limit=None) + from sumpy.visualization import FieldPlotter + fplot = FieldPlotter(np.zeros(dim), extent=5, npoints=500) + + targets = cl.array.to_device(queue, fplot.points) + + 
bdry_conditions = bdry_conditions_func(targets) + + qbx_stick_out = qbx.copy(target_association_tolerance=0.05) + + if 1: + # boundary plot for operator info + visual_order = 2 + from meshmode.discretization.visualization import make_visualizer + solu_on_bdry = bind((qbx, density_discr), bdry_op_sym)(queue, sigma=sigma) + bdplot = make_visualizer(queue, density_discr, visual_order) + bdry_normals = bind(density_discr, sym.normal(dim))(queue)\ + .as_vector(dtype=object) + + bdplot.write_vtk_file( + "potential-biharm-bdry-%s.vtu" % file_id(), + [ + ("sig1", sigma[0]), + ("sig2", sigma[1]), + ("bdry_normals", bdry_normals), + ("u", solu_on_bdry[0]), + ("u_n", solu_on_bdry[1]), + ("bc1", bc[0]), + ("bc2", bc[1]), + ] + ) + + try: + from pytential.target import PointsTarget + solu_representation = bind( + (qbx_stick_out, PointsTarget(targets)), + representation_sym)(queue, sigma=sigma) + fld_in_vol = solu_representation[0].get() + except QBXTargetAssociationFailedException as e: + fplot.write_vtk_file( + "failed-targets-%s.vts" % file_id(), + [ + ("failed", e.failed_target_flags.get(queue)) + ] + ) + raise + + fplot.write_vtk_file( + "potential-biharm-%s.vts" % file_id(), + [ + ("potential", fld_in_vol), + ] + ) + + # }}} + +if __name__ == "__main__": + main() diff --git a/pytential/symbolic/pde/biharmonic.py b/pytential/symbolic/pde/biharmonic.py index 6e06f0ca..2874bc6d 100644 --- a/pytential/symbolic/pde/biharmonic.py +++ b/pytential/symbolic/pde/biharmonic.py @@ -24,3 +24,109 @@ THE SOFTWARE. __doc__ = """ .. autoclass:: BiharmonicOperator +""" + +import numpy as np +from pytential.symbolic.pde.scalar import L2WeightedPDEOperator +from pytential import sym +from functools import partial + +from pymbolic.primitives import make_common_subexpression as cse + + +class BiharmonicOperator(L2WeightedPDEOperator): + """Biharmonic operator, subject to boundary conditions on + solution values and normal derivatives. + """ + + def __init__(self, dim, loc_sign, x_0=None, + use_l2_weighting=False, kernel_arguments=None): + """ + :arg loc_sign: +1 for exterior, -1 for interior. + :arg x_0: origin x_0 for the Almansi representation (default to be 0). 
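The same Almansi argument explains why the representation below is biharmonic away from the boundary for any choice of the origin x_0: the Laplace kernel is harmonic in the target variable, and multiplying by rho**2 = |x - x_0|**2 preserves biharmonicity. A sympy spot-check (s_x, s_y is a source point, x_0, y_0 an arbitrary origin):

    import sympy as sp

    x, y, sx, sy, x0, y0 = sp.symbols("x y s_x s_y x_0 y_0", real=True)
    G = sp.log((x - sx)**2 + (y - sy)**2)      # 2d Laplace kernel, up to constants
    rho_sq = (x - x0)**2 + (y - y0)**2

    lap = lambda f: sp.diff(f, x, 2) + sp.diff(f, y, 2)
    print(sp.simplify(lap(G)))                 # 0 away from the source
    print(sp.simplify(lap(lap(rho_sq * G))))   # 0 for any x_0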
+        """
+
+        assert loc_sign in [-1, 1]
+        assert dim in [2, 3]
+
+        from sumpy.kernel import LaplaceKernel
+        kernel = LaplaceKernel(dim)
+
+        if x_0 is None:
+            x_0 = np.zeros(dim)
+        elif not isinstance(x_0, np.ndarray):
+            x_0 = np.array(x_0)
+        assert len(x_0) == dim
+        self.x_0 = x_0
+
+        if kernel_arguments is None:
+            kernel_arguments = {}
+        self.kernel_arguments = kernel_arguments
+
+        self.loc_sign = loc_sign
+
+        L2WeightedPDEOperator.__init__(self, kernel, use_l2_weighting)
+
+    def make_unknown(self, name):
+        return sym.make_sym_vector(name, 2)
+
+    def S(self, density, qbx_forced_limit):
+        return sym.S(self.kernel, density,
+                kernel_arguments=self.kernel_arguments,
+                qbx_forced_limit=qbx_forced_limit)
+
+    def D(self, density, qbx_forced_limit):
+        return sym.D(self.kernel, density,
+                kernel_arguments=self.kernel_arguments,
+                qbx_forced_limit=qbx_forced_limit)
+
+    def Sp(self, density, qbx_forced_limit):
+        return sym.Sp(self.kernel, density,
+                dim=self.kernel.dim,
+                qbx_forced_limit=qbx_forced_limit)
+
+    def Dp(self, density, qbx_forced_limit):
+        return sym.Dp(self.kernel, density,
+                dim=self.kernel.dim,
+                qbx_forced_limit=qbx_forced_limit)
+
+    def representation(self, unknown, qbx_forced_limit=None):
+        sig1, sig2 = unknown
+        S = partial(self.S, qbx_forced_limit=qbx_forced_limit)
+        D = partial(self.D, qbx_forced_limit=qbx_forced_limit)
+
+        r = sym.nodes(self.kernel.dim).as_vector() - self.x_0
+        rho_sq = cse(np.dot(r, r), "rho_sq")
+
+        def grad(operand):
+            return sym.grad(ambient_dim=self.kernel.dim, operand=operand)
+
+        return sym.make_obj_array([
+            rho_sq * D(sig1) + S(sig2),
+            2 * r * D(sig1) + rho_sq * grad(D(sig1)) + grad(S(sig2)),
+            ])
+
+    def operator(self, unknown):
+        sig1, sig2 = unknown
+        S = partial(self.S, qbx_forced_limit='avg')
+        D = partial(self.D, qbx_forced_limit='avg')
+        Sp = partial(self.Sp, qbx_forced_limit='avg')
+        Dp = partial(self.Dp, qbx_forced_limit='avg')
+
+        r = sym.nodes(self.kernel.dim).as_vector() - self.x_0
+        rho_sq = cse(np.dot(r, r), "rho_sq")
+
+        nrml = sym.normal(self.kernel.dim).as_vector()
+        drhosq_dn = cse(2 * np.dot(r, nrml), "drhosq_dn")
+
+        d = 0.5 * self.loc_sign * sym.make_obj_array([
+            rho_sq * sig1,
+            drhosq_dn * sig1 - sig2,
+            ])
+
+        a = sym.make_obj_array([  # noqa
+            rho_sq * D(sig1) + S(sig2),
+            drhosq_dn * D(sig1) + rho_sq * Dp(sig1) + Sp(sig2)
+            ])
+
+        return d + a
-- 
GitLab


From ffec81cca246aa4f0214c0699080f95dba83dc09 Mon Sep 17 00:00:00 2001
From: xywei 
Date: Tue, 2 Oct 2018 15:32:54 -0500
Subject: [PATCH 21/30] Left/right preconditioners for CH

---
 examples/laplace-dirichlet-3d.py        |  4 +-
 pytential/symbolic/pde/cahn_hilliard.py | 57 +++++++++++++++++++++++--
 2 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/examples/laplace-dirichlet-3d.py b/examples/laplace-dirichlet-3d.py
index 4166dddf..dd4c752d 100644
--- a/examples/laplace-dirichlet-3d.py
+++ b/examples/laplace-dirichlet-3d.py
@@ -78,8 +78,8 @@ def main():
     cse = sym.cse
 
     sigma_sym = sym.var("sigma")
-    #sqrt_w = sym.sqrt_jac_q_weight(3)
-    sqrt_w = 1
+    sqrt_w = sym.sqrt_jac_q_weight(3)
+    #sqrt_w = 1
     inv_sqrt_w_sigma = cse(sigma_sym/sqrt_w)
 
     # -1 for interior Dirichlet
diff --git a/pytential/symbolic/pde/cahn_hilliard.py b/pytential/symbolic/pde/cahn_hilliard.py
index 448cef79..47e0b60f 100644
--- a/pytential/symbolic/pde/cahn_hilliard.py
+++ b/pytential/symbolic/pde/cahn_hilliard.py
@@ -155,11 +155,39 @@ class CahnHilliardOperator(L2WeightedPDEOperator):
             v(op_map=d_dy)
             ])
 
-    def operator(self, unknown,
rscale=None, bdry_splitting_parameter=0, + right_precon=None, + left_precon=None): + """ + rscale: this API is deprecated, use left_precon instead. + left_precon, right_precon: left/right preconditionors. + It only accepts diagonal preconditioners for the moment. + """ sig1, sig2 = unknown lam1, lam2 = self.lambdas S_G = partial(self.S_G, qbx_forced_limit=-1) # noqa: N806 + if rscale is not None: + raise NotImplementedError("rscale deprecated, use left_precon instead") + + if right_precon is None: + rp = np.eye(2) + else: + rp = right_precon + assert abs(rp[0, 1]) < 1e-12 + assert abs(rp[1, 0]) < 1e-12 + r1 = rp[0, 0] + r2 = rp[1, 1] + + if left_precon is None: + lp = np.eye(2) + else: + lp = left_precon + assert abs(lp[0, 1]) < 1e-12 + assert abs(lp[1, 0]) < 1e-12 + l1 = lp[0, 0] + l2 = lp[1, 1] + # c = 1 / (epsilon * delta_t) c = self.c # bdry_splitting_parameter = alpha_tild / epsilon @@ -170,14 +198,34 @@ class CahnHilliardOperator(L2WeightedPDEOperator): qbx_forced_limit="avg", op_map=partial(sym.normal_derivative, 2)) + d = sym.make_obj_array([ + - 0.5 * sig1 * l1 * r1, + - 0.5 * lam2**2 * sig1 * l2 * r1 + 0.5 * sig2 * l2 * r2 + ]) + + """ # flip signs d = - sym.make_obj_array([ 0.5*sig1, 0.5*lam2**2*sig1 - 0.5*sig2 ]) + """ + + # Relaxational BC + a = sym.make_obj_array([ + # A11 + (Sn_G(1, sig1) + (c-bs)*S_G(1, sig1)) * l1 * r1 + # A12 + + (Sn_G(0, sig2) + (c-bs)*S_G(0, sig2)) * l1 * r2, + + # A21 + (lam2**2*Sn_G(1, sig1)) * l2 * r1 + # A22 + - (Sn_G(1, sig2) + lam1**2*Sn_G(0, sig2)) * l2 * r2 + ]) + """ # Relaxational BC - rs2 = rscale[1] / rscale[0] a = sym.make_obj_array([ # A10 Sn_G(1, sig1) + (c-bs)*S_G(1, sig1) @@ -185,9 +233,10 @@ class CahnHilliardOperator(L2WeightedPDEOperator): + Sn_G(0, sig2) + (c-bs)*S_G(0, sig2), # A21 - rs2 * lam2**2*Sn_G(1, sig1) + lam2**2*Sn_G(1, sig1) # A22 - - rs2 * Sn_G(1, sig2) + lam1**2*Sn_G(0, sig2) + - Sn_G(1, sig2) + lam1**2*Sn_G(0, sig2) ]) + """ return d+a -- GitLab From 96bd4d9d5d6eb10c151fcc05c0882183292df4bf Mon Sep 17 00:00:00 2001 From: xywei Date: Sat, 20 Oct 2018 11:05:38 -0500 Subject: [PATCH 22/30] Sync TimingFuture changes --- pytential/qbx/__init__.py | 18 ++++-- pytential/qbx/fmm.py | 93 +++++++++++++++++++++++-------- pytential/qbx/fmmlib.py | 101 +++++++++++++++++++++++----------- pytential/qbx/target_assoc.py | 4 +- 4 files changed, 152 insertions(+), 64 deletions(-) diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index d3cd843a..188660f7 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -106,6 +106,12 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): # {{{ argument processing + if fine_order is None: + raise ValueError("fine_order must be provided.") + + if qbx_order is None: + raise ValueError("qbx_order must be provided.") + if target_stick_out_factor is not _not_provided: from warnings import warn warn("target_stick_out_factor has been renamed to " @@ -558,7 +564,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): maxstretch = bind( self, sym._simplex_mapping_max_stretch_factor( self.ambient_dim, - where=sym._QBXSourceStage2(sym.DEFAULT_SOURCE)) + where=sym.QBXSourceStage2(sym.DEFAULT_SOURCE)) )(queue) maxstretch = utils.to_last_dim_length( self.stage2_density_discr, maxstretch, last_dim_length) @@ -751,7 +757,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): if self.geometry_data_inspector is not None: perform_fmm = self.geometry_data_inspector(insn, bound_expr, geo_data) if not perform_fmm: - return [(o.name, 0) for o in insn.outputs], [] + 
return [(o.name, 0) for o in insn.outputs] # }}} @@ -774,7 +780,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): (o.name, all_potentials_on_every_tgt[o.kernel_index][tgt_slice])) - return result, [] + return result # }}} @@ -935,7 +941,7 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): result.append((o.name, output_for_each_kernel[o.kernel_index])) - return result, [] + return result # }}} @@ -945,8 +951,8 @@ class QBXLayerPotentialSource(LayerPotentialSourceBase): __all__ = ( - QBXLayerPotentialSource, - QBXTargetAssociationFailedException, + "QBXLayerPotentialSource", + "QBXTargetAssociationFailedException", ) # vim: fdm=marker diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py index 4be9e5ca..46d7ae88 100644 --- a/pytential/qbx/fmm.py +++ b/pytential/qbx/fmm.py @@ -28,11 +28,12 @@ import numpy as np # noqa import pyopencl as cl # noqa import pyopencl.array # noqa from sumpy.fmm import (SumpyExpansionWranglerCodeContainer, - SumpyExpansionWrangler, level_to_rscale) + SumpyExpansionWrangler, level_to_rscale, SumpyTimingFuture) from pytools import memoize_method from pytential.qbx.interactions import P2QBXLFromCSR, M2QBXL, L2QBXL, QBXL2P +from boxtree.fmm import TimingRecorder from pytools import log_process, ProcessLogger import logging @@ -229,7 +230,7 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), assert local_exps is result result.add_event(evt) - return result + return (result, SumpyTimingFuture(self.queue, [evt])) @log_process(logger) def translate_box_multipoles_to_qbx_local(self, multipole_exps): @@ -243,6 +244,8 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), wait_for = multipole_exps.events + events = [] + for isrc_level, ssn in enumerate(traversal.from_sep_smaller_by_level): m2qbxl = self.code.m2qbxl( self.level_orders[isrc_level], @@ -273,12 +276,14 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), **self.kernel_extra_kwargs) + events.append(evt) + wait_for = [evt] assert qbx_expansions_res is qbx_expansions qbx_expansions.add_event(evt) - return qbx_expansions + return (qbx_expansions, SumpyTimingFuture(self.queue, events)) @log_process(logger) def translate_box_local_to_qbx_local(self, local_exps): @@ -291,6 +296,8 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), wait_for = local_exps.events + events = [] + for isrc_level in range(geo_data.tree().nlevels): l2qbxl = self.code.l2qbxl( self.level_orders[isrc_level], @@ -318,12 +325,14 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), **self.kernel_extra_kwargs) + events.append(evt) + wait_for = [evt] assert qbx_expansions_res is qbx_expansions qbx_expansions.add_event(evt) - return qbx_expansions + return (qbx_expansions, SumpyTimingFuture(self.queue, events)) @log_process(logger) def eval_qbx_expansions(self, qbx_expansions): @@ -356,7 +365,7 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), for pot_i, pot_res_i in zip(pot, pot_res): assert pot_i is pot_res_i - return pot + return (pot, SumpyTimingFuture(self.queue, [evt])) # }}} @@ -365,7 +374,7 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), # {{{ FMM top-level -def drive_fmm(expansion_wrangler, src_weights): +def drive_fmm(expansion_wrangler, src_weights, timing_data=None): """Top-level driver routine for the QBX fast multipole calculation. :arg geo_data: A :class:`QBXFMMGeometryData` instance. @@ -373,6 +382,8 @@ def drive_fmm(expansion_wrangler, src_weights): :class:`ExpansionWranglerInterface`. :arg src_weights: Source 'density/weights/charges'. Passed unmodified to *expansion_wrangler*. 
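With this change, callers opt into timing collection simply by passing a dictionary; nothing else in the interface moves. A usage sketch (`wrangler` and `src_weights` assumed to exist as at the existing call sites):

    timing_data = {}
    result = drive_fmm(wrangler, src_weights, timing_data=timing_data)

    # timing_data now maps stage names ("form_multipoles", "eval_direct",
    # ...) to the results summarized from the TimingRecorder
    for stage, timing in sorted(timing_data.items()):
        print(stage, timing)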
+ :arg timing_data: Either *None* or a dictionary that collects + timing data. Returns the potentials computed by *expansion_wrangler*. @@ -383,6 +394,7 @@ def drive_fmm(expansion_wrangler, src_weights): geo_data = wrangler.geo_data traversal = geo_data.traversal() tree = traversal.tree + recorder = TimingRecorder() # Interface guidelines: Attributes of the tree are assumed to be known # to the expansion wrangler and should not be passed. @@ -393,41 +405,49 @@ def drive_fmm(expansion_wrangler, src_weights): # {{{ construct local multipoles - mpole_exps = wrangler.form_multipoles( + mpole_exps, timing_future = wrangler.form_multipoles( traversal.level_start_source_box_nrs, traversal.source_boxes, src_weights) + recorder.add("form_multipoles", timing_future) + # }}} # {{{ propagate multipoles upward - wrangler.coarsen_multipoles( + mpole_exps, timing_future = wrangler.coarsen_multipoles( traversal.level_start_source_parent_box_nrs, traversal.source_parent_boxes, mpole_exps) + recorder.add("coarsen_multipoles", timing_future) + # }}} # {{{ direct evaluation from neighbor source boxes ("list 1") - non_qbx_potentials = wrangler.eval_direct( + non_qbx_potentials, timing_future = wrangler.eval_direct( traversal.target_boxes, traversal.neighbor_source_boxes_starts, traversal.neighbor_source_boxes_lists, src_weights) + recorder.add("eval_direct", timing_future) + # }}} # {{{ translate separated siblings' ("list 2") mpoles to local - local_exps = wrangler.multipole_to_local( + local_exps, timing_future = wrangler.multipole_to_local( traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, traversal.from_sep_siblings_starts, traversal.from_sep_siblings_lists, mpole_exps) + recorder.add("multipole_to_local", timing_future) + # }}} # {{{ evaluate sep. 
smaller mpoles ("list 3") at particles @@ -435,11 +455,15 @@ def drive_fmm(expansion_wrangler, src_weights): # (the point of aiming this stage at particles is specifically to keep its # contribution *out* of the downward-propagating local expansions) - non_qbx_potentials = non_qbx_potentials + wrangler.eval_multipoles( + mpole_result, timing_future = wrangler.eval_multipoles( traversal.target_boxes_sep_smaller_by_source_level, traversal.from_sep_smaller_by_level, mpole_exps) + recorder.add("eval_multipoles", timing_future) + + non_qbx_potentials = non_qbx_potentials + mpole_result + # assert that list 3 close has been merged into list 1 assert traversal.from_sep_close_smaller_starts is None @@ -447,13 +471,17 @@ def drive_fmm(expansion_wrangler, src_weights): # {{{ form locals for separated bigger source boxes ("list 4") - local_exps = local_exps + wrangler.form_locals( + local_result, timing_future = wrangler.form_locals( traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, traversal.from_sep_bigger_starts, traversal.from_sep_bigger_lists, src_weights) + recorder.add("form_locals", timing_future) + + local_exps = local_exps + local_result + # assert that list 4 close has been merged into list 1 assert traversal.from_sep_close_bigger_starts is None @@ -461,34 +489,51 @@ def drive_fmm(expansion_wrangler, src_weights): # {{{ propagate local_exps downward - wrangler.refine_locals( + local_exps, timing_future = wrangler.refine_locals( traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, local_exps) + recorder.add("refine_locals", timing_future) + # }}} # {{{ evaluate locals - non_qbx_potentials = non_qbx_potentials + wrangler.eval_locals( + local_result, timing_future = wrangler.eval_locals( traversal.level_start_target_box_nrs, traversal.target_boxes, local_exps) + recorder.add("eval_locals", timing_future) + + non_qbx_potentials = non_qbx_potentials + local_result + # }}} # {{{ wrangle qbx expansions - qbx_expansions = wrangler.form_global_qbx_locals(src_weights) + qbx_expansions, timing_future = wrangler.form_global_qbx_locals(src_weights) + + recorder.add("form_global_qbx_locals", timing_future) + + local_result, timing_future = ( + wrangler.translate_box_multipoles_to_qbx_local(mpole_exps)) - qbx_expansions = qbx_expansions + \ - wrangler.translate_box_multipoles_to_qbx_local(mpole_exps) + recorder.add("translate_box_multipoles_to_qbx_local", timing_future) - qbx_expansions = qbx_expansions + \ - wrangler.translate_box_local_to_qbx_local(local_exps) + qbx_expansions = qbx_expansions + local_result - qbx_potentials = wrangler.eval_qbx_expansions( - qbx_expansions) + local_result, timing_future = ( + wrangler.translate_box_local_to_qbx_local(local_exps)) + + recorder.add("translate_box_local_to_qbx_local", timing_future) + + qbx_expansions = qbx_expansions + local_result + + qbx_potentials, timing_future = wrangler.eval_qbx_expansions(qbx_expansions) + + recorder.add("eval_qbx_expansions", timing_future) # }}} @@ -516,6 +561,9 @@ def drive_fmm(expansion_wrangler, src_weights): fmm_proc.done() + if timing_data is not None: + timing_data.update(recorder.summarize()) + return result # }}} @@ -761,7 +809,8 @@ def assemble_performance_data(geo_data, uses_pde_expansions, # {{{ evaluate locals - result["eval_part"] = tree.ntargets * ncoeffs_fmm + non_qbx_box_targets = geo_data.non_qbx_box_target_lists() + result["eval_part"] = non_qbx_box_targets.nfiltered_targets * ncoeffs_fmm # }}} diff --git 
a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py index 2b6cbf88..bf233b76 100644 --- a/pytential/qbx/fmmlib.py +++ b/pytential/qbx/fmmlib.py @@ -30,6 +30,7 @@ from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler from sumpy.kernel import LaplaceKernel, HelmholtzKernel +from boxtree.tools import return_timing_data from pytools import log_process import logging @@ -301,6 +302,7 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): # {{{ p2qbxl @log_process(logger) + @return_timing_data def form_global_qbx_locals(self, src_weights): geo_data = self.geo_data trav = geo_data.traversal() @@ -352,6 +354,7 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): # {{{ m2qbxl @log_process(logger) + @return_timing_data def translate_box_multipoles_to_qbx_local(self, multipole_exps): qbx_exps = self.qbx_local_expansion_zeros() @@ -463,67 +466,99 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): # }}} @log_process(logger) + @return_timing_data def translate_box_local_to_qbx_local(self, local_exps): qbx_expansions = self.qbx_local_expansion_zeros() geo_data = self.geo_data - if geo_data.ncenters == 0: + global_qbx_centers = geo_data.global_qbx_centers() + + if global_qbx_centers.size == 0: return qbx_expansions + trav = geo_data.traversal() qbx_center_to_target_box = geo_data.qbx_center_to_target_box() qbx_centers = geo_data.centers() qbx_radii = geo_data.expansion_radii() - locloc = self.get_translation_routine("%ddlocloc") + is_global_qbx_center = np.zeros(geo_data.ncenters, dtype=int) + is_global_qbx_center[global_qbx_centers] = 1 - for isrc_level in range(geo_data.tree().nlevels): + locloc = self.get_translation_routine("%ddlocloc", vec_suffix="_qbx") + + nlevels = geo_data.tree().nlevels + + box_to_rscale = np.empty(geo_data.tree().nboxes, dtype=np.float) + for isrc_level in range(nlevels): lev_box_start, lev_box_stop = self.tree.level_start_box_nrs[ isrc_level:isrc_level+2] - locals_level_start_ibox, locals_view = \ - self.local_expansions_view(local_exps, isrc_level) - assert locals_level_start_ibox == lev_box_start + box_to_rscale[lev_box_start:lev_box_stop] = ( + self.level_to_rscale(isrc_level)) - kwargs = {} - kwargs.update(self.kernel_kwargs) + box_centers = self._get_single_box_centers_array() - for tgt_icenter in range(geo_data.ncenters): - if self.dim == 3 and self.eqn_letter == "h": - # Yuck: This keeps overwriting 'radius' in the dict. - kwargs["radius"] = 0.5 * ( - geo_data.expansion_radii()[tgt_icenter]) + # This translates from target box to global box numbers. + qbx_center_to_box = trav.target_boxes[qbx_center_to_target_box] - isrc_box = qbx_center_to_target_box[tgt_icenter] + kwargs = {} + kwargs.update(self.kernel_kwargs) - tgt_center = qbx_centers[:, tgt_icenter] + for isrc_level in range(nlevels): + lev_box_start, lev_box_stop = self.tree.level_start_box_nrs[ + isrc_level:isrc_level+2] - # The box's expansions which we're translating here - # (our source) is, globally speaking, a target box. + locals_level_start_ibox, locals_view = \ + self.local_expansions_view(local_exps, isrc_level) - src_ibox = trav.target_boxes[isrc_box] + assert locals_level_start_ibox == lev_box_start - # Is the box number on the level currently under - # consideration? - in_range = (lev_box_start <= src_ibox and src_ibox < lev_box_stop) + # Find used QBX centers that are on this level. (This is not ideal, + # but we're supplied a mapping of QBX centers to boxes and we have + # to invert that in some way.) 
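The per-level selection above amounts to intersecting three masks; in isolation, with made-up sizes (boxes 10-29 forming the current level):

    import numpy as np

    qbx_center_to_box = np.array([3, 17, 4, 25, 9])    # global box owning each center
    is_global_qbx_center = np.array([1, 1, 0, 1, 1])   # "used" flags

    lev_box_start, lev_box_stop = 10, 30
    curr_level_qbx_centers = np.flatnonzero(
        is_global_qbx_center
        & (lev_box_start <= qbx_center_to_box)
        & (qbx_center_to_box < lev_box_stop))
    print(curr_level_qbx_centers)                      # [1 3]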
+ curr_level_qbx_centers = np.flatnonzero( + is_global_qbx_center + & (lev_box_start <= qbx_center_to_box) + & (qbx_center_to_box < lev_box_stop)) - if in_range: - src_center = self.tree.box_centers[:, src_ibox] - tmp_loc_exp = locloc( - rscale1=self.level_to_rscale(isrc_level), - center1=src_center, - expn1=locals_view[ - src_ibox - locals_level_start_ibox].T, + if curr_level_qbx_centers.size == 0: + continue - rscale2=qbx_radii[tgt_icenter], - center2=tgt_center, - nterms2=self.qbx_order, + icurr_level_qbx_center_to_box = ( + qbx_center_to_box[curr_level_qbx_centers]) - **kwargs)[..., 0].T + if self.dim == 3 and self.eqn_letter == "h": + kwargs["radius"] = 0.5 * ( + geo_data.expansion_radii()[curr_level_qbx_centers]) + + # This returns either the expansion or a tuple (ier, expn). + rvals = locloc( + rscale1=box_to_rscale, + rscale1_offsets=icurr_level_qbx_center_to_box, + center1=box_centers, + center1_offsets=icurr_level_qbx_center_to_box, + expn1=locals_view.T, + expn1_offsets=icurr_level_qbx_center_to_box - lev_box_start, + nterms1=self.level_nterms[isrc_level], + nterms2=self.qbx_order, + rscale2=qbx_radii, + rscale2_offsets=curr_level_qbx_centers, + center2=qbx_centers, + center2_offsets=curr_level_qbx_centers, + **kwargs) + + if isinstance(rvals, tuple): + ier, expn2 = rvals + if ier.any(): + raise RuntimeError("locloc failed") + else: + expn2 = rvals - qbx_expansions[tgt_icenter] += tmp_loc_exp + qbx_expansions[curr_level_qbx_centers] += expn2.T return qbx_expansions @log_process(logger) + @return_timing_data def eval_qbx_expansions(self, qbx_expansions): output = self.full_output_zeros() diff --git a/pytential/qbx/target_assoc.py b/pytential/qbx/target_assoc.py index f51e920f..11b70c4b 100644 --- a/pytential/qbx/target_assoc.py +++ b/pytential/qbx/target_assoc.py @@ -91,8 +91,6 @@ Return values .. autoclass:: QBXTargetAssociation -.. autoclass:: QBXTargetAssociationFailedException - Target association driver ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -727,7 +725,7 @@ def associate_targets_to_qbx_centers(lpot_source, wrangler, The side request can take on the values in :ref:`qbx-side-request-table`. - :raises QBXTargetAssociationFailedException: + :raises pytential.qbx.QBXTargetAssociationFailedException: when target association failed to find a center for a target. The returned exception object contains suggested refine flags. 
-- GitLab From adb36deb7bcb017a57f00c56aa4c45fa4a182d81 Mon Sep 17 00:00:00 2001 From: xywei Date: Sat, 20 Oct 2018 11:11:10 -0500 Subject: [PATCH 23/30] Sync more upstream changes --- README.rst | 8 +- doc/conf.py | 1 + doc/discretization.rst | 2 +- doc/index.rst | 1 + doc/linalg.rst | 9 + doc/misc.rst | 2 +- doc/symbolic.rst | 2 + examples/fmm-error.py | 2 +- examples/helmholtz-dirichlet.py | 6 +- examples/laplace-dirichlet-3d.py | 4 +- pytential/__init__.py | 1 + pytential/linalg/__init__.py | 23 + pytential/linalg/proxy.py | 692 +++++++++++++++++++++ pytential/source.py | 38 +- pytential/symbolic/compiler.py | 41 +- pytential/symbolic/execution.py | 412 ++++++------ pytential/symbolic/mappers.py | 22 +- pytential/symbolic/matrix.py | 579 ++++++++++++++--- pytential/symbolic/pde/maxwell/__init__.py | 2 +- pytential/symbolic/primitives.py | 75 ++- pytential/unregularized.py | 6 +- requirements.txt | 4 +- setup.py | 3 +- 23 files changed, 1595 insertions(+), 340 deletions(-) create mode 100644 doc/linalg.rst create mode 100644 pytential/linalg/__init__.py create mode 100644 pytential/linalg/proxy.py diff --git a/README.rst b/README.rst index bababc0a..dfc5fe49 100644 --- a/README.rst +++ b/README.rst @@ -11,6 +11,9 @@ potentials (and, sooner or later, volume potentials). It also knows how to set up meshes and solve integral equations. +See `here `_ +for easy, self-contained installation instructions for Linux and macOS. + It relies on * `numpy `_ for arrays @@ -25,11 +28,12 @@ and, indirectly, * `PyOpenCL `_ as computational infrastructure -PyOpenCL is likely the only package you'll have to install -by hand, all the others will be installed automatically. +.. image:: https://badge.fury.io/py/pytential.png + :target: http://pypi.python.org/pypi/pytential Resources: +* `installation instructions `_ * `documentation `_ * `wiki home page `_ * `source code via git `_ diff --git a/doc/conf.py b/doc/conf.py index 81b15205..6df4af25 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -271,6 +271,7 @@ intersphinx_mapping = { 'http://docs.python.org/': None, 'http://documen.tician.de/boxtree/': None, 'http://docs.scipy.org/doc/numpy/': None, + 'http://documen.tician.de/meshmode/': None, 'http://documen.tician.de/modepy/': None, 'http://documen.tician.de/pyopencl/': None, 'http://documen.tician.de/pytools/': None, diff --git a/doc/discretization.rst b/doc/discretization.rst index ad2eb738..ac4ca526 100644 --- a/doc/discretization.rst +++ b/doc/discretization.rst @@ -16,7 +16,7 @@ and you can start computing. .. automodule:: pytential.qbx Unregularized discretization -------- +---------------------------- .. automodule:: pytential.unregularized diff --git a/doc/index.rst b/doc/index.rst index d56c8bb5..772a936a 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -17,6 +17,7 @@ Contents discretization symbolic + linalg tools misc diff --git a/doc/linalg.rst b/doc/linalg.rst new file mode 100644 index 00000000..d5e78a6e --- /dev/null +++ b/doc/linalg.rst @@ -0,0 +1,9 @@ +Linear Algebra Routines +======================= + +Hierarchical Direct Solver +-------------------------- + +.. automodule:: pytential.linalg.proxy + +.. vim: sw=4:tw=75 diff --git a/doc/misc.rst b/doc/misc.rst index 7a3d9aba..198b87da 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -31,7 +31,7 @@ This set of instructions is intended for 64-bit Linux and macOS computers. #. ``conda config --add channels conda-forge`` -#. (*macOS only*) ``conda install osx-pocl-opencl`` +#. (*macOS only*) ``conda install osx-pocl-opencl pocl pyopencl`` #. 
``conda install git pip pocl islpy pyopencl sympy pyfmmlib pytest`` diff --git a/doc/symbolic.rst b/doc/symbolic.rst index e6de9534..86d580f6 100644 --- a/doc/symbolic.rst +++ b/doc/symbolic.rst @@ -13,6 +13,8 @@ Binding an operator to a discretization .. currentmodule:: pytential +.. autoclass:: GeometryCollection + .. autofunction:: bind PDE operators diff --git a/examples/fmm-error.py b/examples/fmm-error.py index 110ec66b..c3350786 100644 --- a/examples/fmm-error.py +++ b/examples/fmm-error.py @@ -78,7 +78,7 @@ def main(): import matplotlib matplotlib.use('Agg') - im = fplot.show_scalar_in_matplotlib(np.log10(np.abs(err))) + im = fplot.show_scalar_in_matplotlib(np.log10(np.abs(err) + 1e-17)) from matplotlib.colors import Normalize im.set_norm(Normalize(vmin=-12, vmax=0)) diff --git a/examples/helmholtz-dirichlet.py b/examples/helmholtz-dirichlet.py index 22f8fa8a..847e5c3f 100644 --- a/examples/helmholtz-dirichlet.py +++ b/examples/helmholtz-dirichlet.py @@ -96,8 +96,10 @@ def main(): bdry_op_sym = (-loc_sign*0.5*sigma_sym + sqrt_w*( - alpha*sym.S(kernel, inv_sqrt_w_sigma, k=sym.var("k")) - - sym.D(kernel, inv_sqrt_w_sigma, k=sym.var("k")) + alpha*sym.S(kernel, inv_sqrt_w_sigma, k=sym.var("k"), + qbx_forced_limit=+1) + - sym.D(kernel, inv_sqrt_w_sigma, k=sym.var("k"), + qbx_forced_limit="avg") )) # }}} diff --git a/examples/laplace-dirichlet-3d.py b/examples/laplace-dirichlet-3d.py index dd4c752d..4166dddf 100644 --- a/examples/laplace-dirichlet-3d.py +++ b/examples/laplace-dirichlet-3d.py @@ -78,8 +78,8 @@ def main(): cse = sym.cse sigma_sym = sym.var("sigma") - sqrt_w = sym.sqrt_jac_q_weight(3) - #sqrt_w = 1 + #sqrt_w = sym.sqrt_jac_q_weight(3) + sqrt_w = 1 inv_sqrt_w_sigma = cse(sigma_sym/sqrt_w) # -1 for interior Dirichlet diff --git a/pytential/__init__.py b/pytential/__init__.py index 1d07499d..d28e8bdb 100644 --- a/pytential/__init__.py +++ b/pytential/__init__.py @@ -25,6 +25,7 @@ THE SOFTWARE. import numpy as np import pytential.symbolic.primitives as sym +from pytential.symbolic.execution import GeometryCollection # noqa from pytential.symbolic.execution import bind from pytools import memoize_on_first_arg diff --git a/pytential/linalg/__init__.py b/pytential/linalg/__init__.py new file mode 100644 index 00000000..b4a471df --- /dev/null +++ b/pytential/linalg/__init__.py @@ -0,0 +1,23 @@ +from __future__ import division + +__copyright__ = "Copyright (C) 2018 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py new file mode 100644 index 00000000..abb8e8de --- /dev/null +++ b/pytential/linalg/proxy.py @@ -0,0 +1,692 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2018 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import numpy.linalg as la + +import pyopencl as cl +import pyopencl.array # noqa +from pyopencl.array import to_device + +from pytools.obj_array import make_obj_array +from pytools import memoize_method, memoize +from sumpy.tools import BlockIndexRanges + +import loopy as lp +from loopy.version import MOST_RECENT_LANGUAGE_VERSION + + +__doc__ = """ +Proxy Point Generation +~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: ProxyGenerator + +.. autofunction:: partition_by_nodes +.. autofunction:: partition_by_elements +.. autofunction:: partition_from_coarse + +.. autofunction:: gather_block_neighbor_points +.. autofunction:: gather_block_interaction_points +""" + + +# {{{ point index partitioning + +def _element_node_range(group, ielement): + istart = group.node_nr_base + group.nunit_nodes * ielement + iend = group.node_nr_base + group.nunit_nodes * (ielement + 1) + + return np.arange(istart, iend) + + +def partition_by_nodes(discr, + use_tree=True, + max_nodes_in_box=None): + """Generate equally sized ranges of nodes. The partition is created at the + lowest level of granularity, i.e. nodes. This results in balanced ranges + of points, but will split elements across different ranges. + + :arg discr: a :class:`meshmode.discretization.Discretization`. + :arg use_tree: if ``True``, node partitions are generated using a + :class:`boxtree.TreeBuilder`, which leads to geometrically close + points to belong to the same partition. If ``False``, a simple linear + partition is constructed. + :arg max_nodes_in_box: passed to :class:`boxtree.TreeBuilder`. + + :return: a :class:`sumpy.tools.BlockIndexRanges`. 
+    """
+
+    if max_nodes_in_box is None:
+        # FIXME: this is just an arbitrary value
+        max_nodes_in_box = 32
+
+    with cl.CommandQueue(discr.cl_context) as queue:
+        if use_tree:
+            from boxtree import box_flags_enum
+            from boxtree import TreeBuilder
+
+            builder = TreeBuilder(discr.cl_context)
+
+            tree, _ = builder(queue, discr.nodes(),
+                max_particles_in_box=max_nodes_in_box)
+
+            tree = tree.get(queue)
+            leaf_boxes, = (tree.box_flags &
+                           box_flags_enum.HAS_CHILDREN == 0).nonzero()
+
+            indices = np.empty(len(leaf_boxes), dtype=np.object)
+            for i, ibox in enumerate(leaf_boxes):
+                box_start = tree.box_source_starts[ibox]
+                box_end = box_start + tree.box_source_counts_cumul[ibox]
+                indices[i] = tree.user_source_ids[box_start:box_end]
+
+            ranges = to_device(queue,
+                    np.cumsum([0] + [box.shape[0] for box in indices]))
+            indices = to_device(queue, np.hstack(indices))
+        else:
+            indices = cl.array.arange(queue, 0, discr.nnodes,
+                                      dtype=np.int)
+            ranges = cl.array.arange(queue, 0, discr.nnodes + 1,
+                                     discr.nnodes // max_nodes_in_box,
+                                     dtype=np.int)
+        assert ranges[-1] == discr.nnodes
+
+    return BlockIndexRanges(discr.cl_context,
+                            indices.with_queue(None),
+                            ranges.with_queue(None))
+
+
+def partition_by_elements(discr,
+                          use_tree=True,
+                          max_elements_in_box=None):
+    """Generate equally sized ranges of points. The partition is created at the
+    element level, so that all the nodes belonging to an element belong to
+    the same range. This can result in slightly larger differences in size
+    between the ranges, but can be very useful when the individual partitions
+    need to be resampled, integrated, etc.
+
+    :arg discr: a :class:`meshmode.discretization.Discretization`.
+    :arg use_tree: if ``True``, node partitions are generated using a
+        :class:`boxtree.TreeBuilder`, so that geometrically close points
+        belong to the same partition. If ``False``, a simple linear
+        partition is constructed.
+    :arg max_elements_in_box: passed to :class:`boxtree.TreeBuilder`.
+
+    :return: a :class:`sumpy.tools.BlockIndexRanges`.
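+
+    A minimal usage sketch (``discr`` is again assumed to be an existing
+    :class:`meshmode.discretization.Discretization`); each resulting block
+    contains whole elements only::
+
+        indices = partition_by_elements(discr,
+                use_tree=True, max_elements_in_box=4)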
+ """ + + if max_elements_in_box is None: + # NOTE: keep in sync with partition_by_nodes + max_nodes_in_box = 32 + + nunit_nodes = int(np.mean([g.nunit_nodes for g in discr.groups])) + max_elements_in_box = max_nodes_in_box // nunit_nodes + + with cl.CommandQueue(discr.cl_context) as queue: + if use_tree: + from boxtree import box_flags_enum + from boxtree import TreeBuilder + + builder = TreeBuilder(discr.cl_context) + + from pytential.qbx.utils import element_centers_of_mass + elranges = np.cumsum([group.nelements for group in discr.mesh.groups]) + elcenters = element_centers_of_mass(discr) + + tree, _ = builder(queue, elcenters, + max_particles_in_box=max_elements_in_box) + + groups = discr.groups + tree = tree.get(queue) + leaf_boxes, = (tree.box_flags & + box_flags_enum.HAS_CHILDREN == 0).nonzero() + + indices = np.empty(len(leaf_boxes), dtype=np.object) + for i, ibox in enumerate(leaf_boxes): + box_start = tree.box_source_starts[ibox] + box_end = box_start + tree.box_source_counts_cumul[ibox] + + ielement = tree.user_source_ids[box_start:box_end] + igroup = np.digitize(ielement, elranges) + + indices[i] = np.hstack([_element_node_range(groups[j], k) + for j, k in zip(igroup, ielement)]) + else: + nelements = discr.mesh.nelements + elements = np.array_split(np.arange(0, nelements), + nelements // max_elements_in_box) + + elranges = np.cumsum([g.nelements for g in discr.groups]) + elgroups = [np.digitize(elements[i], elranges) + for i in range(len(elements))] + + indices = np.empty(len(elements), dtype=np.object) + for i in range(indices.shape[0]): + indices[i] = np.hstack([_element_node_range(discr.groups[j], k) + for j, k in zip(elgroups[i], elements[i])]) + + ranges = to_device(queue, + np.cumsum([0] + [b.shape[0] for b in indices])) + indices = to_device(queue, np.hstack(indices)) + assert ranges[-1] == discr.nnodes + + return BlockIndexRanges(discr.cl_context, + indices.with_queue(None), + ranges.with_queue(None)) + + +def partition_from_coarse(resampler, from_indices): + """Generate a partition of nodes from an existing partition on a + coarser discretization. The new partition is generated based on element + refinement relationships in *resampler*, so the existing partition + needs to be created using :func:`partition_by_elements`, + since we assume that each range contains all the nodes in an element. + + The new partition will have the same number of ranges as the old partition. + The nodes inside each range in the new partition are all the nodes in + *resampler.to_discr* that were refined from elements in the same + range from *resampler.from_discr*. + + :arg resampler: a + :class:`meshmode.discretization.connection.DirectDiscretizationConnection`. + :arg from_indices: a :class:`sumpy.tools.BlockIndexRanges`. + + :return: a :class:`sumpy.tools.BlockIndexRanges`. 
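+
+    A minimal usage sketch (``resampler`` is assumed to be an existing
+    :class:`~meshmode.discretization.connection.DirectDiscretizationConnection`
+    from a coarse to a refined discretization)::
+
+        from_indices = partition_by_elements(resampler.from_discr)
+        to_indices = partition_from_coarse(resampler, from_indices)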
+ """ + + if not hasattr(resampler, "groups"): + raise ValueError("resampler must be a DirectDiscretizationConnection.") + + with cl.CommandQueue(resampler.cl_context) as queue: + from_indices = from_indices.get(queue) + + # construct ranges + from_discr = resampler.from_discr + from_grp_ranges = np.cumsum([0] + + [grp.nelements for grp in from_discr.mesh.groups]) + from_el_ranges = np.hstack([ + np.arange(grp.node_nr_base, grp.nnodes + 1, grp.nunit_nodes) + for grp in from_discr.groups]) + + # construct coarse element arrays in each from_range + el_indices = np.empty(from_indices.nblocks, dtype=np.object) + el_ranges = np.full(from_grp_ranges[-1], -1, dtype=np.int) + for i in range(from_indices.nblocks): + ifrom = from_indices.block_indices(i) + el_indices[i] = np.unique(np.digitize(ifrom, from_el_ranges)) - 1 + el_ranges[el_indices[i]] = i + el_indices = np.hstack(el_indices) + + # construct lookup table + to_el_table = [np.full(g.nelements, -1, dtype=np.int) + for g in resampler.to_discr.groups] + + for igrp, grp in enumerate(resampler.groups): + for batch in grp.batches: + to_el_table[igrp][batch.to_element_indices.get(queue)] = \ + from_grp_ranges[igrp] + batch.from_element_indices.get(queue) + + # construct fine node index list + indices = [np.empty(0, dtype=np.int) + for _ in range(from_indices.nblocks)] + for igrp in range(len(resampler.groups)): + to_element_indices = \ + np.where(np.isin(to_el_table[igrp], el_indices))[0] + + for i, j in zip(el_ranges[to_el_table[igrp][to_element_indices]], + to_element_indices): + indices[i] = np.hstack([indices[i], + _element_node_range(resampler.to_discr.groups[igrp], j)]) + + ranges = to_device(queue, + np.cumsum([0] + [b.shape[0] for b in indices])) + indices = to_device(queue, np.hstack(indices)) + + return BlockIndexRanges(resampler.cl_context, + indices.with_queue(None), + ranges.with_queue(None)) + +# }}} + + +# {{{ proxy point generator + +def _generate_unit_sphere(ambient_dim, approx_npoints): + """Generate uniform points on a unit sphere. + + :arg ambient_dim: dimension of the ambient space. + :arg approx_npoints: approximate number of points to generate. If the + ambient space is 3D, this will not generate the exact number of points. + :return: array of shape ``(ambient_dim, npoints)``, where ``npoints`` + will not generally be the same as ``approx_npoints``. + """ + + if ambient_dim == 2: + t = np.linspace(0.0, 2.0 * np.pi, approx_npoints) + points = np.vstack([np.cos(t), np.sin(t)]) + elif ambient_dim == 3: + # https://www.cmu.edu/biolphys/deserno/pdf/sphere_equi.pdf + # code by Matt Wala from + # https://github.com/mattwala/gigaqbx-accuracy-experiments/blob/d56ed063ffd7843186f4fe05d2a5b5bfe6ef420c/translation_accuracy.py#L23 + a = 4.0 * np.pi / approx_npoints + m_theta = int(np.round(np.pi / np.sqrt(a))) + d_theta = np.pi / m_theta + d_phi = a / d_theta + + points = [] + for m in range(m_theta): + theta = np.pi * (m + 0.5) / m_theta + m_phi = int(np.round(2.0 * np.pi * np.sin(theta) / d_phi)) + + for n in range(m_phi): + phi = 2.0 * np.pi * n / m_phi + points.append(np.array([np.sin(theta) * np.cos(phi), + np.sin(theta) * np.sin(phi), + np.cos(theta)])) + + for i in range(ambient_dim): + for sign in [-1, 1]: + pole = np.zeros(ambient_dim) + pole[i] = sign + points.append(pole) + + points = np.array(points).T + else: + raise ValueError("ambient_dim > 3 not supported.") + + return points + + +class ProxyGenerator(object): + r""" + .. attribute:: ambient_dim + .. attribute:: nproxy + + Number of proxy points in a single proxy ball. 
+
+    .. attribute:: source
+
+        A :class:`pytential.qbx.QBXLayerPotentialSource`.
+
+    .. attribute:: ratio
+
+        A ratio used to compute the proxy ball radius. The radius
+        is computed in the :math:`\ell^2` norm, resulting in a circle or
+        sphere of proxy points. For QBX, we have two radii of interest
+        for a set of points: the radius :math:`r_{block}` of the
+        smallest ball containing all the points and the radius
+        :math:`r_{qbx}` of the smallest ball containing all the QBX
+        expansion balls in the block. If the ratio :math:`\theta \in
+        [0, 1]`, then the radius of the proxy ball is
+
+        .. math::
+
+            r = (1 - \theta) r_{block} + \theta r_{qbx}.
+
+        If the ratio :math:`\theta > 1`, then the radius is simply
+
+        .. math::
+
+            r = \theta r_{qbx}.
+
+    .. attribute:: ref_points
+
+        Reference points on a unit ball. Can be used to construct the points
+        of a proxy ball :math:`i` by translating them to ``center[i]`` and
+        scaling by ``radii[i]``, as obtained by :meth:`__call__`.
+
+    .. automethod:: __call__
+    """
+
+    def __init__(self, source, approx_nproxy=None, ratio=None):
+        self.source = source
+        self.ambient_dim = source.density_discr.ambient_dim
+        self.ratio = 1.1 if ratio is None else ratio
+
+        approx_nproxy = 32 if approx_nproxy is None else approx_nproxy
+        self.ref_points = \
+                _generate_unit_sphere(self.ambient_dim, approx_nproxy)
+
+    @property
+    def nproxy(self):
+        return self.ref_points.shape[1]
+
+    @memoize_method
+    def get_kernel(self):
+        if self.ratio < 1.0:
+            radius_expr = "(1.0 - {ratio}) * rblk + {ratio} * rqbx"
+        else:
+            radius_expr = "{ratio} * rqbx"
+        radius_expr = radius_expr.format(ratio=self.ratio)
+
+        # NOTE: centers of mass are computed using a second-order approximation
+        # that currently matches what is in `element_centers_of_mass`.
+        knl = lp.make_kernel([
+            "{[irange]: 0 <= irange < nranges}",
+            "{[i]: 0 <= i < npoints}",
+            "{[idim]: 0 <= idim < dim}"
+            ],
+            ["""
+            for irange
+                <> ioffset = srcranges[irange]
+                <> npoints = srcranges[irange + 1] - srcranges[irange]
+
+                proxy_center[idim, irange] = 1.0 / npoints * \
+                    reduce(sum, i, sources[idim, srcindices[i + ioffset]]) \
+                        {{dup=idim:i}}
+
+                <> rblk = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \
+                        (proxy_center[idim, irange] -
+                         sources[idim, srcindices[i + ioffset]]) ** 2)))
+
+                <> rqbx_int = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \
+                        (proxy_center[idim, irange] -
+                         center_int[idim, srcindices[i + ioffset]]) ** 2)) + \
+                         expansion_radii[srcindices[i + ioffset]])
+                <> rqbx_ext = simul_reduce(max, i, sqrt(simul_reduce(sum, idim, \
+                        (proxy_center[idim, irange] -
+                         center_ext[idim, srcindices[i + ioffset]]) ** 2)) + \
+                         expansion_radii[srcindices[i + ioffset]])
+                <> rqbx = if(rqbx_ext < rqbx_int, rqbx_int, rqbx_ext)
+
+                proxy_radius[irange] = {radius_expr}
+            end
+            """.format(radius_expr=radius_expr)],
+            [
+                lp.GlobalArg("sources", None,
+                    shape=(self.ambient_dim, "nsources")),
+                lp.GlobalArg("center_int", None,
+                    shape=(self.ambient_dim, "nsources"), dim_tags="sep,C"),
+                lp.GlobalArg("center_ext", None,
+                    shape=(self.ambient_dim, "nsources"), dim_tags="sep,C"),
+                lp.GlobalArg("proxy_center", None,
+                    shape=(self.ambient_dim, "nranges")),
+                lp.GlobalArg("proxy_radius", None,
+                    shape="nranges"),
+                lp.ValueArg("nsources", np.int),
+                "..."
+            ],
+            name="find_proxy_radii_knl",
+            assumptions="dim>=1 and nranges>=1",
+            fixed_parameters=dict(dim=self.ambient_dim),
+            lang_version=MOST_RECENT_LANGUAGE_VERSION)
+
+        knl = lp.tag_inames(knl, "idim*:unr")
+
+        return knl
+
+    @memoize_method
+    def get_optimized_kernel(self):
+        knl = self.get_kernel()
+        knl = lp.split_iname(knl, "irange", 128, outer_tag="g.0")
+
+        return knl
+
+    def __call__(self, queue, indices, **kwargs):
+        """Generate proxy points for each given range of source points in
+        the discretization in :attr:`source`.
+
+        :arg queue: a :class:`pyopencl.CommandQueue`.
+        :arg indices: a :class:`sumpy.tools.BlockIndexRanges`.
+
+        :return: a tuple of ``(proxies, pxyranges, pxycenters, pxyradii)``,
+            where each element is a :class:`pyopencl.array.Array`. The
+            sizes of the arrays are as follows: ``pxycenters`` is of size
+            ``(ambient_dim, nranges)``, ``pxyradii`` is of size
+            ``(nranges,)``, ``pxyranges`` is of size ``(nranges + 1,)``
+            and ``proxies`` is of size ``(ambient_dim, nranges * nproxy)``.
+            The proxy points in a range :math:`i` can be obtained by a slice
+            ``proxies[idim][pxyranges[i]:pxyranges[i + 1]]`` and are all at a
+            distance ``pxyradii[i]`` from the range center ``pxycenters[i]``.
+        """
+
+        def _affine_map(v, A, b):
+            return np.dot(A, v) + b
+
+        from pytential.qbx.utils import get_centers_on_side
+
+        knl = self.get_kernel()
+        _, (centers_dev, radii_dev,) = knl(queue,
+            sources=self.source.density_discr.nodes(),
+            center_int=get_centers_on_side(self.source, -1),
+            center_ext=get_centers_on_side(self.source, +1),
+            expansion_radii=self.source._expansion_radii("nsources"),
+            srcindices=indices.indices,
+            srcranges=indices.ranges, **kwargs)
+        centers = centers_dev.get()
+        radii = radii_dev.get()
+
+        proxies = np.empty(indices.nblocks, dtype=np.object)
+        for i in range(indices.nblocks):
+            proxies[i] = _affine_map(self.ref_points,
+                    A=(radii[i] * np.eye(self.ambient_dim)),
+                    b=centers[:, i].reshape(-1, 1))
+
+        pxyranges = cl.array.arange(queue,
+                0,
+                proxies.shape[0] * proxies[0].shape[1] + 1,
+                proxies[0].shape[1],
+                dtype=indices.ranges.dtype)
+        proxies = make_obj_array([
+            cl.array.to_device(queue, np.hstack([p[idim] for p in proxies]))
+            for idim in range(self.ambient_dim)])
+        centers = make_obj_array([
+            centers_dev[idim].with_queue(queue).copy()
+            for idim in range(self.ambient_dim)])
+
+        assert pxyranges[-1] == proxies[0].shape[0]
+        return proxies, pxyranges, centers, radii_dev
+
+
+def gather_block_neighbor_points(discr, indices, pxycenters, pxyradii,
+                                 max_nodes_in_box=None):
+    """Generate a set of neighboring points for each range of points in
+    *discr*. Neighboring points of a range :math:`i` are defined
+    as all the points inside the proxy ball :math:`i` that do not also
+    belong to the range itself.
+
+    :arg discr: a :class:`meshmode.discretization.Discretization`.
+    :arg indices: a :class:`sumpy.tools.BlockIndexRanges`.
+    :arg pxycenters: an array containing the center of each proxy ball.
+    :arg pxyradii: an array containing the radius of each proxy ball.
+
+    :return: a :class:`sumpy.tools.BlockIndexRanges`.
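+
+    A minimal usage sketch, reusing the outputs of a :class:`ProxyGenerator`
+    (``source``, ``indices`` and the :class:`pyopencl.CommandQueue` ``queue``
+    are assumed to exist as in :meth:`ProxyGenerator.__call__`)::
+
+        generator = ProxyGenerator(source)
+        proxies, pxyranges, pxycenters, pxyradii = generator(queue, indices)
+        nbrindices = gather_block_neighbor_points(
+                source.density_discr, indices, pxycenters, pxyradii)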
+    """
+
+    if max_nodes_in_box is None:
+        # FIXME: this is a fairly arbitrary value
+        max_nodes_in_box = 32
+
+    with cl.CommandQueue(discr.cl_context) as queue:
+        indices = indices.get(queue)
+
+        # NOTE: this is constructed for multiple reasons:
+        # * TreeBuilder takes object arrays
+        # * `srcindices` can be a small subset of nodes, so this will save
+        # some work
+        # * `srcindices` may reorder the array returned by nodes(), so this
+        # makes sure that we have the same order in tree.user_source_ids
+        # and friends
+        sources = discr.nodes().get(queue)
+        sources = make_obj_array([
+            cl.array.to_device(queue, sources[idim, indices.indices])
+            for idim in range(discr.ambient_dim)])
+
+        # construct tree
+        from boxtree import TreeBuilder
+        builder = TreeBuilder(discr.cl_context)
+        tree, _ = builder(queue, sources,
+                          max_particles_in_box=max_nodes_in_box)
+
+        from boxtree.area_query import AreaQueryBuilder
+        builder = AreaQueryBuilder(discr.cl_context)
+        query, _ = builder(queue, tree, pxycenters, pxyradii)
+
+        # find nodes inside each proxy ball
+        tree = tree.get(queue)
+        query = query.get(queue)
+
+        if isinstance(pxycenters[0], cl.array.Array):
+            pxycenters = np.vstack([pxycenters[idim].get(queue)
+                                    for idim in range(discr.ambient_dim)])
+        if isinstance(pxyradii, cl.array.Array):
+            pxyradii = pxyradii.get(queue)
+
+        nbrindices = np.empty(indices.nblocks, dtype=np.object)
+        for iproxy in range(indices.nblocks):
+            # get list of boxes intersecting the current ball
+            istart = query.leaves_near_ball_starts[iproxy]
+            iend = query.leaves_near_ball_starts[iproxy + 1]
+            iboxes = query.leaves_near_ball_lists[istart:iend]
+
+            # get nodes inside the boxes
+            istart = tree.box_source_starts[iboxes]
+            iend = istart + tree.box_source_counts_cumul[iboxes]
+            isources = np.hstack([np.arange(s, e)
+                                  for s, e in zip(istart, iend)])
+            nodes = np.vstack([tree.sources[idim][isources]
+                               for idim in range(discr.ambient_dim)])
+            isources = tree.user_source_ids[isources]
+
+            # get nodes inside the ball but outside the current range
+            center = pxycenters[:, iproxy].reshape(-1, 1)
+            radius = pxyradii[iproxy]
+            mask = (la.norm(nodes - center, axis=0) < radius) & \
+                   ((isources < indices.ranges[iproxy]) |
+                    (indices.ranges[iproxy + 1] <= isources))
+
+            nbrindices[iproxy] = indices.indices[isources[mask]]
+
+        nbrranges = to_device(queue,
+                np.cumsum([0] + [n.shape[0] for n in nbrindices]))
+        nbrindices = to_device(queue, np.hstack(nbrindices))
+
+    return BlockIndexRanges(discr.cl_context,
+                            nbrindices.with_queue(None),
+                            nbrranges.with_queue(None))
+
+
+def gather_block_interaction_points(source, indices,
+                                    ratio=None,
+                                    approx_nproxy=None,
+                                    max_nodes_in_box=None):
+    """Generate sets of interaction points for each given range of indices
+    in the *source* discretization. For each input range of indices,
+    the corresponding output range of points consists of:
+
+    - a set of proxy points (or balls) around the range, which
+      model farfield interactions. These are constructed using
+      :class:`ProxyGenerator`.
+
+    - a set of neighboring points that are inside the proxy balls, but
+      do not belong to the given range, which model nearby interactions.
+      These are constructed with :func:`gather_block_neighbor_points`.
+
+    :arg source: a :class:`pytential.qbx.QBXLayerPotentialSource`.
+    :arg indices: a :class:`sumpy.tools.BlockIndexRanges`.
+
+    :return: a tuple ``(nodes, ranges)``, where each value is a
+        :class:`pyopencl.array.Array`. For a range :math:`i`, we can
+        get the slice using ``nodes[:, ranges[i]:ranges[i + 1]]``.
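+
+    A minimal usage sketch (``source`` is assumed to be an existing
+    :class:`pytential.qbx.QBXLayerPotentialSource` and ``indices`` a
+    :class:`sumpy.tools.BlockIndexRanges`, e.g. from
+    :func:`partition_by_nodes`)::
+
+        nodes, ranges = gather_block_interaction_points(source, indices)
+        # points interacting with some block i (proxies first, then neighbors)
+        inodes = nodes[:, ranges[i]:ranges[i + 1]]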
+ """ + + @memoize + def knl(): + loopy_knl = lp.make_kernel([ + "{[irange, idim]: 0 <= irange < nranges and \ + 0 <= idim < dim}", + "{[ipxy, ingb]: 0 <= ipxy < npxyblock and \ + 0 <= ingb < nngbblock}" + ], + """ + for irange + <> pxystart = pxyranges[irange] + <> pxyend = pxyranges[irange + 1] + <> npxyblock = pxyend - pxystart + + <> ngbstart = nbrranges[irange] + <> ngbend = nbrranges[irange + 1] + <> nngbblock = ngbend - ngbstart + + <> istart = pxyranges[irange] + nbrranges[irange] + nodes[idim, istart + ipxy] = \ + proxies[idim, pxystart + ipxy] \ + {id_prefix=write_pxy,nosync=write_ngb} + nodes[idim, istart + npxyblock + ingb] = \ + sources[idim, nbrindices[ngbstart + ingb]] \ + {id_prefix=write_ngb,nosync=write_pxy} + ranges[irange + 1] = ranges[irange] + npxyblock + nngbblock + end + """, + [ + lp.GlobalArg("sources", None, + shape=(source.ambient_dim, "nsources")), + lp.GlobalArg("proxies", None, + shape=(source.ambient_dim, "nproxies"), dim_tags="sep,C"), + lp.GlobalArg("nbrindices", None, + shape="nnbrindices"), + lp.GlobalArg("nodes", None, + shape=(source.ambient_dim, "nproxies + nnbrindices")), + lp.ValueArg("nsources", np.int), + lp.ValueArg("nproxies", np.int), + lp.ValueArg("nnbrindices", np.int), + "..." + ], + name="concat_proxy_and_neighbors", + default_offset=lp.auto, + silenced_warnings="write_race(write_*)", + fixed_parameters=dict(dim=source.ambient_dim), + lang_version=MOST_RECENT_LANGUAGE_VERSION) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0") + + return loopy_knl + + with cl.CommandQueue(source.cl_context) as queue: + generator = ProxyGenerator(source, + ratio=ratio, + approx_nproxy=approx_nproxy) + proxies, pxyranges, pxycenters, pxyradii = generator(queue, indices) + + neighbors = gather_block_neighbor_points(source.density_discr, + indices, pxycenters, pxyradii, + max_nodes_in_box=max_nodes_in_box) + + ranges = cl.array.zeros(queue, indices.nblocks + 1, dtype=np.int) + _, (nodes, ranges) = knl()(queue, + sources=source.density_discr.nodes(), + proxies=proxies, + pxyranges=pxyranges, + nbrindices=neighbors.indices, + nbrranges=neighbors.ranges, + ranges=ranges) + + return nodes.with_queue(None), ranges.with_queue(None) + +# }}} + +# vim: foldmethod=marker diff --git a/pytential/source.py b/pytential/source.py index 6420494e..9334dff7 100644 --- a/pytential/source.py +++ b/pytential/source.py @@ -38,7 +38,7 @@ __doc__ = """ class PotentialSource(object): """ - .. method:: preprocess_optemplate(name, expr) + .. automethod:: preprocess_optemplate .. method:: op_group_features(expr) @@ -49,29 +49,43 @@ class PotentialSource(object): :class:`pytential.symbolic.primitives.IntG`. """ + def preprocess_optemplate(self, name, discretizations, expr): + return expr + # {{{ point potential source class PointPotentialSource(PotentialSource): """ - .. attribute:: points + .. attribute:: nodes - An :class:`pyopencl.array.Array` of shape ``[ambient_dim, npoints]``. + An :class:`pyopencl.array.Array` of shape ``[ambient_dim, nnodes]``. .. 
attribute:: nnodes """ - def __init__(self, cl_context, points): + def __init__(self, cl_context, nodes): self.cl_context = cl_context - self.points = points + self._nodes = nodes + + @property + def points(self): + from warnings import warn + warn("'points' has been renamed to nodes().", + DeprecationWarning, stacklevel=2) + + return self._nodes + + def nodes(self): + return self._nodes @property def real_dtype(self): - return self.points.dtype + return self._nodes.dtype @property def nnodes(self): - return self.points.shape[-1] + return self._nodes.shape[-1] @property def complex_dtype(self): @@ -82,7 +96,7 @@ class PointPotentialSource(PotentialSource): @property def ambient_dim(self): - return self.points.shape[0] + return self._nodes.shape[0] def op_group_features(self, expr): from sumpy.kernel import AxisTargetDerivativeRemover @@ -129,17 +143,17 @@ class PointPotentialSource(PotentialSource): p2p = self.get_p2p(insn.kernels) evt, output_for_each_kernel = p2p(queue, - target_discr.nodes(), self.points, + target_discr.nodes(), self._nodes, [strengths], **kernel_args) result.append((o.name, output_for_each_kernel[o.kernel_index])) - return result, [] + return result @memoize_method def weights_and_area_elements(self): with cl.CommandQueue(self.cl_context) as queue: - result = cl.array.empty(queue, self.points.shape[-1], + result = cl.array.empty(queue, self._nodes.shape[-1], dtype=self.real_dtype) result.fill(1) @@ -268,3 +282,5 @@ class LayerPotentialSourceBase(PotentialSource): # }}} # }}} + +# vim: foldmethod=marker diff --git a/pytential/symbolic/compiler.py b/pytential/symbolic/compiler.py index 17d23ebf..456044e7 100644 --- a/pytential/symbolic/compiler.py +++ b/pytential/symbolic/compiler.py @@ -315,7 +315,7 @@ class Code(object): return "\n".join(lines) - # {{{ dynamic scheduler (generates static schedules by self-observation) + # {{{ dynamic scheduler class NoInstructionAvailable(Exception): pass @@ -372,38 +372,35 @@ class Code(object): done_insns = set() while True: - insn = None discardable_vars = [] + insn = None - # pick the next insn - if insn is None: - try: - insn, discardable_vars = self.get_next_step( - frozenset(list(context.keys())), - frozenset(done_insns)) + try: + insn, discardable_vars = self.get_next_step( + frozenset(list(context.keys())), + frozenset(done_insns)) - except self.NoInstructionAvailable: - # no available instructions: we're done - break - else: - for name in discardable_vars: - del context[name] + except self.NoInstructionAvailable: + # no available instructions: we're done + break + else: + for name in discardable_vars: + del context[name] - done_insns.add(insn) - assignments, new_futures = ( - insn.get_exec_function(exec_mapper) - (exec_mapper.queue, insn, exec_mapper.bound_expr, - exec_mapper)) + done_insns.add(insn) + assignments = ( + insn.get_exec_function(exec_mapper) + (exec_mapper.queue, insn, exec_mapper.bound_expr, + exec_mapper)) - if insn is not None: + assignees = insn.get_assignees() for target, value in assignments: if pre_assign_check is not None: pre_assign_check(target, value) + assert target in assignees context[target] = value - assert not new_futures - if len(done_insns) < len(self.instructions): print("Unreachable instructions:") for insn in set(self.instructions) - done_insns: diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py index f658a6aa..976e352f 100644 --- a/pytential/symbolic/execution.py +++ b/pytential/symbolic/execution.py @@ -1,6 +1,9 @@ from __future__ import division, 
absolute_import

-__copyright__ = "Copyright (C) 2013 Andreas Kloeckner"
+__copyright__ = """
+Copyright (C) 2013 Andreas Kloeckner
+Copyright (C) 2018 Alexandru Fikl
+"""

 __license__ = """
 Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -36,6 +39,9 @@ import pyopencl.clmath  # noqa
 from loopy.version import MOST_RECENT_LANGUAGE_VERSION
 from pytools import memoize_in

+from pytential.symbolic.primitives import DEFAULT_SOURCE, DEFAULT_TARGET
+from pytential.symbolic.primitives import (
+        QBXSourceStage1, QBXSourceStage2, QBXSourceQuadStage2)

 # FIXME caches: fix up queues
@@ -173,7 +179,7 @@ class EvaluationMapper(EvaluationMapperBase):

     def exec_assign(self, queue, insn, bound_expr, evaluate):
         return [(name, evaluate(expr))
-                for name, expr in zip(insn.names, insn.exprs)], []
+                for name, expr in zip(insn.names, insn.exprs)]

     # {{{ functions
@@ -278,54 +284,194 @@ class MatVecOp:

 # }}}


-# {{{ default for 'domains' parameter
+# {{{ expression prep
+
+def _prepare_domains(nresults, places, domains, default_domain):
+    """
+    :arg nresults: number of results.
+    :arg places: a :class:`pytential.symbolic.execution.GeometryCollection`.
+    :arg domains: requested domains; a list, a single identifier or *None*.
+    :arg default_domain: default value for domains which are not provided.
+
+    :return: a list of domains for each result. If *domains* is *None*, each
+        element in the list is *default_domain*. If *domains* is a scalar
+        (i.e., not a *list* or *tuple*), each element in the list is
+        *domains*. Otherwise, *domains* is returned as is.
+    """

-def _domains_default(nresults, places, domains, default_val):
     if domains is None:
-        if default_val not in places:
+        if default_domain not in places:
             raise RuntimeError("'domains is None' requires "
-                    "default domain to be defined")
-        dom_name = default_val
-        return nresults*[dom_name]
-
+                    "default domain to be defined in places")
+        dom_name = default_domain
+        return nresults * [dom_name]
     elif not isinstance(domains, (list, tuple)):
         dom_name = domains
-        return nresults*[dom_name]
+        return nresults * [dom_name]
+
+    assert len(domains) == nresults
+    return domains
+
+
+def _prepare_expr(places, expr):
+    """
+    :arg places: a :class:`pytential.symbolic.execution.GeometryCollection`.
+    :arg expr: a symbolic expression.
+    :return: a processed symbolic expression, tagged with the appropriate
+        `where` identifier from *places*, etc.
+    """
+
+    from pytential.source import LayerPotentialSourceBase
+    from pytential.symbolic.mappers import (
+            ToTargetTagger, DerivativeBinder)

-    else:
-        return domains
+    expr = ToTargetTagger(*places._default_place_ids)(expr)
+    expr = DerivativeBinder()(expr)
+
+    for name, place in six.iteritems(places.places):
+        if isinstance(place, LayerPotentialSourceBase):
+            expr = place.preprocess_optemplate(name, places, expr)
+
+    return expr

 # }}}


 # {{{ bound expression

-class BoundExpression:
-    def __init__(self, optemplate, places):
-        self.optemplate = optemplate
-        self.places = places
+class GeometryCollection(object):
+    """A mapping from symbolic identifiers ("place IDs", typically strings)
+    to 'geometries', where a geometry can be a
+    :class:`pytential.source.PotentialSource`
+    or a :class:`pytential.target.TargetBase`.
+    This class is meant to hold a specific combination of sources and targets
+    and to serve as a host for caches of information derived from them, e.g.
+    FMM trees of subsets of them, as well as related common subexpressions
+    such as metric terms.
+
+    .. method:: __getitem__
+    .. method:: get_discretization
+    .. method:: get_cache
+    """
+
+    def __init__(self, places, auto_where=None):
+        """
+        :arg places: a single geometry object, a tuple of two geometry
+            objects (source and target), or a mapping of symbolic names to
+            geometry objects. Supported objects are
+            :class:`~pytential.source.PotentialSource`,
+            :class:`~pytential.target.TargetBase` and
+            :class:`~meshmode.discretization.Discretization`.
+        :arg auto_where: location identifier for each geometry object, used
+            to denote specific discretizations, e.g. in the case where
+            *places* is a :class:`~pytential.source.LayerPotentialSourceBase`.
+            By default, we assume
+            :class:`~pytential.symbolic.primitives.DEFAULT_SOURCE` and
+            :class:`~pytential.symbolic.primitives.DEFAULT_TARGET` for
+            sources and targets, respectively.
+        """
+
+        from pytential.target import TargetBase
+        from meshmode.discretization import Discretization
+        from pytential.source import LayerPotentialSourceBase, PotentialSource
+
+        if auto_where is None:
+            source_where, target_where = DEFAULT_SOURCE, DEFAULT_TARGET
+        else:
+            # NOTE: keeping this here to make sure auto_where unpacks into
+            # just the two elements
+            source_where, target_where = auto_where
+
+        self._default_source_place = source_where
+        self._default_target_place = target_where
+        self._default_place_ids = (source_where, target_where)
+
+        self.places = {}
+        if isinstance(places, LayerPotentialSourceBase):
+            self.places[source_where] = places
+            self.places[target_where] = \
+                    self._get_lpot_discretization(target_where, places)
+        elif isinstance(places, (Discretization, TargetBase)):
+            self.places[target_where] = places
+        elif isinstance(places, tuple):
+            source_discr, target_discr = places
+            self.places[source_where] = source_discr
+            self.places[target_where] = target_discr
+        else:
+            self.places = places.copy()
+
+        for p in six.itervalues(self.places):
+            if not isinstance(p, (PotentialSource, TargetBase, Discretization)):
+                raise TypeError("Must pass discretization, targets or "
+                        "layer potential sources as 'places'.")

         self.caches = {}

-        from pytential.symbolic.compiler import OperatorCompiler
-        self.code = OperatorCompiler(self.places)(optemplate)
+    def _get_lpot_discretization(self, where, lpot):
+        from pytential.source import LayerPotentialSourceBase
+        if not isinstance(lpot, LayerPotentialSourceBase):
+            return lpot
+
+        from pytential.symbolic.primitives import _QBXSource
+        if not isinstance(where, _QBXSource):
+            where = QBXSourceStage1(where)
+
+        if isinstance(where, QBXSourceStage1):
+            return lpot.density_discr
+        if isinstance(where, QBXSourceStage2):
+            return lpot.stage2_density_discr
+        if isinstance(where, QBXSourceQuadStage2):
+            return lpot.quad_stage2_density_discr
+
+        raise ValueError('unknown `where` identifier: {}'.format(type(where)))
+
+    def get_discretization(self, where):
+        """
+        :arg where: location identifier.
+
+        :return: a geometry object in the collection corresponding to the
+            key *where*. If it is a
+            :class:`~pytential.source.LayerPotentialSourceBase`, we look for
+            the corresponding :class:`~meshmode.discretization.Discretization`
+            in its attributes instead.
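+
+        A minimal usage sketch (``qbx`` is assumed to be an existing
+        :class:`~pytential.qbx.QBXLayerPotentialSource`)::
+
+            places = GeometryCollection(qbx)
+            density_discr = places.get_discretization(DEFAULT_SOURCE)
+            stage2_discr = places.get_discretization(
+                    QBXSourceStage2(DEFAULT_SOURCE))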
+        """
+
+        if where in self.places:
+            lpot = self.places[where]
+        else:
+            lpot = self.places.get(getattr(where, 'where', None), None)
+
+        if lpot is None:
+            raise KeyError('`where` not in the collection: {}'.format(where))
+
+        return self._get_lpot_discretization(where, lpot)
+
+    def __getitem__(self, where):
+        return self.places[where]
+
+    def __contains__(self, where):
+        return where in self.places
+
+    def copy(self):
+        # NOTE: pass along the default place IDs this collection was
+        # created with
+        return GeometryCollection(self.places,
+                auto_where=self._default_place_ids)

     def get_cache(self, name):
         return self.caches.setdefault(name, {})

-    def get_discretization(self, where):
-        from pytential.symbolic.primitives import _QBXSourceStage2
-        if isinstance(where, _QBXSourceStage2):
-            lpot_source = self.places[where.where]
-            return lpot_source.stage2_density_discr

-        discr = self.places[where]
+class BoundExpression(object):
+    def __init__(self, places, sym_op_expr):
+        self.places = places
+        self.sym_op_expr = sym_op_expr
+        self.caches = {}

-        from pytential.source import LayerPotentialSourceBase
-        if isinstance(discr, LayerPotentialSourceBase):
-            discr = discr.density_discr
+        from pytential.symbolic.compiler import OperatorCompiler
+        self.code = OperatorCompiler(self.places)(sym_op_expr)

-        return discr
+    def get_cache(self, name):
+        return self.places.get_cache(name)
+
+    def get_discretization(self, where):
+        return self.places.get_discretization(where)

     def scipy_op(self, queue, arg_name, dtype, domains=None, **extra_args):
         """
@@ -343,8 +489,7 @@ class BoundExpression:
         else:
             nresults = 1

-        from pytential.symbolic.primitives import DEFAULT_TARGET
-        domains = _domains_default(nresults, self.places, domains,
+        domains = _prepare_domains(nresults, self.places, domains,
                 DEFAULT_TARGET)

         total_dofs = 0
@@ -370,117 +515,74 @@ class BoundExpression:
         exec_mapper = EvaluationMapper(self, queue, args)
         return self.code.execute(exec_mapper)

-# }}}
-
-
-# {{{ expression prep
-
-def prepare_places(places):
-    from pytential.symbolic.primitives import DEFAULT_SOURCE, DEFAULT_TARGET
-    from meshmode.discretization import Discretization
-    from pytential.source import LayerPotentialSourceBase
-    from pytential.target import TargetBase
-
-    if isinstance(places, LayerPotentialSourceBase):
-        places = {
-                DEFAULT_SOURCE: places,
-                DEFAULT_TARGET: places.density_discr,
-                }
-    elif isinstance(places, (Discretization, TargetBase)):
-        places = {
-                DEFAULT_TARGET: places,
-                }
-
-    elif isinstance(places, tuple):
-        source_discr, target_discr = places
-        places = {
-                DEFAULT_SOURCE: source_discr,
-                DEFAULT_TARGET: target_discr,
-                }
-        del source_discr
-        del target_discr
-
-    def cast_to_place(discr):
-        from pytential.target import TargetBase
-        from pytential.source import PotentialSource
-        if not isinstance(discr, (Discretization, TargetBase, PotentialSource)):
-            raise TypeError("must pass discretizations, "
-                    "layer potential sources or targets as 'places'")
-        return discr
-    return dict(
-            (key, cast_to_place(value))
-            for key, value in six.iteritems(places))
-
-
-def prepare_expr(places, expr, auto_where=None):
+def bind(places, expr, auto_where=None):
     """
-    :arg places: result of :func:`prepare_places`
-    :arg auto_where: For simple source-to-self or source-to-target
+    :arg places: a :class:`pytential.symbolic.execution.GeometryCollection`.
+        Alternatively, any list or mapping that is a valid argument for its
+        constructor can also be used.
+    :arg auto_where: for simple source-to-self or source-to-target
         evaluations, find 'where' attributes automatically.
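+
+    A minimal usage sketch (``qbx`` is assumed to be an existing
+    :class:`~pytential.qbx.QBXLayerPotentialSource`, ``kernel`` a
+    :class:`sumpy.kernel.Kernel` and ``sigma`` a density on its
+    ``density_discr``)::
+
+        from pytential import sym, bind
+
+        op = sym.S(kernel, sym.var("sigma"), qbx_forced_limit=-1)
+        result = bind(qbx, op)(queue, sigma=sigma)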
""" - from pytential.symbolic.primitives import DEFAULT_SOURCE, DEFAULT_TARGET - from pytential.source import LayerPotentialSourceBase - - from pytential.symbolic.mappers import ( - ToTargetTagger, - DerivativeBinder, - ) + if not isinstance(places, GeometryCollection): + places = GeometryCollection(places, auto_where=auto_where) - if auto_where is None: - if DEFAULT_TARGET in places: - auto_where = DEFAULT_SOURCE, DEFAULT_TARGET - else: - auto_where = DEFAULT_SOURCE, DEFAULT_SOURCE + expr = _prepare_expr(places, expr) - if auto_where: - expr = ToTargetTagger(*auto_where)(expr) + return BoundExpression(places, expr) - expr = DerivativeBinder()(expr) +# }}} - for name, place in six.iteritems(places): - if isinstance(place, LayerPotentialSourceBase): - expr = place.preprocess_optemplate(name, places, expr) - return expr +# {{{ matrix building -# }}} +def _bmat(blocks, dtypes): + from pytools import single_valued + from pytential.symbolic.matrix import is_zero + nrows = blocks.shape[0] + ncolumns = blocks.shape[1] -def bind(places, expr, auto_where=None): - """ - :arg places: a mapping of symbolic names to - :class:`pytential.discretization.Discretization` objects or a subclass - of :class:`pytential.discretization.target.TargetBase`. - :arg auto_where: For simple source-to-self or source-to-target - evaluations, find 'where' attributes automatically. - """ - - places = prepare_places(places) - expr = prepare_expr(places, expr, auto_where=auto_where) - return BoundExpression(expr, places) + # "block row starts"/"block column starts" + brs = np.cumsum([0] + + [single_valued(blocks[ibrow, ibcol].shape[0] + for ibcol in range(ncolumns) + if not is_zero(blocks[ibrow, ibcol])) + for ibrow in range(nrows)]) + + bcs = np.cumsum([0] + + [single_valued(blocks[ibrow, ibcol].shape[1] + for ibrow in range(nrows) + if not is_zero(blocks[ibrow, ibcol])) + for ibcol in range(ncolumns)]) + + result = np.zeros((brs[-1], bcs[-1]), + dtype=np.find_common_type(dtypes, [])) + for ibcol in range(ncolumns): + for ibrow in range(nrows): + result[brs[ibrow]:brs[ibrow + 1], bcs[ibcol]:bcs[ibcol + 1]] = \ + blocks[ibrow, ibcol] + return result -# {{{ matrix building -def build_matrix(queue, places, expr, input_exprs, domains=None, +def build_matrix(queue, places, exprs, input_exprs, domains=None, auto_where=None, context=None): """ - :arg queue: a :class:`pyopencl.CommandQueue` used to synchronize - the calculation. - :arg places: a mapping of symbolic names to - :class:`pytential.discretization.Discretization` objects or a subclass - of :class:`pytential.discretization.target.TargetBase`. - :arg input_exprs: An object array of expressions corresponding to the - input block columns of the matrix. - - May also be a single expression. + :arg queue: a :class:`pyopencl.CommandQueue`. + :arg places: a :class:`pytential.symbolic.execution.GeometryCollection`. + Alternatively, any list or mapping that is a valid argument for its + constructor can also be used. + :arg exprs: an array of expressions corresponding to the output block + rows of the matrix. May also be a single expression. + :arg input_exprs: an array of expressions corresponding to the + input block columns of the matrix. May also be a single expression. :arg domains: a list of discretization identifiers (see 'places') or *None* values indicating the domains on which each component of the solution vector lives. *None* values indicate that the component - is a scalar. If *None*, - :class:`pytential.symbolic.primitives.DEFAULT_TARGET`, is required + is a scalar. 
If *None*, *auto_where* or, if it is not provided, + :class:`~pytential.symbolic.primitives.DEFAULT_SOURCE` is required to be a key in :attr:`places`. :arg auto_where: For simple source-to-self or source-to-target evaluations, find 'where' attributes automatically. @@ -489,84 +591,48 @@ def build_matrix(queue, places, expr, input_exprs, domains=None, if context is None: context = {} - places = prepare_places(places) - expr = prepare_expr(places, expr, auto_where=auto_where) - from pytools.obj_array import is_obj_array, make_obj_array - if not is_obj_array(expr): - expr = make_obj_array([expr]) + if not isinstance(places, GeometryCollection): + places = GeometryCollection(places, auto_where=auto_where) + exprs = _prepare_expr(places, exprs) + + if not is_obj_array(exprs): + exprs = make_obj_array([exprs]) try: input_exprs = list(input_exprs) except TypeError: # not iterable, wrap in a list input_exprs = [input_exprs] - from pytential.symbolic.primitives import DEFAULT_SOURCE - domains = _domains_default(len(input_exprs), places, domains, - DEFAULT_SOURCE) + domains = _prepare_domains(len(input_exprs), places, domains, + places._default_source_place) - nblock_rows = len(expr) + from pytential.symbolic.matrix import MatrixBuilder, is_zero + nblock_rows = len(exprs) nblock_columns = len(input_exprs) - blocks = np.zeros((nblock_rows, nblock_columns), dtype=np.object) - from pytential.symbolic.matrix import MatrixBuilder, is_zero - dtypes = [] - for ibcol in range(nblock_columns): mbuilder = MatrixBuilder( queue, dep_expr=input_exprs[ibcol], - other_dep_exprs=( - input_exprs[:ibcol] - + - input_exprs[ibcol+1:]), + other_dep_exprs=(input_exprs[:ibcol] + + input_exprs[ibcol + 1:]), dep_source=places[domains[ibcol]], + dep_discr=places.get_discretization(domains[ibcol]), places=places, context=context) for ibrow in range(nblock_rows): - block = mbuilder(expr[ibrow]) - - assert ( - is_zero(block) - or isinstance(block, np.ndarray)) - if isinstance(block, np.ndarray): - dtypes.append(block.dtype) + block = mbuilder(exprs[ibrow]) + assert is_zero(block) or isinstance(block, np.ndarray) blocks[ibrow, ibcol] = block - - if isinstance(block, cl.array.Array): + if isinstance(block, np.ndarray): dtypes.append(block.dtype) - from pytools import single_valued - - block_row_counts = [ - single_valued( - blocks[ibrow, ibcol].shape[0] - for ibcol in range(nblock_columns) - if not is_zero(blocks[ibrow, ibcol])) - for ibrow in range(nblock_rows)] - - block_col_counts = [ - places[domains[ibcol]].density_discr.nnodes - for ibcol in range(nblock_columns)] - - # "block row starts"/"block column starts" - brs = np.cumsum([0] + block_row_counts) - bcs = np.cumsum([0] + block_col_counts) - - result = np.zeros( - (sum(block_row_counts), sum(block_col_counts)), - dtype=np.find_common_type(dtypes, [])) - - for ibcol in range(nblock_columns): - for ibrow in range(nblock_rows): - result[brs[ibrow]:brs[ibrow+1], bcs[ibcol]:bcs[ibcol+1]] = \ - blocks[ibrow, ibcol] - - return cl.array.to_device(queue, result) + return cl.array.to_device(queue, _bmat(blocks, dtypes)) # }}} diff --git a/pytential/symbolic/mappers.py b/pytential/symbolic/mappers.py index 9fdbaae4..00c43689 100644 --- a/pytential/symbolic/mappers.py +++ b/pytential/symbolic/mappers.py @@ -292,8 +292,10 @@ class ToTargetTagger(LocationTagger): """ def __init__(self, default_source, default_target): - LocationTagger.__init__(self, default_target) - self.operand_rec = LocationTagger(default_source) + LocationTagger.__init__(self, default_target, + 
default_source=default_source)
+        self.operand_rec = LocationTagger(default_source,
+                default_source=default_source)

 # }}}

@@ -397,19 +399,15 @@ class QBXPreprocessor(IdentityMapper):
         self.places = places

     def map_int_g(self, expr):
-        source = self.places[self.source_name]
-        target_discr = self.places[expr.target]
-
-        from pytential.source import LayerPotentialSourceBase
-        if isinstance(target_discr, LayerPotentialSourceBase):
-            target_discr = target_discr.density_discr
+        source_discr = self.places.get_discretization(self.source_name)
+        target_discr = self.places.get_discretization(expr.target)

         if expr.qbx_forced_limit == 0:
             raise ValueError("qbx_forced_limit == 0 was a bad idea and "
                     "is no longer supported. Use qbx_forced_limit == 'avg' "
                     "to request two-sided averaging explicitly if needed.")

-        is_self = source.density_discr is target_discr
+        is_self = source_discr is target_discr

         expr = expr.copy(
                 kernel=expr.kernel,
@@ -453,8 +451,12 @@ class QBXPreprocessor(IdentityMapper):
 # {{{ stringifier

 def stringify_where(where):
-    if isinstance(where, prim._QBXSourceStage2):
+    if isinstance(where, prim.QBXSourceStage1):
+        return "stage1(%s)" % stringify_where(where.where)
+    if isinstance(where, prim.QBXSourceStage2):
         return "stage2(%s)" % stringify_where(where.where)
+    if isinstance(where, prim.QBXSourceQuadStage2):
+        return "quad_stage2(%s)" % stringify_where(where.where)

     if where is None:
         return "?"
diff --git a/pytential/symbolic/matrix.py b/pytential/symbolic/matrix.py
index f0b1d82f..9d0c2b2f 100644
--- a/pytential/symbolic/matrix.py
+++ b/pytential/symbolic/matrix.py
@@ -1,6 +1,9 @@
 from __future__ import division, absolute_import

-__copyright__ = "Copyright (C) 2015 Andreas Kloeckner"
+__copyright__ = """
+Copyright (C) 2015 Andreas Kloeckner
+Copyright (C) 2018 Alexandru Fikl
+"""

 __license__ = """
 Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -34,39 +37,214 @@ import pytential.symbolic.primitives as sym
 from pytential.symbolic.execution import bind


+# {{{ helpers
+
 def is_zero(x):
     return isinstance(x, (int, float, complex, np.number)) and x == 0


-# FIXME: PyOpenCL doesn't do all the required matrix math yet.
-# We'll cheat and build the matrix on the host.
+def _resample_arg(queue, source, x):
+    """
+    :arg queue: a :class:`pyopencl.CommandQueue`.
+    :arg source: a :class:`pytential.source.LayerPotentialSourceBase` subclass.
+        If it is not a layer potential source, no resampling is done.
+    :arg x: a :class:`numpy.ndarray`.
+
+    :return: a resampled :class:`numpy.ndarray` (see
+        :meth:`pytential.source.LayerPotentialSourceBase.resampler`).
+    """
+
+    from pytential.source import LayerPotentialSourceBase
+    if not isinstance(source, LayerPotentialSourceBase):
+        return x
+
+    if not isinstance(x, np.ndarray):
+        return x
+
+    if len(x.shape) >= 2:
+        raise RuntimeError("matrix variables in kernel arguments")
+
+    def resample(y):
+        return source.resampler(queue, cl.array.to_device(queue, y)).get(queue)
+
+    from pytools.obj_array import with_object_array_or_scalar
+    return with_object_array_or_scalar(resample, x)
+
+
+def _get_layer_potential_args(mapper, expr, source):
+    """
+    :arg mapper: a :class:`pytential.symbolic.matrix.MatrixBuilderBase`.
+    :arg expr: symbolic layer potential expression.
+    :arg source: a :class:`pytential.source.LayerPotentialSourceBase`.
+
+    :return: a mapping of kernel arguments evaluated by the *mapper*.
+    """
+
+    # skip resampling if source and target are the same
+    from pytential.symbolic.primitives import DEFAULT_SOURCE, DEFAULT_TARGET
+    if ((expr.source is not DEFAULT_SOURCE)
+            and (expr.target is not DEFAULT_TARGET)
+            and (isinstance(expr.source, type(expr.target)))):
+        source = None
+
+    kernel_args = {}
+    for arg_name, arg_expr in six.iteritems(expr.kernel_arguments):
+        rec_arg = mapper.rec(arg_expr)
+        kernel_args[arg_name] = _resample_arg(mapper.queue, source, rec_arg)
+
+    return kernel_args
+
+
+def _get_kernel_args(mapper, kernel, expr, source):
+    """
+    :arg mapper: a :class:`pytential.symbolic.matrix.MatrixBuilderBase`.
+    :arg kernel: a :class:`sumpy.kernel.Kernel`.
+    :arg expr: symbolic layer potential expression.
+    :arg source: a :class:`pytential.source.LayerPotentialSourceBase`.
+
+    :return: a mapping of kernel arguments evaluated by the *mapper*.
+    """
+
+    # NOTE: copied from pytential.symbolic.primitives.IntG
+    inner_kernel_args = kernel.get_args() + kernel.get_source_args()
+    inner_kernel_args = set(arg.loopy_arg.name for arg in inner_kernel_args)
+
+    kernel_args = {}
+    for arg_name, arg_expr in six.iteritems(expr.kernel_arguments):
+        if arg_name not in inner_kernel_args:
+            continue
+
+        rec_arg = mapper.rec(arg_expr)
+        kernel_args[arg_name] = _resample_arg(mapper.queue, source, rec_arg)
+
+    return kernel_args
+
+
+def _get_weights_and_area_elements(queue, source, source_discr):
+    """
+    :arg queue: a :class:`pyopencl.CommandQueue`.
+    :arg source: a :class:`pytential.source.LayerPotentialSourceBase`.
+    :arg source_discr: a :class:`meshmode.discretization.Discretization`.
+
+    :return: quadrature weights for each node in *source_discr*.
+    """
+
+    if source.quad_stage2_density_discr is source_discr:
+        waa = source.weights_and_area_elements().with_queue(queue)
+    else:
+        # NOTE: copied from `weights_and_area_elements`, but using the
+        # discretization given by *source_discr* and no interpolation
+        area = bind(source_discr,
+                sym.area_element(source.ambient_dim, source.dim))(queue)
+        qweight = bind(source_discr, sym.QWeight())(queue)
+        waa = area * qweight
+
+    return waa
+
+
+def _get_centers_and_expansion_radii(queue, source, target_discr, qbx_forced_limit):
+    """
+    :arg queue: a :class:`pyopencl.CommandQueue`.
+    :arg source: a :class:`pytential.source.LayerPotentialSourceBase`.
+    :arg target_discr: a :class:`meshmode.discretization.Discretization`.
+    :arg qbx_forced_limit: an integer (*+1* or *-1*).
+
+    :return: a tuple of `(centers, radii)` for each node in *target_discr*.
+    """
+
+    if source.density_discr is target_discr:
+        # NOTE: skip expensive target association
+        from pytential.qbx.utils import get_centers_on_side
+        centers = get_centers_on_side(source, qbx_forced_limit)
+        radii = source._expansion_radii('nsources')
+    else:
+        from pytential.qbx.utils import get_interleaved_centers
+        centers = get_interleaved_centers(queue, source)
+        radii = source._expansion_radii('ncenters')
+
+        # NOTE: using a fairly large tolerance to make sure all the stage2
+        # targets are associated to a center. We can't use the user-provided
+        # source.target_association_tolerance here because it will likely be
+        # way too small.
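+        # (In other words, the 1.0e-1 below is a deliberately generous
+        # value whose only job is to keep the association call below from
+        # failing for hard-to-associate stage2 targets.)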
+        target_association_tolerance = 1.0e-1
+
+        from pytential.qbx.target_assoc import associate_targets_to_qbx_centers
+        code_container = source.target_association_code_container
+        assoc = associate_targets_to_qbx_centers(
+                source,
+                code_container.get_wrangler(queue),
+                [(target_discr, qbx_forced_limit)],
+                target_association_tolerance=target_association_tolerance)
+
+        centers = [cl.array.take(c, assoc.target_to_center, queue=queue)
+                for c in centers]
+        radii = cl.array.take(radii, assoc.target_to_center, queue=queue)
+
+    return centers, radii
+
+# }}}
+
+
+# {{{ base class for matrix builders
+
+class MatrixBuilderBase(EvaluationMapperBase):
+    def __init__(self, queue, dep_expr, other_dep_exprs,
+            dep_source, dep_discr, places, context):
+        """
+        :arg queue: a :class:`pyopencl.CommandQueue`.
+        :arg dep_expr: symbolic expression for the input block column
+            that the builder is evaluating.
+        :arg other_dep_exprs: symbolic expressions for the remaining input
+            block columns.
+        :arg dep_source: a :class:`pytential.source.LayerPotentialSourceBase`
+            for the given *dep_expr*.
+        :arg dep_discr: a concrete :class:`meshmode.discretization.Discretization`
+            for the given *dep_expr*.
+        :arg places: a :class:`pytential.symbolic.execution.GeometryCollection`
+            for all the sources and targets the builder is expected to
+            encounter.
+        """
+        super(MatrixBuilderBase, self).__init__(context=context)
 
-class MatrixBuilder(EvaluationMapperBase):
-    def __init__(self, queue, dep_expr, other_dep_exprs, dep_source, places,
-            context):
         self.queue = queue
         self.dep_expr = dep_expr
         self.other_dep_exprs = other_dep_exprs
         self.dep_source = dep_source
-        self.dep_discr = dep_source.density_discr
+        self.dep_discr = dep_discr
         self.places = places
-        self.context = context
+
+        self.dep_nnodes = dep_discr.nnodes
+
+    # {{{
+
+    def get_dep_variable(self):
+        return np.eye(self.dep_nnodes, dtype=np.float64)
+
+    def is_kind_vector(self, x):
+        return len(x.shape) == 1
+
+    def is_kind_matrix(self, x):
+        return len(x.shape) == 2
+
+    # }}}
+
+    # {{{ map_xxx implementation
 
     def map_variable(self, expr):
         if expr == self.dep_expr:
-            return np.eye(self.dep_discr.nnodes, dtype=np.float64)
+            return self.get_dep_variable()
         elif expr in self.other_dep_exprs:
             return 0
         else:
-            return super(MatrixBuilder, self).map_variable(expr)
+            return super(MatrixBuilderBase, self).map_variable(expr)
 
     def map_subscript(self, expr):
         if expr == self.dep_expr:
-            return np.eye(self.dep_discr.nnodes, dtype=np.float64)
+            return self.get_dep_variable()
         elif expr in self.other_dep_exprs:
             return 0
         else:
-            return super(MatrixBuilder, self).map_subscript(expr)
+            return super(MatrixBuilderBase, self).map_subscript(expr)
 
     def map_sum(self, expr):
         sum_kind = None
@@ -83,13 +261,12 @@ class MatrixBuilder(EvaluationMapperBase):
                 continue
 
             if isinstance(rec_child, np.ndarray):
-                if len(rec_child.shape) == 2:
+                if self.is_kind_matrix(rec_child):
                     term_kind = term_kind_matrix
-                elif len(rec_child.shape) == 1:
+                elif self.is_kind_vector(rec_child):
                     term_kind = term_kind_vector
                 else:
                     raise RuntimeError("unexpected array rank")
-
             else:
                 term_kind = term_kind_scalar
 
@@ -108,71 +285,141 @@ class MatrixBuilder(EvaluationMapperBase):
         mat_result = None
         vecs_and_scalars = 1
 
-        for term in expr.children:
-            rec_term = self.rec(term)
+        for child in expr.children:
+            rec_child = self.rec(child)
 
-            if is_zero(rec_term):
+            if is_zero(rec_child):
                 return 0
 
-            if isinstance(rec_term, (np.number, int, float, complex)):
-                vecs_and_scalars = vecs_and_scalars * rec_term
-            elif isinstance(rec_term, 
np.ndarray): - if len(rec_term.shape) == 2: + if isinstance(rec_child, (np.number, int, float, complex)): + vecs_and_scalars = vecs_and_scalars * rec_child + elif isinstance(rec_child, np.ndarray): + if self.is_kind_matrix(rec_child): if mat_result is not None: raise RuntimeError("expression is nonlinear in %s" % self.dep_expr) else: - mat_result = rec_term + mat_result = rec_child else: - vecs_and_scalars = vecs_and_scalars * rec_term + vecs_and_scalars = vecs_and_scalars * rec_child if mat_result is not None: - if ( - isinstance(vecs_and_scalars, np.ndarray) - and len(vecs_and_scalars.shape) == 1): + if (isinstance(vecs_and_scalars, np.ndarray) + and self.is_kind_vector(vecs_and_scalars)): vecs_and_scalars = vecs_and_scalars[:, np.newaxis] return mat_result * vecs_and_scalars else: return vecs_and_scalars + def map_num_reference_derivative(self, expr): + rec_operand = self.rec(expr.operand) + + assert isinstance(rec_operand, np.ndarray) + if self.is_kind_matrix(rec_operand): + raise NotImplementedError("derivatives") + + where_discr = self.places[expr.where] + op = sym.NumReferenceDerivative(expr.ref_axes, sym.var("u")) + return bind(where_discr, op)( + self.queue, u=cl.array.to_device(self.queue, rec_operand)).get() + + def map_node_coordinate_component(self, expr): + where_discr = self.places[expr.where] + op = sym.NodeCoordinateComponent(expr.ambient_axis) + return bind(where_discr, op)(self.queue).get() + + def map_call(self, expr): + arg, = expr.parameters + rec_arg = self.rec(arg) + + if isinstance(rec_arg, np.ndarray) and self.is_kind_matrix(rec_arg): + raise RuntimeError("expression is nonlinear in variable") + + if isinstance(rec_arg, np.ndarray): + rec_arg = cl.array.to_device(self.queue, rec_arg) + + op = expr.function(sym.var("u")) + result = bind(self.dep_source, op)(self.queue, u=rec_arg) + + if isinstance(result, cl.array.Array): + result = result.get() + + return result + + # }}} + + +class MatrixBlockBuilderBase(MatrixBuilderBase): + """Evaluate individual blocks of a matrix operator. + + Unlike, e.g. :class:`MatrixBuilder`, matrix block builders are + significantly reduced in scope. They are basically just meant + to evaluate linear combinations of layer potential operators. + For example, they do not support composition of operators because we + assume that each operator acts directly on the density. + """ + + def __init__(self, queue, dep_expr, other_dep_exprs, + dep_source, dep_discr, places, index_set, context): + """ + :arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` class + describing which blocks are going to be evaluated. + """ + + super(MatrixBlockBuilderBase, self).__init__(queue, + dep_expr, other_dep_exprs, dep_source, dep_discr, + places, context) + + self.index_set = index_set + self.dep_nnodes = index_set.col.indices.size + + def get_dep_variable(self): + return 1.0 + + def is_kind_vector(self, x): + # NOTE: since matrices are flattened, the only way to differentiate + # them from a vector is by size + return x.size == self.index_set.row.indices.size + + def is_kind_matrix(self, x): + # NOTE: since matrices are flattened, we recognize them by checking + # if they have the right size + return x.size == self.index_set.linear_row_indices.size + +# }}} + + +# {{{ QBX layer potential matrix builder + +# FIXME: PyOpenCL doesn't do all the required matrix math yet. +# We'll cheat and build the matrix on the host. 
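+#
+# (Concretely, the generators below do run on the device, but the result is
+# pulled back with ``mat.get()`` so that the quadrature-weight scaling and
+# the final ``mat.dot(...)`` products happen in :mod:`numpy` on the host.)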
+ +class MatrixBuilder(MatrixBuilderBase): + def __init__(self, queue, dep_expr, other_dep_exprs, + dep_source, dep_discr, places, context): + super(MatrixBuilder, self).__init__(queue, dep_expr, other_dep_exprs, + dep_source, dep_discr, places, context) + def map_int_g(self, expr): - source = self.places[expr.source] - target_discr = self.places[expr.target] + where_source = expr.source + if where_source is sym.DEFAULT_SOURCE: + where_source = sym.QBXSourceQuadStage2(expr.source) - if source.density_discr is not target_discr: - raise NotImplementedError() + source = self.places[expr.source] + source_discr = self.places.get_discretization(where_source) + target_discr = self.places.get_discretization(expr.target) rec_density = self.rec(expr.density) if is_zero(rec_density): return 0 assert isinstance(rec_density, np.ndarray) - if len(rec_density.shape) != 2: + if not self.is_kind_matrix(rec_density): raise NotImplementedError("layer potentials on non-variables") kernel = expr.kernel - - kernel_args = {} - for arg_name, arg_expr in six.iteritems(expr.kernel_arguments): - rec_arg = self.rec(arg_expr) - - if isinstance(rec_arg, np.ndarray): - if len(rec_arg.shape) == 2: - raise RuntimeError("matrix variables in kernel arguments") - if len(rec_arg.shape) == 1: - from pytools.obj_array import with_object_array_or_scalar - - def resample(x): - return ( - source.resampler( - self.queue, - cl.array.to_device(self.queue, x)) - .get(queue=self.queue)) - - rec_arg = with_object_array_or_scalar(resample, rec_arg) - - kernel_args[arg_name] = rec_arg + kernel_args = _get_layer_potential_args(self, expr, source) from sumpy.expansion.local import LineTaylorLocalExpansion local_expn = LineTaylorLocalExpansion(kernel, source.qbx_order) @@ -181,66 +428,210 @@ class MatrixBuilder(EvaluationMapperBase): mat_gen = LayerPotentialMatrixGenerator( self.queue.context, (local_expn,)) - assert target_discr is source.density_discr + assert abs(expr.qbx_forced_limit) > 0 + centers, radii = _get_centers_and_expansion_radii(self.queue, + source, target_discr, expr.qbx_forced_limit) - from pytential.qbx.utils import get_centers_on_side + _, (mat,) = mat_gen(self.queue, + targets=target_discr.nodes(), + sources=source_discr.nodes(), + centers=centers, + expansion_radii=radii, + **kernel_args) + mat = mat.get() + + waa = _get_weights_and_area_elements(self.queue, source, source_discr) + mat[:, :] *= waa.get(self.queue) + + if target_discr.nnodes != source_discr.nnodes: + # NOTE: we only resample sources + assert target_discr.nnodes < source_discr.nnodes + + resampler = source.direct_resampler + resample_mat = resampler.full_resample_matrix(self.queue).get(self.queue) + mat = mat.dot(resample_mat) + + mat = mat.dot(rec_density) + + return mat + +# }}} + + +# {{{ p2p matrix builder + +class P2PMatrixBuilder(MatrixBuilderBase): + def __init__(self, queue, dep_expr, other_dep_exprs, + dep_source, dep_discr, places, context, exclude_self=True): + super(P2PMatrixBuilder, self).__init__(queue, + dep_expr, other_dep_exprs, dep_source, dep_discr, + places, context) + + self.exclude_self = exclude_self + + def map_int_g(self, expr): + source = self.places[expr.source] + source_discr = self.places.get_discretization(expr.source) + target_discr = self.places.get_discretization(expr.target) + + rec_density = self.rec(expr.density) + if is_zero(rec_density): + return 0 + + assert isinstance(rec_density, np.ndarray) + if not self.is_kind_matrix(rec_density): + raise NotImplementedError("layer potentials on non-variables") + + kernel = 
expr.kernel.get_base_kernel()
+        kernel_args = _get_kernel_args(self, kernel, expr, source)
+        if self.exclude_self:
+            kernel_args["target_to_source"] = \
+                cl.array.arange(self.queue, 0, target_discr.nnodes, dtype=np.int)
+
+        from sumpy.p2p import P2PMatrixGenerator
+        mat_gen = P2PMatrixGenerator(
+                self.queue.context, (kernel,), exclude_self=self.exclude_self)
 
-        assert abs(expr.qbx_forced_limit) > 0
         _, (mat,) = mat_gen(self.queue,
-                target_discr.nodes(),
-                source.quad_stage2_density_discr.nodes(),
-                get_centers_on_side(source, expr.qbx_forced_limit),
-                expansion_radii=self.dep_source._expansion_radii("nsources"),
+                targets=target_discr.nodes(),
+                sources=source_discr.nodes(),
                 **kernel_args)
         mat = mat.get()
+        mat = mat.dot(rec_density)
+
+        return mat
 
+# }}}
 
-        waa = source.weights_and_area_elements().get(queue=self.queue)
-        mat[:, :] *= waa
 
-        resampler = source.direct_resampler
-        resample_mat = resampler.full_resample_matrix(self.queue).get(self.queue)
+# {{{ block matrix builders
 
-        mat = mat.dot(resample_mat)
-        mat = mat.dot(rec_density)
+class NearFieldBlockBuilder(MatrixBlockBuilderBase):
+    def __init__(self, queue, dep_expr, other_dep_exprs, dep_source, dep_discr,
+            places, index_set, context):
+        super(NearFieldBlockBuilder, self).__init__(queue,
+                dep_expr, other_dep_exprs, dep_source, dep_discr,
+                places, index_set, context)
+
+        # NOTE: we need additional mappers to redirect some operations:
+        #   * mat_mapper is used to compute any kernel arguments that need to
+        #   be computed on the full discretization, ignoring our index_set,
+        #   e.g. the normal in a double layer potential
+        #   * blk_mapper is used to recursively compute the density of
+        #   a layer potential operator, to ensure there is no composition
+        self.mat_mapper = MatrixBuilderBase(queue,
+                dep_expr, other_dep_exprs, dep_source, dep_discr,
+                places, context)
+        self.blk_mapper = MatrixBlockBuilderBase(queue,
+                dep_expr, other_dep_exprs, dep_source, dep_discr,
+                places, index_set, context)
+
+    def get_dep_variable(self):
+        tgtindices = self.index_set.linear_row_indices.get(self.queue)
+        srcindices = self.index_set.linear_col_indices.get(self.queue)
+
+        return np.equal(tgtindices, srcindices).astype(np.float64)
+
+    def map_int_g(self, expr):
+        source = self.places[expr.source]
+        source_discr = self.places.get_discretization(expr.source)
+        target_discr = self.places.get_discretization(expr.target)
+
+        if source_discr is not target_discr:
+            raise NotImplementedError()
+
+        rec_density = self.blk_mapper.rec(expr.density)
+        if is_zero(rec_density):
+            return 0
+
+        if not np.isscalar(rec_density):
+            raise NotImplementedError()
+
+        kernel = expr.kernel
+        kernel_args = _get_layer_potential_args(self.mat_mapper, expr, source)
+
+        from sumpy.expansion.local import LineTaylorLocalExpansion
+        local_expn = LineTaylorLocalExpansion(kernel, source.qbx_order)
+
+        from sumpy.qbx import LayerPotentialMatrixBlockGenerator
+        mat_gen = LayerPotentialMatrixBlockGenerator(
+                self.queue.context, (local_expn,))
+
+        assert abs(expr.qbx_forced_limit) > 0
+        centers, radii = _get_centers_and_expansion_radii(self.queue,
+                source, target_discr, expr.qbx_forced_limit)
+
+        _, (mat,) = mat_gen(self.queue,
+                targets=target_discr.nodes(),
+                sources=source_discr.nodes(),
+                centers=centers,
+                expansion_radii=radii,
+                index_set=self.index_set,
+                **kernel_args)
+
+        waa = _get_weights_and_area_elements(self.queue, source, source_discr)
+        mat *= waa[self.index_set.linear_col_indices]
+        mat = rec_density * mat.get(self.queue)
 
         return mat
 
-    # IntGdSource should have been removed 
by a preprocessor - def map_num_reference_derivative(self, expr): - rec_operand = self.rec(expr.operand) +class FarFieldBlockBuilder(MatrixBlockBuilderBase): + def __init__(self, queue, dep_expr, other_dep_exprs, dep_source, dep_discr, + places, index_set, context, exclude_self=False): + super(FarFieldBlockBuilder, self).__init__(queue, + dep_expr, other_dep_exprs, dep_source, dep_discr, + places, index_set, context) - assert isinstance(rec_operand, np.ndarray) - if len(rec_operand.shape) == 2: - raise NotImplementedError("derivatives") + # NOTE: same mapper issues as in the NearFieldBlockBuilder + self.exclude_self = exclude_self + self.mat_mapper = MatrixBuilderBase(queue, + dep_expr, other_dep_exprs, dep_source, dep_discr, + places, context) + self.blk_mapper = MatrixBlockBuilderBase(queue, + dep_expr, other_dep_exprs, dep_source, dep_discr, + places, index_set, context) - where_discr = self.places[expr.where] - op = sym.NumReferenceDerivative(expr.ref_axes, sym.var("u")) - return bind(where_discr, op)( - self.queue, u=cl.array.to_device(self.queue, rec_operand)).get() + def get_dep_variable(self): + tgtindices = self.index_set.linear_row_indices.get(self.queue) + srcindices = self.index_set.linear_col_indices.get(self.queue) - def map_node_coordinate_component(self, expr): - where_discr = self.places[expr.where] - op = sym.NodeCoordinateComponent(expr.ambient_axis) - return bind(where_discr, op)(self.queue).get() + return np.equal(tgtindices, srcindices).astype(np.float64) - def map_call(self, expr): - arg, = expr.parameters - rec_arg = self.rec(arg) + def map_int_g(self, expr): + source = self.places[expr.source] + source_discr = self.places.get_discretization(expr.source) + target_discr = self.places.get_discretization(expr.target) - if ( - isinstance(rec_arg, np.ndarray) - and len(rec_arg.shape) == 2): - raise RuntimeError("expression is nonlinear in variable") + if source_discr is not target_discr: + raise NotImplementedError() - if isinstance(rec_arg, np.ndarray): - rec_arg = cl.array.to_device(self.queue, rec_arg) + rec_density = self.blk_mapper.rec(expr.density) + if is_zero(rec_density): + return 0 - op = expr.function(sym.var("u")) - result = bind(self.dep_source, op)(self.queue, u=rec_arg) + if not np.isscalar(rec_density): + raise NotImplementedError() - if isinstance(result, cl.array.Array): - result = result.get() + kernel = expr.kernel.get_base_kernel() + kernel_args = _get_kernel_args(self.mat_mapper, kernel, expr, source) + if self.exclude_self: + kernel_args["target_to_source"] = \ + cl.array.arange(self.queue, 0, target_discr.nnodes, dtype=np.int) - return result + from sumpy.p2p import P2PMatrixBlockGenerator + mat_gen = P2PMatrixBlockGenerator( + self.queue.context, (kernel,), exclude_self=self.exclude_self) + + _, (mat,) = mat_gen(self.queue, + targets=target_discr.nodes(), + sources=source_discr.nodes(), + index_set=self.index_set, + **kernel_args) + mat = rec_density * mat.get(self.queue) + + return mat + +# }}} + +# vim: foldmethod=marker diff --git a/pytential/symbolic/pde/maxwell/__init__.py b/pytential/symbolic/pde/maxwell/__init__.py index 489f97fe..86fdd993 100644 --- a/pytential/symbolic/pde/maxwell/__init__.py +++ b/pytential/symbolic/pde/maxwell/__init__.py @@ -165,7 +165,7 @@ class PECChargeCurrentMFIEOperator: def scattered_volume_field(self, Jt, rho, qbx_forced_limit=None): """ - This will return an object of six entries, the first three of which + This will return an object array of six entries, the first three of which represent the electric, and 
the second three of which represent the
         magnetic field. This satisfies the time-domain Maxwell's equations
         as verified by :func:`sumpy.point_calculus.frequency_domain_maxwell`.
diff --git a/pytential/symbolic/primitives.py b/pytential/symbolic/primitives.py
index b16c6d60..5ee758a4 100644
--- a/pytential/symbolic/primitives.py
+++ b/pytential/symbolic/primitives.py
@@ -201,10 +201,10 @@ class DEFAULT_TARGET:  # noqa
     pass
 
 
-class _QBXSourceStage2(object):
-    """A symbolic 'where' specifier for the
-    :attr:`pytential.qbx.QBXLayerPotentialSource.stage2_density_discr`
-    of the layer potential source identified by :attr:`where`.
+class _QBXSource(object):
+    """A symbolic 'where' specifier for a density of a
+    :class:`pytential.qbx.QBXLayerPotentialSource`
+    layer potential source identified by :attr:`where`.
 
     .. attribute:: where
 
@@ -229,6 +229,27 @@ class _QBXSourceStage2(object):
     def __ne__(self, other):
         return not self.__eq__(other)
 
+
+class QBXSourceStage1(_QBXSource):
+    """An explicit symbolic 'where' specifier for the
+    :attr:`pytential.qbx.QBXLayerPotentialSource.density_discr`
+    of the layer potential source identified by :attr:`where`.
+    """
+
+
+class QBXSourceStage2(_QBXSource):
+    """A symbolic 'where' specifier for the
+    :attr:`pytential.qbx.QBXLayerPotentialSource.stage2_density_discr`
+    of the layer potential source identified by :attr:`where`.
+    """
+
+
+class QBXSourceQuadStage2(_QBXSource):
+    """A symbolic 'where' specifier for the
+    :attr:`pytential.qbx.QBXLayerPotentialSource.quad_stage2_density_discr`
+    of the layer potential source identified by :attr:`where`.
+    """
+
 # }}}
 
 
@@ -328,6 +349,8 @@ class DiscretizationProperty(Expression):
         further arguments.
     """
 
+    init_arg_names = ("where",)
+
     def __init__(self, where=None):
         """
         :arg where: |where-blurb|
@@ -348,6 +371,9 @@ class QWeight(DiscretizationProperty):
 
 
 class NodeCoordinateComponent(DiscretizationProperty):
+
+    init_arg_names = ("ambient_axis", "where")
+
     def __init__(self, ambient_axis, where=None):
         """
         :arg where: |where-blurb|
@@ -378,12 +404,14 @@ class NumReferenceDerivative(DiscretizationProperty):
     reference coordinates.
     """
 
-    def __new__(cls, ref_axes, operand, where=None):
+    init_arg_names = ("ref_axes", "operand", "where")
+
+    def __new__(cls, ref_axes=None, operand=None, where=None):
         # If the constructor is handed a multivector object, return an
         # object array of the operator applied to each of the
         # coefficients in the multivector.
 
-        if isinstance(operand, (np.ndarray)):
+        if isinstance(operand, np.ndarray):
             def make_op(operand_i):
                 return cls(ref_axes, operand_i, where=where)
 
@@ -750,7 +778,10 @@ def _scaled_max_curvature(ambient_dim, dim=None, where=None):
 # {{{ operators
 
 class SingleScalarOperandExpression(Expression):
-    def __new__(cls, operand):
+
+    init_arg_names = ("operand",)
+
+    def __new__(cls, operand=None):
         # If the constructor is handed a multivector object, return an
         # object array of the operator applied to each of the
         # coefficients in the multivector.
@@ -792,7 +823,10 @@ def integral(ambient_dim, dim, operand, where=None):
 
 
 class SingleScalarOperandExpressionWithWhere(Expression):
-    def __new__(cls, operand, where=None):
+
+    init_arg_names = ("operand", "where")
+
+    def __new__(cls, operand=None, where=None):
         # If the constructor is handed a multivector object, return an
         # object array of the operator applied to each of the
         # coefficients in the multivector.
@@ -842,6 +876,8 @@ class Ones(Expression):
         discretization.
     """
 
+    init_arg_names = ("where",)
+
     def __init__(self, where=None):
         self.where = where
 
@@ -870,6 +906,9 @@ def mean(ambient_dim, dim, operand, where=None):
 
 
 class IterativeInverse(Expression):
+
+    init_arg_names = ("expression", "rhs", "variable_name", "extra_vars", "where")
+
     def __init__(self, expression, rhs, variable_name,
             extra_vars={}, where=None):
         self.expression = expression
@@ -982,7 +1021,10 @@ class IntG(Expression):
     where :math:`\sigma` is *density*.
     """
 
-    def __new__(cls, kernel, density, *args, **kwargs):
+    init_arg_names = ("kernel", "density", "qbx_forced_limit", "source", "target",
+            "kernel_arguments")
+
+    def __new__(cls, kernel=None, density=None, *args, **kwargs):
         # If the constructor is handed a multivector object, return an
         # object array of the operator applied to each of the
         # coefficients in the multivector.
@@ -1024,8 +1066,8 @@ class IntG(Expression):
         :arg kernel_arguments: A dictionary mapping named
             :class:`sumpy.kernel.Kernel` arguments
             (see :meth:`sumpy.kernel.Kernel.get_args`
-            and :meth:`sumpy.kernel.Kernel.get_source_args`
-            to expressions that determine them)
+            and :meth:`sumpy.kernel.Kernel.get_source_args`)
+            to expressions that determine them
         :arg source: The symbolic name of the source discretization. This name
             is bound to a concrete
             :class:`pytential.source.LayerPotentialSourceBase`
@@ -1114,6 +1156,11 @@ class IntG(Expression):
                 self.source, self.target,
                 hashable_kernel_args(self.kernel_arguments))
 
+    def __setstate__(self, state):
+        # Overwrite pymbolic.Expression.__setstate__
+        assert len(self.init_arg_names) == len(state), type(self)
+        self.__init__(*state)
+
     mapper_method = intern("map_int_g")
 
 
@@ -1130,7 +1177,7 @@ def _insert_source_derivative_into_kernel(kernel):
                 kernel, dir_vec_name=_DIR_VEC_NAME)
     else:
         return kernel.replace_inner_kernel(
-                _insert_source_derivative_into_kernel(kernel.kernel))
+                _insert_source_derivative_into_kernel(kernel.inner_kernel))
 
 
 def _get_dir_vec(dsource, ambient_dim):
@@ -1168,8 +1215,8 @@ def int_g_dsource(ambient_dim, dsource, kernel, density,
     r"""
     .. math::

        \int_\Gamma \operatorname{dsource} \cdot \nabla_y
        \cdot g(x-y) \sigma(y) dS_y

    where :math:`\sigma` is *density*, and *dsource*, a multivector.
diff --git a/pytential/unregularized.py b/pytential/unregularized.py
index a2fe4ad0..e7fe1b3e 100644
--- a/pytential/unregularized.py
+++ b/pytential/unregularized.py
@@ -187,7 +187,7 @@ class UnregularizedLayerPotentialSource(LayerPotentialSourceBase):
 
             result.append((o.name, output_for_each_kernel[o.kernel_index]))
 
-        return result, []
+        return result
 
     # {{{ fmm-based execution
 
@@ -288,7 +288,7 @@ class UnregularizedLayerPotentialSource(LayerPotentialSourceBase):
 
         # }}}
 
-        return result, []
+        return result
 
 # }}}
 
@@ -447,7 +447,7 @@ class _FMMGeometryData(object):
 
 
 __all__ = (
-        UnregularizedLayerPotentialSource,
+        "UnregularizedLayerPotentialSource",
         )
 
 # vim: fdm=marker
diff --git a/requirements.txt b/requirements.txt
index 8925c34e..dd15a69e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 numpy
 git+git://github.com/inducer/pymbolic
-sympy==1.0
+sympy==1.1.1
 git+https://github.com/inducer/modepy
 git+https://github.com/inducer/pyopencl
 git+https://github.com/inducer/islpy
@@ -8,4 +8,4 @@ git+https://github.com/inducer/loopy
 git+https://gitlab.tiker.net/inducer/boxtree
 git+https://github.com/inducer/meshmode
 git+https://gitlab.tiker.net/inducer/sumpy
-git+https://github.com/inducer/pyfmmlib
+git+https://gitlab.tiker.net/inducer/pyfmmlib
diff --git a/setup.py b/setup.py
index 84438494..e2a1ae90 100644
--- a/setup.py
+++ b/setup.py
@@ -100,11 +100,12 @@ setup(name="pytential",
           "pytools>=2018.2",
           "modepy>=2013.3",
           "pyopencl>=2013.1",
-          "boxtree>=2013.1",
+          "boxtree>=2018.2",
           "pymbolic>=2013.2",
           "loo.py>=2017.2",
           "sumpy>=2013.1",
           "cgen>=2013.1.2",
+          "pyfmmlib>=2018.1",
           "six",
           ])
-- 
GitLab


From 1d83e091b1c6739ce77f5d0541d105528f13660c Mon Sep 17 00:00:00 2001
From: xywei
Date: Sat, 3 Nov 2018 12:10:19 -0500
Subject: [PATCH 24/30] Mysterious compatibility fix

---
 pytential/qbx/fmm.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py
index 46d7ae88..a5a5e52e 100644
--- a/pytential/qbx/fmm.py
+++ b/pytential/qbx/fmm.py
@@ -513,7 +513,11 @@ def drive_fmm(expansion_wrangler, src_weights, timing_data=None):
 
     # {{{ wrangle qbx expansions
 
-    qbx_expansions, timing_future = wrangler.form_global_qbx_locals(src_weights)
+    tmpres = wrangler.form_global_qbx_locals(src_weights)
+    if isinstance(tmpres, tuple):
+        qbx_expansions, timing_future = tmpres
+    else:
+        qbx_expansions = tmpres
 
     recorder.add("form_global_qbx_locals", timing_future)
 
@@ -531,7 +535,11 @@ def drive_fmm(expansion_wrangler, src_weights, timing_data=None):
 
     qbx_expansions = qbx_expansions + local_result
 
-    qbx_potentials, timing_future = wrangler.eval_qbx_expansions(qbx_expansions)
+    tmpres = wrangler.eval_qbx_expansions(qbx_expansions)
+    if isinstance(tmpres, tuple):
+        qbx_potentials, timing_future = tmpres
+    else:
+        qbx_potentials = tmpres
 
     recorder.add("eval_qbx_expansions", timing_future)
 
-- 
GitLab


From 79c4132aef286f7a30a4fa74eb3e9726c2109271 Mon Sep 17 00:00:00 2001
From: xywei
Date: Sat, 26 Jan 2019 22:54:48 -0600
Subject: [PATCH 25/30] Remove Cahn-Hilliard related experiments (better
 suited in volumential)

---
 experiments/biharmonic.py    | 223 ------------
 experiments/cahn-hilliard.py | 685 -----------------------------------
 2 files changed, 908 deletions(-)
 delete mode 100644 experiments/biharmonic.py
 delete mode 100644 experiments/cahn-hilliard.py

diff --git a/experiments/biharmonic.py b/experiments/biharmonic.py
deleted file mode 100644
index 083c2dee..00000000
--- a/experiments/biharmonic.py
+++ /dev/null
@@ -1,223 +0,0 @@
-__copyright__ = "Copyright 
(C) 2018 Xiaoyu Wei" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -import numpy as np - -# {{{ set some constants for use below - -dim = 2 -nelements = 20 -bdry_quad_order = 4 -mesh_order = bdry_quad_order -qbx_order = bdry_quad_order -bdry_ovsmp_quad_order = 4*bdry_quad_order -fmm_order = 10 - -loc_sign = -1 -x_0 = np.zeros(dim) -# x_0 = np.array([0.5, 0]) - -# }}} - -def main(): - import logging - logging.basicConfig(level=logging.WARNING) # INFO for more progress info - logger = logging.getLogger(__name__) - - import numpy as np - import pyopencl as cl - import pyopencl.clmath - from pytential import sym - - cl_ctx = cl.create_some_context() - queue = cl.CommandQueue(cl_ctx) - - # {{{ make mesh - - from meshmode.mesh.generation import ellipse, make_curve_mesh - from functools import partial - - if 1 and dim == 2: - # one circle - mesh = make_curve_mesh( - partial(ellipse, 1), - np.linspace(0, 1, nelements+1), - mesh_order) - elif dim == 2: - # array of circles - base_mesh = make_curve_mesh( - partial(ellipse, 1), - np.linspace(0, 1, nelements+1), - mesh_order) - - from meshmode.mesh.processing import affine_map, merge_disjoint_meshes - nx = 2 - ny = 2 - dx = 2 / nx - meshes = [ - affine_map( - base_mesh, - A=np.diag([dx*0.25, dx*0.25]), - b=np.array([dx*(ix-nx/2), dx*(iy-ny/2)])) - for ix in range(nx) - for iy in range(ny)] - - mesh = merge_disjoint_meshes(meshes, single_group=True) - else: - raise NotImplementedError("Could not find a way to get the mesh.") - - if 0: - from meshmode.mesh.visualization import draw_curve - draw_curve(mesh) - import matplotlib.pyplot as plt - plt.show() - - # }}} End make mesh - - from meshmode.discretization import Discretization - from meshmode.discretization.poly_element import \ - InterpolatoryQuadratureSimplexGroupFactory - pre_density_discr = Discretization( - cl_ctx, mesh, - InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order)) - - from pytential.qbx import ( - QBXLayerPotentialSource, QBXTargetAssociationFailedException) - qbx, _ = QBXLayerPotentialSource( - pre_density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, - fmm_order=fmm_order - ).with_refinement() - density_discr = qbx.density_discr - - # {{{ describe bvp - - from pytential.symbolic.pde.biharmonic import BiharmonicOperator - biharmonic_operator = BiharmonicOperator(dim, loc_sign, x_0) - - from pytential import bind - unk = biharmonic_operator.make_unknown("sigma") - bdry_op_sym = biharmonic_operator.operator(unk) - - # }}} - - bound_op = bind(qbx, bdry_op_sym) 
- - # {{{ fix rhs and solve - - nodes = density_discr.nodes().with_queue(queue) - - def bdry_conditions_func(x): - if len(x) == 2: - return sym.make_obj_array([ - x[0] * 0 + 3, - x[0] * 0 + 0 - ]) - else: - raise NotImplementedError - - bc = bdry_conditions_func(nodes) - - from pytential.solve import gmres - gmres_result = gmres( - bound_op.scipy_op(queue, "sigma", dtype=np.float64), - bc, tol=1e-8, progress=True, - stall_iterations=0, - hard_failure=False) - - # }}} - - # {{{ postprocess/visualize - - from datetime import datetime - def file_id(): - # return str(datetime.now()).replace(' ', '::') - return 'latest' - - sigma = gmres_result.solution - - if 0: - logger.warn("Overwriting solutions for debugging") - # test out layer potentials - sigma[0] *= 0 - sigma[1] *= 0 - - sigma[0] += 0 - sigma[1] += 1 - - representation_sym = biharmonic_operator.representation(unk, qbx_forced_limit=None) - from sumpy.visualization import FieldPlotter - fplot = FieldPlotter(np.zeros(dim), extent=5, npoints=500) - - targets = cl.array.to_device(queue, fplot.points) - - bdry_conditions = bdry_conditions_func(targets) - - qbx_stick_out = qbx.copy(target_association_tolerance=0.05) - - if 1: - # boundary plot for operator info - visual_order = 2 - from meshmode.discretization.visualization import make_visualizer - solu_on_bdry = bind((qbx, density_discr), bdry_op_sym)(queue, sigma=sigma) - bdplot = make_visualizer(queue, density_discr, visual_order) - bdry_normals = bind(density_discr, sym.normal(dim))(queue)\ - .as_vector(dtype=object) - - bdplot.write_vtk_file( - "potential-biharm-bdry-%s.vtu" % file_id(), - [ - ("sig1", sigma[0]), - ("sig2", sigma[1]), - ("bdry_normals", bdry_normals), - ("u", solu_on_bdry[0]), - ("u_n", solu_on_bdry[1]), - ("bc1", bc[0]), - ("bc2", bc[1]), - ] - ) - - try: - from pytential.target import PointsTarget - solu_representation = bind( - (qbx_stick_out, PointsTarget(targets)), - representation_sym)(queue, sigma=sigma) - fld_in_vol = solu_representation[0].get() - except QBXTargetAssociationFailedException as e: - fplot.write_vtk_file( - "failed-targets-%s.vts" % file_id(), - [ - ("failed", e.failed_target_flags.get(queue)) - ] - ) - raise - - fplot.write_vtk_file( - "potential-biharm-%s.vts" % file_id(), - [ - ("potential", fld_in_vol), - ] - ) - - # }}} - -if __name__ == "__main__": - main() diff --git a/experiments/cahn-hilliard.py b/experiments/cahn-hilliard.py deleted file mode 100644 index ca1219da..00000000 --- a/experiments/cahn-hilliard.py +++ /dev/null @@ -1,685 +0,0 @@ -import numpy as np -import numpy.linalg as la -import pyopencl as cl -import pyopencl.clmath # noqa - -from meshmode.discretization import Discretization -from meshmode.discretization.poly_element import \ - InterpolatoryQuadratureSimplexGroupFactory - -from pytential import bind, sym, norm # noqa -from pytential.target import PointsTarget - -from pytools.obj_array import make_obj_array -import pytential.symbolic.primitives as p - -from sumpy.kernel import ExpressionKernel -import loopy as lp - - -# clean up the mess -def clean_file(filename): - import os - try: - os.remove(filename) - except OSError: - pass - -# {{{ set some constants for use below - -# {{{ all kinds of orders -bdry_quad_order = 4 -mesh_order = bdry_quad_order -qbx_order = bdry_quad_order -bdry_ovsmp_quad_order = 4 * bdry_quad_order -fmm_order = 8 - -vol_quad_order = 5 -vol_qbx_order = 2 -# }}} -# {{{ mesh generation -nelements = 20 - -from enum import Enum - - -class Geometry(Enum): - RegularRectangle = 1 - Circle = 2 - - -shape = 
Geometry.Circle -# }}} -# {{{ physical parameters -s = 1.5 -epsilon = 0.01 -delta_t = 0.05 -final_t = delta_t * 1 -theta_y = 60. / 180. * np.pi - -b = s / (epsilon**2) -c = 1. / (epsilon * delta_t) -# }}} -# {{{ initial phi - -# This phi function is also used to do PDE check -import pymbolic as pmbl -x = pmbl.var("x") -y = pmbl.var("y") - -# b, c are known constants - -# TODO: modify pymbolic to use tanh function -# phi = tanh(x / sqrt (2 * epsilon)) -phi = x**2 + x * y + 2 * y**2 -phi3 = phi**3 -laphi = pmbl.differentiate(pmbl.differentiate(phi, 'x'), 'x') + \ - pmbl.differentiate(pmbl.differentiate(phi, 'y'), 'y') -laphi3 = pmbl.differentiate(pmbl.differentiate(phi3, 'x'), 'x') + \ - pmbl.differentiate(pmbl.differentiate(phi3, 'y'), 'y') -f1_expr = c * phi - (1 + s) / epsilon**2 * laphi + 1 / epsilon**2 * laphi3 -f2_expr = (phi3 - (1 + s) * phi) / epsilon**2 - -def f1_func(x, y): - return pmbl.evaluate(f1_expr, {"x": x, "y": y}) - -def f2_func(x, y): - return pmbl.evaluate(f2_expr, {"x": x, "y": y}) - -def initial_phi(x, y): - return pmbl.evaluate(phi, {"x": x, "y": y}) -# }}} -# {{{ manufactured rhs such that the solution is phi given above - -lalaphi = pmbl.differentiate(pmbl.differentiate(laphi, 'x'), 'x') + \ - pmbl.differentiate(pmbl.differentiate(laphi, 'y'), 'y') -mu = - epsilon * laphi - phi / epsilon + phi3 / epsilon - -rhs1_expr = lalaphi - b * laphi + c * phi -rhs2_expr = mu / epsilon + laphi - b * phi - -def rhs1_func(x, y): - return pmbl.evaluate(rhs1_expr, {"x": x, "y": y}) - -def rhs2_func(x, y): - return pmbl.evaluate(rhs2_expr, {"x": x, "y": y}) -# }}} - -# }}} - -# {{{ a kernel class for G0 -# TODO: will the expressions work when lambda is complex? -# (may need conversion from 1j to var("I")) -from sumpy.kernel import ExpressionKernel - - -class ShidongKernel(ExpressionKernel): - init_arg_names = ("dim", "lambda1", "lambda2") - - def __init__(self, dim=None, lambda1=0., lambda2=1.): - """ - :arg lambda1,lambda2: The roots of the quadratic equation w.r.t - laplacian. - """ - # Assert against repeated roots. - if abs(lambda1**2 - lambda2**2) < 1e-9: - raise RuntimeError("illposed since input roots are too close") - -# Based on http://mathworld.wolfram.com/ModifiedBesselFunctionoftheSecondKind.html - if dim == 2: - r = pymbolic_real_norm_2(make_sym_vector("d", dim)) - expr = var("hankel_1")(0, var("I") * lambda1 * r) - \ - var("hankel_1")(0, var("I") * lambda2 * r) - scaling = 1. / (4. 
* var("I") * (lambda1**2 - lambda2**2)) - else: - raise RuntimeError("unsupported dimensionality") - - ExpressionKernel.__init__( - self, - dim, - expression=expr, - scaling=scaling, - is_complex_valued=True) - - self.lambda1 = lambda1 - self.lambda2 = lambda2 - - def __getinitargs__(self): - return (self._dim, self.lambda1, self.lambda2) - - def update_persistent_hash(self, key_hash, key_builder): - key_hash.update(type(self).__name__.encode("utf8")) - key_builder.rec(key_hash, (self._dim, self.lambda1, self.lambda2)) - - def __repr__(self): - if self._dim is not None: - return "ShdgKnl%dD(%f, %f)" % (self._dim, self.lambda1, - self.lambda2) - else: - return "ShdgKnl(%f, %f)" % (self.lambda1, self.lambda2) - - def prepare_loopy_kernel(self, loopy_knl): - from sumpy.codegen import (bessel_preamble_generator, bessel_mangler) - loopy_knl = lp.register_function_manglers(loopy_knl, [bessel_mangler]) - loopy_knl = lp.register_preamble_generators( - loopy_knl, [bessel_preamble_generator]) - return loopy_knl - - def get_args(self): - k_dtype = np.complex128 - return [ - KernelArgument( - loopy_arg=lp.ValueArg("shidong_kernel", k_dtype), ) - ] - - mapper_method = "map_shidong_kernel" - - -# }}} - -# {{{ extended kernel getters -class HankelBasedKernel(ExpressionKernel): - def prepare_loopy_kernel(self, loopy_knl): - from sumpy.codegen import (bessel_preamble_generator, bessel_mangler) - loopy_knl = lp.register_function_manglers(loopy_knl, [bessel_mangler]) - loopy_knl = lp.register_preamble_generators( - loopy_knl, [bessel_preamble_generator]) - - return loopy_knl - - -def get_extkernel_for_G0(lambda1, lambda2): - from sumpy.symbolic import pymbolic_real_norm_2 - from pymbolic.primitives import make_sym_vector - from pymbolic import var - - d = make_sym_vector("d", 3) - # r2 = pymbolic_real_norm_2(d[:-1]) - r3 = pymbolic_real_norm_2(d) - #expr = var("hankel_1")(0, var("I") * lambda1 * r2 - # + var("I") * d[-1]**2) \ - # - var("hankel_1")(0, var("I") * lambda2 * r2 - # + var("I") * d[-1]**2) - expr = var("hankel_1")(0, var("I") * lambda1 * r3) \ - - var("hankel_1")(0, var("I") * lambda2 * r3) - scaling = 1. / (4. * var("I") * (lambda1**2 - lambda2**2)) - - return HankelBasedKernel( - dim=3, expression=expr, scaling=scaling, is_complex_valued=True) - - -def get_extkernel_for_G1(lamb): - from sumpy.symbolic import pymbolic_real_norm_2 - from pymbolic.primitives import make_sym_vector - from pymbolic import var - - d = make_sym_vector("d", 3) - #r2 = pymbolic_real_norm_2(d[:-1]) - r3 = pymbolic_real_norm_2(d) - #expr = var("hankel_1")(0, var("I") * lamb * r2 + var("I") * d[-1]**2) - expr = var("hankel_1")(0, var("I") * lamb * r3) - scaling = -var("I") / 4. - - return HankelBasedKernel( - dim=3, expression=expr, scaling=scaling, is_complex_valued=True) - - -# }}} - - -def main(): - import logging - logging.basicConfig(level=logging.INFO) - logger = logging.getLogger(__name__) - - cl_ctx = cl.create_some_context() - queue = cl.CommandQueue(cl_ctx) - - # {{{ volume mesh generation - - if shape == Geometry.RegularRectangle: - from meshmode.mesh.generation import generate_regular_rect_mesh - ext = 1. 
- h = 0.05 - mesh = generate_regular_rect_mesh( - a=(-ext / 2., -ext / 2.), - b=(ext / 2., ext / 2.), - n=(int(ext / h), int(ext / h))) - elif shape == Geometry.Circle: - from meshmode.mesh.io import generate_gmsh, FileSource - h = 0.05 - mesh = generate_gmsh( - FileSource("circle.step"), - 2, - order=mesh_order, - force_ambient_dim=2, - other_options=[ - "-string", "Mesh.CharacteristicLengthMax = %g;" % h - ]) - else: - RuntimeError("unsupported geometry") - - logger.info("%d elements" % mesh.nelements) - - # }}} - - # {{{ discretization and connections - - vol_discr = Discretization( - cl_ctx, mesh, - InterpolatoryQuadratureSimplexGroupFactory(vol_quad_order)) - - from meshmode.mesh import BTAG_ALL - from meshmode.discretization.connection import make_face_restriction - pre_density_connection = make_face_restriction( - vol_discr, - InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order), BTAG_ALL) - pre_density_discr = pre_density_connection.to_discr - - from pytential.qbx import (QBXLayerPotentialSource, - QBXTargetAssociationFailedException) - - qbx, _ = QBXLayerPotentialSource( - pre_density_discr, - fine_order=bdry_ovsmp_quad_order, - qbx_order=qbx_order, - fmm_order=fmm_order, - expansion_disks_in_tree_have_extent=True, ).with_refinement() - - density_discr = qbx.density_discr - - # composition of connetions - # vol_discr --> pre_density_discr --> density_discr - # via ChainedDiscretizationConnection - - # from meshmode.mesh.generation import ellipse, make_curve_mesh - # from functools import partial - # - # mesh = make_curve_mesh( - # partial(ellipse, 2), - # np.linspace(0, 1, nelements+1), - # mesh_order) - # - # pre_density_discr = Discretization( - # cl_ctx, mesh, - # InterpolatoryQuadratureSimplexGroupFactory(bdry_quad_order)) - # - # from pytential.qbx import ( - # QBXLayerPotentialSource, QBXTargetAssociationFailedException) - # qbx, _ = QBXLayerPotentialSource( - # pre_density_discr, fine_order=bdry_ovsmp_quad_order, qbx_order=qbx_order, - # fmm_order=fmm_order, - # expansion_disks_in_tree_have_extent=True, - # ).with_refinement() - # density_discr = qbx.density_discr - - # }}} - - # {{{ visualizers - - from meshmode.discretization.visualization import make_visualizer - vol_vis = make_visualizer(queue, vol_discr, 20) - bdry_vis = make_visualizer(queue, density_discr, 20) - - # }}} - - # {{{ setup operator and potentials - - from pytential.symbolic.pde.cahn_hilliard import CahnHilliardOperator - chop = CahnHilliardOperator(b=b, c=c) - - unk = chop.make_unknown("sigma") - bound_op = bind(qbx, chop.operator(unk)) - - yukawa_2d_in_3d_kernel_1 = get_extkernel_for_G1(chop.lambdas[0]) - shidong_2d_in_3d_kernel = get_extkernel_for_G0(chop.lambdas[0], - chop.lambdas[1]) - - from sumpy.qbx import LayerPotential - from sumpy.expansion.local import LineTaylorLocalExpansion - - layer_pot_v0f1 = LayerPotential(cl_ctx, [ - LineTaylorLocalExpansion(shidong_2d_in_3d_kernel, order=vol_qbx_order) - ]) - layer_pot_v1f1 = LayerPotential(cl_ctx, [ - LineTaylorLocalExpansion( - yukawa_2d_in_3d_kernel_1, order=vol_qbx_order) - ]) - - #print(layer_pot_v0f1.get_kernel()) - # print(layer_pot_v1f1.get_kernel()) - # }}} - - # {{{ setup for volume integral - - vol_x = vol_discr.nodes().with_queue(queue) - - # target points - targets = cl.array.zeros(queue, (3, ) + vol_x.shape[1:], vol_x.dtype) - targets[:2] = vol_x - - # expansion centers - center_dist = 0.125 * np.min( - cl.clmath.sqrt( - bind(vol_discr, p.area_element(mesh.ambient_dim, mesh.dim))(queue)) - .get()) - centers = make_obj_array( - 
[ci.copy().reshape(vol_discr.nnodes) for ci in targets]) - centers[2][:] = center_dist - print(center_dist) - - # source points - # TODO: use over sampled source points? - sources = cl.array.zeros(queue, (3, ) + vol_x.shape[1:], vol_x.dtype) - sources[:2] = vol_x - - #np.set_printoptions(threshold=np.inf) - #print(sources) - - vol_weights = bind( - vol_discr, - p.area_element(mesh.ambient_dim, mesh.dim) * p.QWeight())(queue) - - print("volume: %d source nodes, %d target nodes" % (vol_discr.nnodes, - vol_discr.nnodes)) - - # }}} - - # {{{ prepare for time stepping - timestep_number = 0 - time = 0 - - def get_vts_filename(tmstp_num): - return "solution-" + '{0:03}'.format(tmstp_num) + ".vts" - - output_vts_filename = get_vts_filename(timestep_number) - # }}} - - # {{{ [[TIME STEPPING]] - while time < final_t: - timestep_number += 1 - time += delta_t - output_vts_filename = get_vts_filename(timestep_number) - - # get f1 to compute strengths - f1 = rhs1_func(vol_x[0], vol_x[1]) - f2 = rhs2_func(vol_x[0], vol_x[1]) - - evt, (vol_pot_v0f1, ) = layer_pot_v0f1( - queue, - targets=targets.reshape(3, vol_discr.nnodes), - centers=centers, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=((vol_weights * f1).reshape(vol_discr.nnodes), )) - - evt, (vol_pot_v1f1, ) = layer_pot_v1f1( - queue, - targets=targets.reshape(3, vol_discr.nnodes), - centers=centers, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=((vol_weights * f1).reshape(vol_discr.nnodes), )) - - plot_volume_potentials = True - if plot_volume_potentials: - - evt, (vol_pot_v0id, ) = layer_pot_v0f1( - queue, - targets=targets.reshape(3, vol_discr.nnodes), - centers=centers, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=((vol_weights * 1).reshape(vol_discr.nnodes), )) - - evt, (vol_pot_v1id, ) = layer_pot_v1f1( - queue, - targets=targets.reshape(3, vol_discr.nnodes), - centers=centers, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=((vol_weights * 1).reshape(vol_discr.nnodes), )) - - #print(vol_pot_v1f1.get().real[0:20]) - #print(vol_pot_v1f1.get().real[0:20]) - #print(vol_pot_v0id.get().real[0:20]) - #print(vol_pot_v1id.get().real[0:20]) - - clean_file("ch-volume.vtu") - vol_vis.write_vtk_file("ch-volume.vtu", [ - ("vol_pot_v0f1", vol_pot_v0f1), ("vol_pot_v1f1", vol_pot_v1f1), - ("vol_pot_v0id", vol_pot_v0id), ("vol_pot_v1id", vol_pot_v1id) - ]) - - # {{{ fix rhs and solve - - nodes = density_discr.nodes().with_queue(queue) - - def g(xvec): - x, y = xvec - return cl.clmath.cos(5 * cl.clmath.atan2(y, x)) - - bc = sym.make_obj_array([ - # TODO: Realistic BC - 5 + g(nodes), - 3 - g(nodes), - ]) - - from pytential.solve import gmres - gmres_result = gmres( - bound_op.scipy_op(queue, "sigma", dtype=np.complex128), - bc, - tol=1e-8, - progress=True, - stall_iterations=0, - hard_failure=True) - - sigma = gmres_result.solution - - # }}} - - # }}} - - qbx_stick_out = qbx.copy(target_association_tolerance=0.05) - - immersed_visualization = False - if immersed_visualization == True: - from sumpy.visualization import FieldPlotter - fplot = FieldPlotter(np.zeros(2), extent=1.5, npoints=500) - - targets = cl.array.to_device(queue, fplot.points) - - qbx_stick_out = qbx.copy(target_stick_out_factor=0.05) - indicator_qbx = qbx_stick_out.copy(qbx_order=2) - - from sumpy.kernel import LaplaceKernel - ones_density = density_discr.zeros(queue) - ones_density.fill(1) - indicator = bind((indicator_qbx, PointsTarget(targets)), - sym.D(LaplaceKernel(2), sym.var("sigma")))( - queue, sigma=ones_density).get() - - 
clean_file("failed-targets.vts") - clean_file("potential.vts") - - try: - u, v = bind((qbx_stick_out, PointsTarget(targets)), - chop.representation(unk))( - queue, sigma=sigma) - except QBXTargetAssociationFailedException as e: - fplot.write_vtk_file("failed-targets.vts", - [("failed", e.failed_target_flags.get(queue))]) - raise - u = u.get().real - v = v.get().real - - #fplot.show_scalar_in_mayavi(fld_in_vol.real, max_val=5) - fplot.write_vtk_file("potential.vts", [ - ("u", u), - ("v", v), - ("indicator", indicator), - ]) - - # }}} - - # {{{ check pde - - def check_pde(): - from sumpy.point_calculus import CalculusPatch - vec_h = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7] - #vec_h = [1e-1] - vec_ru = [] - vec_rv = [] - vec_rp = [] - vec_rm = [] - vec_rf1 = [] - vec_rf2 = [] - vec_rutld = [] - vec_rvtld = [] - vec_rv1id = [] # V1[id] - vec_rv0id = [] # V0[id] - for dx in vec_h: - cp = CalculusPatch(np.zeros(2), order=4, h=dx) - targets = cl.array.to_device(queue, cp.points) - - u, v = bind((qbx, PointsTarget(targets)), - chop.representation(unk))( - queue, sigma=sigma) - - u = u.get().real - v = v.get().real - - lap_u = -(v - chop.b * u) - - # Check for homogeneous PDEs for u and v - vec_ru.append( - la.norm( - cp.laplace(lap_u) - chop.b * cp.laplace(u) + - chop.c * u)) - vec_rv.append(la.norm(v + cp.laplace(u) - chop.b * u)) - - # Check for inhomogeneous PDEs for phi and mu - - targets_in_3d = cl.array.zeros(queue, (3, ) + cp.points.shape[1:], - vol_x.dtype) - targets_in_3d[:2] = cp.points - - #center_dist = 0.125*np.min( - # cl.clmath.sqrt( - # bind(vol_discr, - # p.area_element(mesh.ambient_dim, mesh.dim)) - # (queue)).get()) - - centers_in_3d = make_obj_array([ci.copy() for ci in targets_in_3d]) - centers_in_3d[2][:] = center_dist - - evt, (v0f1, ) = layer_pot_v0f1( - queue, - targets=targets_in_3d, - centers=centers_in_3d, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=((vol_weights * f1).reshape(vol_discr.nnodes), )) - v0f1 = v0f1.get().real - print(v0f1) - - evt, (v0id, ) = layer_pot_v0f1( - queue, - targets=targets_in_3d, - centers=centers_in_3d, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=((vol_weights * 1).reshape(vol_discr.nnodes), )) - v0id = v0id.get().real - print(v0id) - - evt, (v1f1, ) = layer_pot_v1f1( - queue, - targets=targets_in_3d, - centers=centers_in_3d, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=((vol_weights * f1).reshape(vol_discr.nnodes), )) - v1f1 = v1f1.get().real - print(v1f1) - - evt, (v1id, ) = layer_pot_v1f1( - queue, - targets=targets_in_3d, - centers=centers_in_3d, - sources=sources.reshape(3, vol_discr.nnodes), - strengths=((vol_weights * 1).reshape(vol_discr.nnodes), )) - v1id = v1id.get().real - print(v1id) - - # FIXME: Why this is equal to 0 (instead of 1) - vec_rv1id.append( - la.norm(cp.laplace(v1id) - chop.lambdas[1]**2 * v1id - - 1)) - vec_rv0id.append( - la.norm(cp.laplace(v0id) - chop.lambdas[1]**2 * v0id - v1id)) - - f14ck = cl.array.to_device(queue, - f1_func(cp.points[0], cp.points[1])) - f24ck = cl.array.to_device(queue, - f2_func(cp.points[0], cp.points[1])) - phi_init = cl.array.to_device(queue, - initial_phi(cp.points[0], - cp.points[1])) - - f14ck = f14ck.get().real - f24ck = f24ck.get().real - phi_init = phi_init.get().real - vec_rf1.append( - la.norm(chop.c * phi_init - - (1 + s) / epsilon**2 * cp.laplace(phi_init) + - 1. 
/ epsilon**2 * cp.laplace(phi_init**3) - f14ck)) - vec_rf2.append( - la.norm((phi_init**3 - (1 + s) * phi_init) / epsilon**2 - - f24ck)) - - utild = v0f1 - vtild = f24ck - v1f1 + chop.lambdas[0]**2 * v0f1 - lap_utild = -vtild + chop.b * utild + f24ck - # FIXME: Not passing this check. (bugs in volume integral?) - vec_rutld.append( - la.norm( - cp.laplace(lap_utild) - chop.b * lap_utild + chop.c * utild - - f14ck)) - vec_rvtld.append( - la.norm(vtild + cp.laplace(utild) - chop.b * utild - f24ck)) - - ph = utild + u - mu = epsilon * (vtild + v) - lap_ph = f24ck + chop.b * ph - mu / epsilon - - vec_rp.append( - la.norm( - cp.laplace(lap_ph) - chop.b * cp.laplace(ph) + chop.c * ph - - f14ck)) - vec_rm.append( - la.norm(mu / epsilon + cp.laplace(ph) - chop.b * ph - f24ck)) - - from tabulate import tabulate - # overwrite if file exists - with open('check_pde.dat', 'w') as f: - print( - "Residuals of PDE and numerical vs. symbolic differentiation:", - file=f) - print( - tabulate([ - ["h"] + vec_h, - ["residual_u"] + vec_ru, - ["residual_v"] + vec_rv, - ["residual_f1"] + vec_rf1, - ["residual_f2"] + vec_rf2, - ["residual_v1id"] + vec_rv1id, - ["residual_v0id"] + vec_rv0id, - ["residual_u_tld"] + vec_rutld, - ["residual_v_tld"] + vec_rvtld, - ["residual_phi"] + vec_rp, - ["residual_mu"] + vec_rm, - ]), - file=f) - 1 / 0 - - # check_pde() - - # }}} - - -if __name__ == "__main__": - main() -- GitLab From 1e178c8e7f2529727f4d1bda450a282963fbe3e6 Mon Sep 17 00:00:00 2001 From: xywei Date: Sat, 26 Jan 2019 22:57:11 -0600 Subject: [PATCH 26/30] Clean up cahn-hilliard operator --- pytential/symbolic/pde/biharmonic.py | 132 --------------------------- 1 file changed, 132 deletions(-) delete mode 100644 pytential/symbolic/pde/biharmonic.py diff --git a/pytential/symbolic/pde/biharmonic.py b/pytential/symbolic/pde/biharmonic.py deleted file mode 100644 index 2874bc6d..00000000 --- a/pytential/symbolic/pde/biharmonic.py +++ /dev/null @@ -1,132 +0,0 @@ -from __future__ import division, absolute_import, print_function - -__copyright__ = "Copyright (C) 2018 Xiaoyu Wei" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -__doc__ = """ -.. 
autoclass:: BiharmonicOperator -""" - -import numpy as np -from pytential.symbolic.pde.scalar import L2WeightedPDEOperator -from pytential import sym -from functools import partial - -from pymbolic.primitives import make_common_subexpression as cse - - -class BiharmonicOperator(L2WeightedPDEOperator): - """Biharmonic operator, subject to boundary conditions on - solution values and normal derivatives. - """ - - def __init__(self, dim, loc_sign, x_0=None, - use_l2_weighting=False, kernel_arguments=None): - """ - :arg loc_sign: +1 for exterior, -1 for interior. - :arg x_0: origin x_0 for the Almansi representation (default to be 0). - """ - - assert loc_sign in [-1, 1] - assert dim in [2, 3] - - from sumpy.kernel import LaplaceKernel - kernel = LaplaceKernel(dim) - - if x_0 is None: - x_0 = np.zeros(dim) - elif not isinstance(x_0, np.ndarray): - x_0 = np.array(x_0) - assert len(x_0) == dim - self.x_0 = x_0 - - if kernel_arguments is None: - kernel_arguments = {} - self.kernel_arguments = kernel_arguments - - self.loc_sign = loc_sign - - L2WeightedPDEOperator.__init__(self, kernel, use_l2_weighting) - - def make_unknown(self, name): - return sym.make_sym_vector(name, 2) - - def S(self, density, qbx_forced_limit): - return sym.S(self.kernel, density, - kernel_arguments=self.kernel_arguments, - qbx_forced_limit=qbx_forced_limit) - - def D(self, density, qbx_forced_limit): - return sym.D(self.kernel, density, - kernel_arguments=self.kernel_arguments, - qbx_forced_limit=qbx_forced_limit) - - def Sp(self, density, qbx_forced_limit): - return sym.Sp(self.kernel, density, - dim=self.kernel.dim, - qbx_forced_limit=qbx_forced_limit) - - def Dp(self, density, qbx_forced_limit): - return sym.Dp(self.kernel, density, - dim=self.kernel.dim, - qbx_forced_limit=qbx_forced_limit) - - def representation(self, unknown, qbx_forced_limit=None): - sig1, sig2 = unknown - S = partial(self.S, qbx_forced_limit=qbx_forced_limit) - D = partial(self.D, qbx_forced_limit=qbx_forced_limit) - - r = sym.nodes(self.kernel.dim).as_vector() - self.x_0 - rho_sq = cse(np.dot(r, r), "rho_sq") - - def grad(operand): - return sym.grad(ambient_dim=self.kernel.dim, operand=operand) - - return sym.make_obj_array([ - rho_sq * D(sig1) + S(sig2), - 2 * r * D(sig1) + rho_sq * grad(D(sig1)) + grad(S(sig2)), - ]) - - def operator(self, unknown): - sig1, sig2 = unknown - S = partial(self.S, qbx_forced_limit='avg') - D = partial(self.D, qbx_forced_limit='avg') - Sp = partial(self.Sp, qbx_forced_limit='avg') - Dp = partial(self.Dp, qbx_forced_limit='avg') - - r = sym.nodes(self.kernel.dim).as_vector() - self.x_0 - rho_sq = cse(np.dot(r, r), "rho_sq") - - nrml = sym.normal(self.kernel.dim).as_vector() - drhosq_dn = cse(2 * np.dot(r, nrml), "drhosq_dn") - - d = 0.5 * self.loc_sign * sym.make_obj_array([ - rho_sq * sig1, - drhosq_dn * sig1 - sig2, - ]) - - a = sym.make_obj_array([ # noqa - rho_sq * D(sig1) + S(sig2), - drhosq_dn * D(sig1) + rho_sq * Dp(sig1) + Sp(sig2) - ]) - - return d + a -- GitLab From e79fc553e7d524d18429b26701504a237b615439 Mon Sep 17 00:00:00 2001 From: xywei Date: Sat, 26 Jan 2019 23:01:09 -0600 Subject: [PATCH 27/30] Revert timing future related hacks --- pytential/qbx/fmm.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py index d55e5128..badf6300 100644 --- a/pytential/qbx/fmm.py +++ b/pytential/qbx/fmm.py @@ -515,11 +515,7 @@ def drive_fmm(expansion_wrangler, src_weights, timing_data=None): # {{{ wrangle qbx expansions - tmpres = 
wrangler.form_global_qbx_locals(src_weights)
-    if isinstance(tmpres, tuple):
-        qbx_expansions, timing_future = tmpres
-    else:
-        qbx_expansions = tmpres
+    qbx_expansions, timing_future = wrangler.form_global_qbx_locals(src_weights)

     recorder.add("form_global_qbx_locals", timing_future)

@@ -537,11 +533,7 @@ def drive_fmm(expansion_wrangler, src_weights, timing_data=None):

     qbx_expansions = qbx_expansions + local_result

-    tmpres = wrangler.eval_qbx_expansions(qbx_expansions)
-    if isinstance(tmpres, tuple):
-        qbx_potentials, timing_future = tmpres
-    else:
-        qbx_potentials = tmpres
+    qbx_potentials, timing_future = wrangler.eval_qbx_expansions(qbx_expansions)

     recorder.add("eval_qbx_expansions", timing_future)

--
GitLab


From aac21935d81f9152ef6546587a72dbb81597eed1 Mon Sep 17 00:00:00 2001
From: xywei
Date: Sat, 26 Jan 2019 23:12:09 -0600
Subject: [PATCH 28/30] Flake8 fixes

---
 pytential/symbolic/pde/cahn_hilliard.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/pytential/symbolic/pde/cahn_hilliard.py b/pytential/symbolic/pde/cahn_hilliard.py
index b9c97516..00d17038 100644
--- a/pytential/symbolic/pde/cahn_hilliard.py
+++ b/pytential/symbolic/pde/cahn_hilliard.py
@@ -104,10 +104,9 @@ class CahnHilliardOperator(L2WeightedPDEOperator):
                 1/(lam1**2-lam2**2)
                 * (
                     op_map(sym.S(knl, density, lam=lam1,
-                        qbx_forced_limit=qbx_forced_limit))
-                    -
-                    op_map(sym.S(knl, density, lam=lam2,
-                        qbx_forced_limit=qbx_forced_limit))))
+                        qbx_forced_limit=qbx_forced_limit))
+                    - op_map(sym.S(knl, density, lam=lam2,
+                        qbx_forced_limit=qbx_forced_limit))))
         else:
             return (
                     op_map(sym.S(knl, density, lam=self.lambdas[i-1],
@@ -173,12 +172,12 @@ class CahnHilliardOperator(L2WeightedPDEOperator):
            raise NotImplementedError("rscale deprecated, use left_precon instead")

        if right_precon is None:
-           right_precon = lambda x: x
-           right_precon_inv = right_precon
+           right_precon = lambda x: x  # noqa: E731
+           right_precon_inv = right_precon  # noqa: F841

        if left_precon is None:
-           left_precon = lambda x: x
-           left_precon_inv = left_precon
+           left_precon = lambda x: x  # noqa: E731
+           left_precon_inv = left_precon  # noqa: F841

        sig1, sig2 = right_precon(unknown)

@@ -215,4 +214,3 @@ class CahnHilliardOperator(L2WeightedPDEOperator):
        a = left_precon(a_pre)

        return d+a
-
--
GitLab


From 0043aba120215a0122d527387c29e4c41c768cde Mon Sep 17 00:00:00 2001
From: xywei
Date: Sun, 19 May 2019 16:37:14 -0500
Subject: [PATCH 29/30] Add robin operator

---
 pytential/symbolic/pde/scalar.py | 100 +++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/pytential/symbolic/pde/scalar.py b/pytential/symbolic/pde/scalar.py
index 840d8044..bebf6045 100644
--- a/pytential/symbolic/pde/scalar.py
+++ b/pytential/symbolic/pde/scalar.py
@@ -307,4 +307,104 @@ class NeumannOperator(L2WeightedPDEOperator):

 # }}}

+# {{{ robin
+
+class RobinOperator(L2WeightedPDEOperator):
+    """IE operator and field representation for solving Robin boundary
+    value problems, a * u + b * u_n = g, with scalar kernels
+    (e.g. :class:`sumpy.kernel.LaplaceKernel`).
+
+    .. automethod:: is_unique_only_up_to_constant
+    .. automethod:: representation
+    .. automethod:: operator
+    """
+
+    def __init__(self, kernel,
+            a=1., b=1.,
+            loc_sign,
+            laplace_kernel=0, use_l2_weighting=False,
+            kernel_arguments=None):
+        """
+        :arg a: coefficient for the function value u.
+        :arg b: coefficient for the normal derivative u_n.
+        :arg loc_sign: +1 for exterior, -1 for interior.
+        """
+
+        assert a**2 + b**2 > 0
+        assert loc_sign in [-1, 1]
+
+        assert isinstance(kernel, Kernel)
+
+        if kernel_arguments is None:
+            kernel_arguments = {}
+        self.kernel_arguments = kernel_arguments
+
+        self.a = a
+        self.b = b
+        self.loc_sign = loc_sign
+
+        L2WeightedPDEOperator.__init__(self, kernel, use_l2_weighting)
+
+    def is_unique_only_up_to_constant(self):
+        from sumpy.kernel import LaplaceKernel
+        return (
+            isinstance(self.kernel, LaplaceKernel)
+            and self.loc_sign < 0
+            and self.a == 0
+        )
+
+    def representation(self, u, map_potentials=None, qbx_forced_limit=None,
+                       **kwargs):
+        sqrt_w = self.get_sqrt_weight()
+        inv_sqrt_w_u = cse(u/sqrt_w)
+
+        if map_potentials is None:
+            def map_potentials(x):  # pylint:disable=function-redefined
+                return x
+
+        kwargs["qbx_forced_limit"] = qbx_forced_limit
+        kwargs["kernel_arguments"] = self.kernel_arguments
+
+        return (1. / (self.a**2 + self.b**2)) * (
+            self.b * map_potentials(
+                sym.S(self.kernel, inv_sqrt_w_u, **kwargs))
+            -
+            self.a * map_potentials(
+                sym.D(self.kernel, inv_sqrt_w_u, **kwargs))
+            )
+
+    def operator(self, u):
+        sqrt_w = self.get_sqrt_weight()
+        inv_sqrt_w_u = cse(u/sqrt_w)
+
+        knl = self.kernel
+
+        knl_kwargs = {}
+        knl_kwargs["qbx_forced_limit"] = "avg"
+        knl_kwargs["kernel_arguments"] = self.kernel_arguments
+
+        if self.is_unique_only_up_to_constant():
+            # With a == 0, this is the interior Laplace Neumann operator,
+            # which has a nullspace. The mean of the density must be
+            # matched to the desired solution separately. As is, this
+            # operator returns a mean that is not well-specified.
+
+            amb_dim = self.kernel.dim
+            ones_contribution = (
+                sym.Ones() * sym.mean(amb_dim, amb_dim-1, inv_sqrt_w_u))
+        else:
+            ones_contribution = 0
+
+        return (-self.loc_sign * 0.5 * u
+                + (1. / (self.a**2 + self.b**2)) * sqrt_w * (
+                    self.a * self.b * sym.S(knl, inv_sqrt_w_u, **knl_kwargs)
+                    - self.a**2 * sym.D(knl, inv_sqrt_w_u, **knl_kwargs)
+                    + self.b**2 * sym.Sp(knl, inv_sqrt_w_u, **knl_kwargs)
+                    - self.b * self.a * sym.Dp(
+                        knl, inv_sqrt_w_u, **knl_kwargs)
+                    + ones_contribution
+                ))
+
+# }}}
+
 # vim: foldmethod=marker
--
GitLab


From 335cf09cbd1d9de77cabf669e22875c136e758b3 Mon Sep 17 00:00:00 2001
From: xywei
Date: Mon, 20 May 2019 10:56:02 -0500
Subject: [PATCH 30/30] Bug fix

---
 pytential/symbolic/pde/scalar.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytential/symbolic/pde/scalar.py b/pytential/symbolic/pde/scalar.py
index bebf6045..d3d2226e 100644
--- a/pytential/symbolic/pde/scalar.py
+++ b/pytential/symbolic/pde/scalar.py
@@ -320,8 +320,8 @@ class RobinOperator(L2WeightedPDEOperator):
     """

     def __init__(self, kernel,
-            a=1., b=1.,
             loc_sign,
+            a=1., b=1.,
             laplace_kernel=0, use_l2_weighting=False,
             kernel_arguments=None):
         """
--
GitLab
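
A note on usage: a minimal driver exercising the RobinOperator added in patch 29
(with the signature as fixed in patch 30) might look like the sketch below. This
is illustrative only, not part of the patch series: qbx, queue, bc, and targets
are assumed to have been set up elsewhere, and the coefficient choices a=1,
b=0.5 are arbitrary.

    import numpy as np
    from sumpy.kernel import LaplaceKernel
    from pytential import bind, sym
    from pytential.solve import gmres
    from pytential.symbolic.pde.scalar import RobinOperator
    from pytential.target import PointsTarget

    # exterior Laplace problem with Robin condition u + 0.5*u_n = g
    op = RobinOperator(LaplaceKernel(2), loc_sign=+1, a=1., b=0.5)

    u_sym = sym.var("u")
    bound_op = bind(qbx, op.operator(u_sym))

    # solve the boundary integral equation for the density
    gmres_result = gmres(
            bound_op.scipy_op(queue, "u", dtype=np.float64),
            bc, tol=1e-8, progress=True, hard_failure=True)

    # evaluate the solution at off-boundary targets via the representation
    u_vol = bind(
            (qbx, PointsTarget(targets)),
            op.representation(u_sym))(queue, u=gmres_result.solution)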