From edc005b0d3cdfb819c3e8995a1f21ce2fef1f2cb Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 27 Oct 2015 17:41:18 -0500
Subject: [PATCH] Get opposite-face connection building to work

---
 meshmode/discretization/connection.py | 213 ++++++++++++++++++--------
 test/test_meshmode.py                 |  81 ++++++++++
 2 files changed, 230 insertions(+), 64 deletions(-)

diff --git a/meshmode/discretization/connection.py b/meshmode/discretization/connection.py
index bfb54724..17433bdc 100644
--- a/meshmode/discretization/connection.py
+++ b/meshmode/discretization/connection.py
@@ -1,8 +1,4 @@
-from __future__ import division
-from __future__ import absolute_import
-import six
-from six.moves import range
-from six.moves import zip
+from __future__ import division, print_function, absolute_import
 
 __copyright__ = "Copyright (C) 2014 Andreas Kloeckner"
 
@@ -26,6 +22,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
+import six
+from six.moves import range, zip
+
 import numpy as np
 import numpy.linalg as la
 import modepy as mp
@@ -660,17 +659,30 @@ def make_face_restriction(discr, group_factory, boundary_tag):
 # {{{ opposite-face connection
 
 def _make_cross_face_batches(
-        queue, bdry_discr,
+        queue, vol_discr, bdry_discr,
         i_tgt_grp, i_src_grp,
         i_face_tgt,
-        adj_grp, adj_grp_tgt_flags,
-        vbc_tgt_grp_face_batch,
-        src_grp_batch_and_el_lookup):
+        adj_grp,
+        vbc_tgt_grp_face_batch, src_grp_el_lookup):
+
+    # {{{ index wrangling
+
+    # Assert that the adjacency group and the restriction
+    # interpolation batch and the adjacency group have the same
+    # element ordering.
+
+    adj_grp_tgt_flags = adj_grp.element_faces == i_face_tgt
+
+    assert (
+            np.array_equal(
+                adj_grp.elements[adj_grp_tgt_flags],
+                vbc_tgt_grp_face_batch.from_element_indices
+                .get(queue=queue)))
 
     # find to_element_indices
 
     to_bdry_element_indices = (
-            vbc_tgt_grp_face_batch.from_element_indices
+            vbc_tgt_grp_face_batch.to_element_indices
             .get(queue=queue))
 
     # find from_element_indices
@@ -678,17 +690,44 @@ def _make_cross_face_batches(
     from_vol_element_indices = adj_grp.neighbors[adj_grp_tgt_flags]
     from_element_faces = adj_grp.neighbor_faces[adj_grp_tgt_flags]
 
-    from_bdry_element_indices = \
-            src_grp_batch_and_el_lookup.iel_lookup[
-                    from_vol_element_indices, from_element_faces]
+    from_bdry_element_indices = src_grp_el_lookup[
+            from_vol_element_indices, from_element_faces]
+
+    # }}}
+
+    # {{{ visualization (for debugging)
+
+    if 0:
+        print("TVE", adj_grp.elements[adj_grp_tgt_flags])
+        print("TBE", to_bdry_element_indices)
+        print("FVE", from_vol_element_indices)
+        from meshmode.mesh.visualization import draw_2d_mesh
+        import matplotlib.pyplot as pt
+        draw_2d_mesh(vol_discr.mesh, draw_element_numbers=True,
+                set_bounding_box=True,
+                draw_vertex_numbers=False,
+                draw_face_numbers=True,
+                fill=None)
+        pt.figure()
+
+        draw_2d_mesh(bdry_discr.mesh, draw_element_numbers=True,
+                set_bounding_box=True,
+                draw_vertex_numbers=False,
+                draw_face_numbers=True,
+                fill=None)
+
+        pt.show()
+    # }}}
 
-    # {{{ invert face map (using Newton)
+    # {{{ invert face map (using Gauss-Newton)
 
     to_bdry_nodes = (
             bdry_discr.groups[i_tgt_grp].view(bdry_discr.nodes())
             .get(queue=queue)
             [:, to_bdry_element_indices])
 
+    tol = 1e3 * np.finfo(to_bdry_nodes.dtype).eps
+
     from_mesh_grp = bdry_discr.mesh.groups[i_src_grp]
     from_grp = bdry_discr.groups[i_src_grp]
 
@@ -717,10 +756,16 @@ def _make_cross_face_batches(
         basis_at_unit_nodes = np.empty((from_nfuncs, nelements, nto_unit_nodes))
 
         for i, f in enumerate(from_grp.basis()):
-            basis_at_unit_nodes[i] = f(unit_nodes)
+            basis_at_unit_nodes[i] = (
+                    f(unit_nodes.reshape(dim, -1))
+                    .reshape(nelements, nto_unit_nodes))
 
         intp_coeffs = np.einsum("fj,jet->fet", from_inv_t_vdm, basis_at_unit_nodes)
 
+        # If we're interpolating 1, we had better get 1 back.
+        one_deviation = np.abs(np.sum(intp_coeffs, axis=0) - 1)
+        assert (one_deviation < tol).all(), np.max(one_deviation)
+
         return np.einsum("fet,aef->aet", intp_coeffs, from_bdry_nodes)
 
     def get_map_jacobian(unit_nodes):
@@ -731,17 +776,20 @@ def _make_cross_face_batches(
                 (dim, from_nfuncs, nelements, nto_unit_nodes))
 
         for i, df in enumerate(from_grp.grad_basis()):
-            df_result = df(unit_nodes)
+            df_result = df(unit_nodes.reshape(dim, -1))
 
             for rst_axis, df_r in enumerate(df_result):
-                dbasis_at_unit_nodes[rst_axis, i] = df_r
+                dbasis_at_unit_nodes[rst_axis, i] = (
+                        df_r.reshape(nelements, nto_unit_nodes))
 
         dintp_coeffs = np.einsum(
                 "fj,rjet->rfet", from_inv_t_vdm, dbasis_at_unit_nodes)
 
         return np.einsum("rfet,aef->raet", dintp_coeffs, from_bdry_nodes)
 
-    if 1:
+    # {{{ test map applier and jacobian
+
+    if 0:
         u = from_unit_nodes
         f = apply_map(u)
         for h in [1e-1, 1e-2]:
@@ -755,25 +803,93 @@ def _make_cross_face_batches(
 
             print(h, la.norm((f_2-f2_2).ravel()))
 
+    # }}}
+
+    # {{{ visualize initial guess
 
+    if 0:
+        import matplotlib.pyplot as pt
+        guess = apply_map(from_unit_nodes)
+        goals = to_bdry_nodes
 
-    print(r[:, 0])
-    print(to_bdry_nodes[:, 0])
+        from meshmode.discretization.visualization import draw_curve
+        draw_curve(bdry_discr)
 
+        pt.plot(guess[0].reshape(-1), guess[1].reshape(-1), "or")
+        pt.plot(goals[0].reshape(-1), goals[1].reshape(-1), "og")
+        pt.plot(from_bdry_nodes[0].reshape(-1), from_bdry_nodes[1].reshape(-1), "o",
+                color="purple")
+        pt.show()
 
     # }}}
 
+    logger.info("make_opposite_face_connection: begin gauss-newton")
 
+    niter = 0
+    while True:
+        resid = apply_map(from_unit_nodes) - to_bdry_nodes
 
+        df = get_map_jacobian(from_unit_nodes)
+        df_inv_resid = np.empty_like(from_unit_nodes)
+        # FIXME: Should look for a way to batch this
+        for e in range(nelements):
+            for t in range(nto_unit_nodes):
+                df_inv_resid[:, e, t], _, _, _ = \
+                        la.lstsq(df[:, :, e, t].T, resid[:, e, t])
 
-    # group by face maps
+        from_unit_nodes = from_unit_nodes - df_inv_resid
+
+        max_resid = np.max(np.abs(resid))
+        logger.debug("gauss-newton residual: %g" % max_resid)
+
+        if max_resid < tol:
+            logger.info("make_opposite_face_connection: gauss-newton: done, "
+                    "final residual: %g" % max_resid)
+            break
+
+        niter += 1
+        if niter > 10:
+            raise RuntimeError("Gauss-Newton (for finding opposite-face reference "
+                    "coordinates) did not converge")
+
+    # }}}
 
-    yield InterpolationBatch(
-            from_group_index=i_src_grp,
-            from_element_indices=None,
-            to_element_indices=None,
-            result_unit_nodes=None,
-            to_element_face=None)
+    # {{{ find groups of from_unit_nodes
+
+    def to_dev(ary):
+        return cl.array.to_device(queue, ary, array_queue=None)
+
+    done_elements = np.zeros(nelements, dtype=np.bool)
+    while True:
+        todo_elements, = np.where(~done_elements)
+        if not len(todo_elements):
+            return
+
+        template_unit_nodes = from_unit_nodes[:, todo_elements[0], :]
+
+        unit_node_dist = np.max(np.max(np.abs(
+                from_unit_nodes[:, todo_elements, :]
+                -
+                template_unit_nodes.reshape(dim, 1, -1)),
+                axis=2), axis=0)
+
+        close_els = todo_elements[unit_node_dist < tol]
+        done_elements[close_els] = True
+
+        unit_node_dist = np.max(np.max(np.abs(
+                from_unit_nodes[:, todo_elements, :]
+                -
+                template_unit_nodes.reshape(dim, 1, -1)),
+                axis=2), axis=0)
+
+        yield InterpolationBatch(
+                from_group_index=i_src_grp,
+                from_element_indices=to_dev(from_bdry_element_indices[close_els]),
+                to_element_indices=to_dev(to_bdry_element_indices[close_els]),
+                result_unit_nodes=template_unit_nodes,
+                to_element_face=None)
+
+    # }}}
 
 
 def _find_ibatch_for_face(vbc_tgt_grp_batches, iface):
@@ -789,38 +905,20 @@ def _find_ibatch_for_face(vbc_tgt_grp_batches, iface):
     return vbc_tgt_grp_face_batch
 
 
-class GroupBatchAndElementLookup(Record):
-    """
-    .. attribute:: ibatch_lookup
-
-        ``element_id_t [from_nelements, from_nfaces]``
-
-    .. attribute:: iel_lookup
-
-        ``element_id_t [from_nelements, from_nfaces]``
-    """
-
-
-def _make_batch_and_el_lookup_table(queue, connection, igrp):
+def _make_el_lookup_table(queue, connection, igrp):
     from_nelements = connection.from_discr.groups[igrp].nelements
     from_nfaces = connection.from_discr.mesh.groups[igrp].nfaces
 
-    ibatch_lookup = np.empty((from_nelements, from_nfaces),
-            dtype=connection.from_discr.mesh.element_id_dtype)
-    ibatch_lookup.fill(-1)
     iel_lookup = np.empty((from_nelements, from_nfaces),
             dtype=connection.from_discr.mesh.element_id_dtype)
     iel_lookup.fill(-1)
 
     for ibatch, batch in enumerate(connection.groups[igrp].batches):
         from_element_indices = batch.from_element_indices.get(queue=queue)
-        ibatch_lookup[from_element_indices, batch.to_element_face] = ibatch
         iel_lookup[from_element_indices, batch.to_element_face] = \
                 batch.to_element_indices.get(queue=queue)
 
-    return GroupBatchAndElementLookup(
-            ibatch_lookup=ibatch_lookup,
-            iel_lookup=iel_lookup)
+    return iel_lookup
 
 
 def make_opposite_face_connection(volume_to_bdry_conn):
@@ -851,7 +949,7 @@ def make_opposite_face_connection(volume_to_bdry_conn):
         groups = [[] for i_tgt_grp in range(ngrps)]
 
         for i_src_grp in range(ngrps):
-            src_grp_batch_and_el_lookup = _make_batch_and_el_lookup_table(
+            src_grp_el_lookup = _make_el_lookup_table(
                     queue, volume_to_bdry_conn, i_src_grp)
 
             for i_tgt_grp in range(ngrps):
@@ -863,26 +961,13 @@ def make_opposite_face_connection(volume_to_bdry_conn):
                     vbc_tgt_grp_face_batch = _find_ibatch_for_face(
                             vbc_tgt_grp_batches, i_face_tgt)
 
-                    # Assert that the adjacency group and the restriction
-                    # interpolation batch and the adjacency group have the same
-                    # element ordering.
-
-                    adj_grp_tgt_flags = adj_grp.element_faces == i_face_tgt
-
-                    assert (
-                            np.array_equal(
-                                adj_grp.elements[adj_grp_tgt_flags],
-                                vbc_tgt_grp_face_batch.from_element_indices
-                                .get(queue=queue)))
-
                     groups[i_tgt_grp].extend(
                         _make_cross_face_batches(
-                            queue, bdry_discr,
+                            queue, vol_discr, bdry_discr,
                             i_tgt_grp, i_src_grp,
                             i_face_tgt,
-                            adj_grp, adj_grp_tgt_flags,
-                            vbc_tgt_grp_face_batch,
-                            src_grp_batch_and_el_lookup))
+                            adj_grp,
+                            vbc_tgt_grp_face_batch, src_grp_el_lookup))
 
     return DiscretizationConnection(
             from_discr=bdry_discr,
diff --git a/test/test_meshmode.py b/test/test_meshmode.py
index dbea7f77..384cbd87 100644
--- a/test/test_meshmode.py
+++ b/test/test_meshmode.py
@@ -156,6 +156,87 @@ def test_boundary_interpolation(ctx_getter, group_factory, boundary_tag,
             or eoc_rec.max_error() < 1e-14)
 
 
+@pytest.mark.parametrize("group_factory", [
+    InterpolatoryQuadratureSimplexGroupFactory,
+    PolynomialWarpAndBlendGroupFactory
+    ])
+@pytest.mark.parametrize(("mesh_name", "dim", "mesh_pars"), [
+    ("blob", 2, [1e-1, 8e-2, 5e-2]),
+    ("warp", 2, [3, 5, 7]),
+    ("warp", 3, [3, 5]),
+    ])
+@pytest.mark.parametrize("dim", [2, 3])
+def test_opposite_face_interpolation(ctx_getter, group_factory,
+        mesh_name, dim, mesh_pars):
+    logging.basicConfig(level=logging.INFO)
+
+    cl_ctx = ctx_getter()
+    queue = cl.CommandQueue(cl_ctx)
+
+    from meshmode.discretization import Discretization
+    from meshmode.discretization.connection import (
+            make_face_restriction, make_opposite_face_connection)
+
+    from pytools.convergence import EOCRecorder
+    eoc_rec = EOCRecorder()
+
+    order = 5
+
+    def f(x):
+        return 0.1*cl.clmath.sin(30*x)
+
+    for mesh_par in mesh_pars:
+        # {{{ get mesh
+
+        if mesh_name == "blob":
+            assert dim == 2
+
+            h = mesh_par
+
+            from meshmode.mesh.io import generate_gmsh, FileSource
+            print("BEGIN GEN")
+            mesh = generate_gmsh(
+                    FileSource("blob-2d.step"), 2, order=order,
+                    force_ambient_dim=2,
+                    other_options=[
+                        "-string", "Mesh.CharacteristicLengthMax = %s;" % h]
+                    )
+            print("END GEN")
+        elif mesh_name == "warp":
+            from meshmode.mesh.generation import generate_warped_rect_mesh
+            mesh = generate_warped_rect_mesh(dim, order=4, n=mesh_par)
+
+            h = 1/mesh_par
+        else:
+            raise ValueError("mesh_name not recognized")
+
+        # }}}
+
+        vol_discr = Discretization(cl_ctx, mesh,
+                group_factory(order))
+        print("h=%s -> %d elements" % (
+                h, sum(mgrp.nelements for mgrp in mesh.groups)))
+
+        bdry_mesh, bdry_discr, bdry_connection = make_face_restriction(
+                vol_discr, group_factory(order),
+                None)
+
+        opp_face = make_opposite_face_connection(bdry_connection)
+
+        bdry_x = bdry_discr.nodes()[0].with_queue(queue)
+        bdry_f = f(bdry_x)
+
+        bdry_f_2 = opp_face(queue, bdry_f)
+
+        err = la.norm((bdry_f-bdry_f_2).get(), np.inf)
+        eoc_rec.add_data_point(h, err)
+
+    print(eoc_rec)
+    assert (
+            eoc_rec.order_estimate() >= order-0.5
+            or eoc_rec.max_error() < 1e-13)
+
+
 def test_element_orientation():
     from meshmode.mesh.io import generate_gmsh, FileSource
 
-- 
GitLab