From fb193428ede4144adf087daf4cb3709c6d47834d Mon Sep 17 00:00:00 2001
From: ellis <eshoag2@illinois.edu>
Date: Sat, 23 Sep 2017 20:35:15 -0500
Subject: [PATCH 01/83] The birth of a new function

---
 grudge/execution.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/grudge/execution.py b/grudge/execution.py
index ace2dc8b..ff14c6f3 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -279,6 +279,10 @@ class ExecutionMapper(mappers.Evaluator,
 
         return conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
 
+    def map_opposite_partition_face_swap(self, op, field_expr):
+        raise NotImplementedError("map_opposite_partition_face_swap")
+        return None
+
     def map_opposite_interior_face_swap(self, op, field_expr):
         dd = op.dd_in
 
-- 
GitLab


From 594737c59adf949b990bc8a864ca4dd8da876b84 Mon Sep 17 00:00:00 2001
From: ellis <eshoag2@illinois.edu>
Date: Sat, 23 Sep 2017 22:03:25 -0500
Subject: [PATCH 02/83] working

---
 grudge/execution.py | 89 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/grudge/execution.py b/grudge/execution.py
index ff14c6f3..177cd52e 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -281,6 +281,95 @@ class ExecutionMapper(mappers.Evaluator,
 
     def map_opposite_partition_face_swap(self, op, field_expr):
         raise NotImplementedError("map_opposite_partition_face_swap")
+
+        # TODO: Fetch these variables
+        local_mesh = None
+        vol_discr = None
+        group_factory = None
+        TAG_SEND_MESH = 1
+
+        from mpi4py import MPI
+        comm = MPI.COMM_WORLD
+        # FIXME: Assumes rank 0 is a 'central hub' and
+        #           i_part = rank - 1 for all other ranks
+        rank = comm.Get_rank()
+        num_parts = comm.Get_size() - 1
+
+        i_local_part = rank - 1
+        local_bdry_conns = {}
+        for i_remote_part in range(num_parts):
+            if i_local_part == i_remote_part:
+                continue
+            # Mark faces within local_mesh that are connected to remote_mesh
+            from meshmode.discretization.connection import make_face_restriction
+            from meshmode.mesh import BTAG_PARTITION
+            # TODO: May not be necessary to compute every time
+            local_bdry_conns[i_remote_part] =\
+                    make_face_restriction(vol_discr, group_factory,
+                                          BTAG_PARTITION(i_remote_part))
+
+        # Send boundary data
+        send_reqs = []
+        for i_remote_part in range(num_parts):
+            if i_local_part == i_remote_part:
+                continue
+            bdry_nodes = local_bdry_conns[i_remote_part].to_discr.nodes()
+            if bdry_nodes.size == 0:
+                # local_mesh is not connected to remote_mesh; send None
+                send_reqs.append(comm.isend(None,
+                                            dest=i_remote_part+1,
+                                            tag=TAG_SEND_MESH))
+                continue
+
+            # Gather information to send to other ranks
+            local_bdry = local_bdry_conns[i_remote_part].to_discr
+            local_adj_groups = [local_mesh.facial_adjacency_groups[i][None]
+                                for i in range(len(local_mesh.groups))]
+            local_batches = [local_bdry_conns[i_remote_part].groups[i].batches
+                                for i in range(len(local_mesh.groups))]
+            local_to_elem_faces = [[batch.to_element_face for batch in grp_batches]
+                                        for grp_batches in local_batches]
+            local_to_elem_indices = [[batch.to_element_indices.get(queue=self.queue)
+                                            for batch in grp_batches]
+                                        for grp_batches in local_batches]
+
+            local_data = {'bdry_mesh': local_bdry.mesh,
+                          'adj': local_adj_groups,
+                          'to_elem_faces': local_to_elem_faces,
+                          'to_elem_indices': local_to_elem_indices}
+            send_reqs.append(comm.isend(local_data,
+                                        dest=i_remote_part+1,
+                                        tag=TAG_SEND_MESH))
+
+        # Receive boundary data
+        remote_buf = {}
+        for i_remote_part in range(num_parts):
+            if i_local_part == i_remote_part:
+                continue
+            remote_rank = i_remote_part + 1
+            status = MPI.Status()
+            comm.probe(source=remote_rank, tag=TAG_SEND_MESH, status=status)
+            remote_buf[i_remote_part] = np.empty(status.count, dtype=bytes)
+
+        recv_reqs = {}
+        for i_remote_part, buf in remote_buf.items():
+            remote_rank = i_remote_part + 1
+            recv_reqs[i_remote_part] = comm.irecv(buf=buf,
+                                                  source=remote_rank,
+                                                  tag=TAG_SEND_MESH)
+
+        remote_data = {}
+        for i_remote_part, req in recv_reqs.items():
+            status = MPI.Status()
+            remote_data[i_remote_part] = req.wait(status=status)
+            # Free the buffer
+            remote_buf[i_remote_part] = None  # FIXME: Is this a good idea?
+            print('Rank {0}: Received rank {1} data ({2} bytes)'
+                            .format(rank, i_remote_part + 1, status.count))
+
+        for req in send_reqs:
+            req.wait()
+
         return None
 
     def map_opposite_interior_face_swap(self, op, field_expr):
-- 
GitLab


From 5fb75de68507b50de90342bbb5a00baadbf031d5 Mon Sep 17 00:00:00 2001
From: ellis <eshoag2@illinois.edu>
Date: Sun, 24 Sep 2017 21:52:37 -0500
Subject: [PATCH 03/83] Please flake8

---
 grudge/execution.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 177cd52e..aa6ad232 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -286,7 +286,8 @@ class ExecutionMapper(mappers.Evaluator,
         local_mesh = None
         vol_discr = None
         group_factory = None
-        TAG_SEND_MESH = 1
+        cl_ctx = None
+        TAG_SEND_MESH = 1  # noqa
 
         from mpi4py import MPI
         comm = MPI.COMM_WORLD
@@ -370,6 +371,27 @@ class ExecutionMapper(mappers.Evaluator,
         for req in send_reqs:
             req.wait()
 
+        connections = []
+        for i_remote_part, data in remote_data.items():
+            if data is None:
+                # Local mesh is not connected to remote mesh
+                continue
+            remote_bdry_mesh = data['bdry_mesh']
+            from meshmode.discretization import Discretization
+            remote_bdry = Discretization(cl_ctx, remote_bdry_mesh, group_factory)
+            remote_adj_groups = data['adj']
+            remote_to_elem_faces = data['to_elem_faces']
+            remote_to_elem_indices = data['to_elem_indices']
+            # Connect local_mesh to remote_mesh
+            from meshmode.discretization.connection import make_partition_connection
+            connection = make_partition_connection(local_bdry_conns[i_remote_part],
+                                                   i_local_part,
+                                                   remote_bdry,
+                                                   remote_adj_groups,
+                                                   remote_to_elem_faces,
+                                                   remote_to_elem_indices)
+            connections.append(connection)
+
         return None
 
     def map_opposite_interior_face_swap(self, op, field_expr):
-- 
GitLab


From 6770e0b33fbfcf70b90c9ac8642a22131a4caeb1 Mon Sep 17 00:00:00 2001
From: ellis <eshoag2@illinois.edu>
Date: Sun, 24 Sep 2017 22:16:20 -0500
Subject: [PATCH 04/83] working

---
 grudge/execution.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index aa6ad232..9285cecc 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -283,7 +283,6 @@ class ExecutionMapper(mappers.Evaluator,
         raise NotImplementedError("map_opposite_partition_face_swap")
 
         # TODO: Fetch these variables
-        local_mesh = None
         vol_discr = None
         group_factory = None
         cl_ctx = None
@@ -324,6 +323,7 @@ class ExecutionMapper(mappers.Evaluator,
 
             # Gather information to send to other ranks
             local_bdry = local_bdry_conns[i_remote_part].to_discr
+            local_mesh = local_bdry_conns[i_remote_part].from_discr.mesh
             local_adj_groups = [local_mesh.facial_adjacency_groups[i][None]
                                 for i in range(len(local_mesh.groups))]
             local_batches = [local_bdry_conns[i_remote_part].groups[i].batches
-- 
GitLab


From 8cd87de48711d8f192bcff7d378864b052ec29ca Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Fri, 13 Oct 2017 18:46:37 -0500
Subject: [PATCH 05/83] working

---
 grudge/execution.py          | 116 +----------------------------------
 grudge/symbolic/operators.py |  19 ++++++
 2 files changed, 21 insertions(+), 114 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 9285cecc..ec0d6c6a 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -279,120 +279,8 @@ class ExecutionMapper(mappers.Evaluator,
 
         return conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
 
-    def map_opposite_partition_face_swap(self, op, field_expr):
-        raise NotImplementedError("map_opposite_partition_face_swap")
-
-        # TODO: Fetch these variables
-        vol_discr = None
-        group_factory = None
-        cl_ctx = None
-        TAG_SEND_MESH = 1  # noqa
-
-        from mpi4py import MPI
-        comm = MPI.COMM_WORLD
-        # FIXME: Assumes rank 0 is a 'central hub' and
-        #           i_part = rank - 1 for all other ranks
-        rank = comm.Get_rank()
-        num_parts = comm.Get_size() - 1
-
-        i_local_part = rank - 1
-        local_bdry_conns = {}
-        for i_remote_part in range(num_parts):
-            if i_local_part == i_remote_part:
-                continue
-            # Mark faces within local_mesh that are connected to remote_mesh
-            from meshmode.discretization.connection import make_face_restriction
-            from meshmode.mesh import BTAG_PARTITION
-            # TODO: May not be necessary to compute every time
-            local_bdry_conns[i_remote_part] =\
-                    make_face_restriction(vol_discr, group_factory,
-                                          BTAG_PARTITION(i_remote_part))
-
-        # Send boundary data
-        send_reqs = []
-        for i_remote_part in range(num_parts):
-            if i_local_part == i_remote_part:
-                continue
-            bdry_nodes = local_bdry_conns[i_remote_part].to_discr.nodes()
-            if bdry_nodes.size == 0:
-                # local_mesh is not connected to remote_mesh; send None
-                send_reqs.append(comm.isend(None,
-                                            dest=i_remote_part+1,
-                                            tag=TAG_SEND_MESH))
-                continue
-
-            # Gather information to send to other ranks
-            local_bdry = local_bdry_conns[i_remote_part].to_discr
-            local_mesh = local_bdry_conns[i_remote_part].from_discr.mesh
-            local_adj_groups = [local_mesh.facial_adjacency_groups[i][None]
-                                for i in range(len(local_mesh.groups))]
-            local_batches = [local_bdry_conns[i_remote_part].groups[i].batches
-                                for i in range(len(local_mesh.groups))]
-            local_to_elem_faces = [[batch.to_element_face for batch in grp_batches]
-                                        for grp_batches in local_batches]
-            local_to_elem_indices = [[batch.to_element_indices.get(queue=self.queue)
-                                            for batch in grp_batches]
-                                        for grp_batches in local_batches]
-
-            local_data = {'bdry_mesh': local_bdry.mesh,
-                          'adj': local_adj_groups,
-                          'to_elem_faces': local_to_elem_faces,
-                          'to_elem_indices': local_to_elem_indices}
-            send_reqs.append(comm.isend(local_data,
-                                        dest=i_remote_part+1,
-                                        tag=TAG_SEND_MESH))
-
-        # Receive boundary data
-        remote_buf = {}
-        for i_remote_part in range(num_parts):
-            if i_local_part == i_remote_part:
-                continue
-            remote_rank = i_remote_part + 1
-            status = MPI.Status()
-            comm.probe(source=remote_rank, tag=TAG_SEND_MESH, status=status)
-            remote_buf[i_remote_part] = np.empty(status.count, dtype=bytes)
-
-        recv_reqs = {}
-        for i_remote_part, buf in remote_buf.items():
-            remote_rank = i_remote_part + 1
-            recv_reqs[i_remote_part] = comm.irecv(buf=buf,
-                                                  source=remote_rank,
-                                                  tag=TAG_SEND_MESH)
-
-        remote_data = {}
-        for i_remote_part, req in recv_reqs.items():
-            status = MPI.Status()
-            remote_data[i_remote_part] = req.wait(status=status)
-            # Free the buffer
-            remote_buf[i_remote_part] = None  # FIXME: Is this a good idea?
-            print('Rank {0}: Received rank {1} data ({2} bytes)'
-                            .format(rank, i_remote_part + 1, status.count))
-
-        for req in send_reqs:
-            req.wait()
-
-        connections = []
-        for i_remote_part, data in remote_data.items():
-            if data is None:
-                # Local mesh is not connected to remote mesh
-                continue
-            remote_bdry_mesh = data['bdry_mesh']
-            from meshmode.discretization import Discretization
-            remote_bdry = Discretization(cl_ctx, remote_bdry_mesh, group_factory)
-            remote_adj_groups = data['adj']
-            remote_to_elem_faces = data['to_elem_faces']
-            remote_to_elem_indices = data['to_elem_indices']
-            # Connect local_mesh to remote_mesh
-            from meshmode.discretization.connection import make_partition_connection
-            connection = make_partition_connection(local_bdry_conns[i_remote_part],
-                                                   i_local_part,
-                                                   remote_bdry,
-                                                   remote_adj_groups,
-                                                   remote_to_elem_faces,
-                                                   remote_to_elem_indices)
-            connections.append(connection)
-
-        return None
+    def map_opposite_rank_face_swap(self, op, field_expr):
+        raise NotImplementedError("map_opposite_rank_face_swap")
 
     def map_opposite_interior_face_swap(self, op, field_expr):
         dd = op.dd_in
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index dc2e4fa1..70c43a10 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -379,6 +379,25 @@ class RefInverseMassOperator(RefMassOperatorBase):
 
 # {{{ boundary-related operators
 
+class OppositeRankFaceSwap(Operator):
+    def __init__(self, dd_in=None, dd_out=None):
+        sym = _sym()
+
+        if dd_in is None:
+            dd_in = sym.DOFDesc(BTAG_PARTITION, None)
+        if dd_out is None:
+            dd_out = dd_in
+
+        if dd_in.domain_tag is not BTAG_PARTITION:
+            raise ValueError("dd_in must be a rank boundary faces domain")
+        if dd_out != dd_in:
+            raise ValueError("dd_out and dd_in must be identical")
+
+        super(OppositeRankFaceSwap, self).__init__(dd_in, dd_out)
+
+    mapper_method = intern("map_opposite_rank_face_swap")
+
+
 class OppositeInteriorFaceSwap(Operator):
     def __init__(self, dd_in=None, dd_out=None):
         sym = _sym()
-- 
GitLab


From a4ca817cdd157160807a9f059c00d7ccb7515596 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Wed, 18 Oct 2017 00:08:36 -0500
Subject: [PATCH 06/83] working

---
 grudge/execution.py                 |  2 ++
 grudge/symbolic/mappers/__init__.py | 13 +++++++++++++
 grudge/symbolic/operators.py        |  1 +
 grudge/symbolic/primitives.py       |  4 +++-
 4 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index ec0d6c6a..d6725f2f 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -575,6 +575,8 @@ def process_sym_operator(sym_operator, post_bind_mapper=None,
     # dumper("before-derivative-join", sym_operator)
     # sym_operator = mappers.DerivativeJoiner()(sym_operator)
 
+    sys_operator = mappers.DistributedMapper()(sym_operator)
+
     dumper("process-finished", sym_operator)
 
     return sym_operator
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 60b489ce..ae676628 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -331,6 +331,19 @@ class OperatorBinder(CSECachingMapperMixin, IdentityMapper):
 # }}}
 
 
+class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
+
+    # FIXME: Not sure what this is
+    map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
+
+    def map_operator_binding(self, expr):
+        if isinstance(expr.op, op.OppositeInteriorFaceSwap):
+            return 42
+            # return expr.op + op.OppositeRankFaceSwap()(self.rec(expr.field))
+        else:
+            return IdentityMapper.map_operator_binding(self, expr)
+
+
 # {{{ operator specializer
 
 class OperatorSpecializer(CSECachingMapperMixin, IdentityMapper):
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 70c43a10..23ca69bc 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -383,6 +383,7 @@ class OppositeRankFaceSwap(Operator):
     def __init__(self, dd_in=None, dd_out=None):
         sym = _sym()
 
+        from meshmode.mesh import BTAG_PARTITION
         if dd_in is None:
             dd_in = sym.DOFDesc(BTAG_PARTITION, None)
         if dd_out is None:
diff --git a/grudge/symbolic/primitives.py b/grudge/symbolic/primitives.py
index 11d5ae8a..4fe9b132 100644
--- a/grudge/symbolic/primitives.py
+++ b/grudge/symbolic/primitives.py
@@ -28,7 +28,7 @@ from six.moves import range, intern
 
 import numpy as np
 import pymbolic.primitives
-from meshmode.mesh import BTAG_ALL, BTAG_REALLY_ALL, BTAG_NONE  # noqa
+from meshmode.mesh import BTAG_ALL, BTAG_REALLY_ALL, BTAG_NONE, BTAG_PARTITION  # noqa
 from meshmode.discretization.connection import (  # noqa
         FRESTR_ALL_FACES, FRESTR_INTERIOR_FACES)
 
@@ -183,6 +183,8 @@ class DOFDesc(object):
             pass
         elif domain_tag in [BTAG_ALL, BTAG_REALLY_ALL, BTAG_NONE]:
             pass
+        elif domain_tag is BTAG_PARTITION:
+            pass
         elif isinstance(domain_tag, DTAG_BOUNDARY):
             pass
         else:
-- 
GitLab


From 1b356e44a4d6bc52ab59746f91f3ec88e7bcb036 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Fri, 20 Oct 2017 11:46:28 -0500
Subject: [PATCH 07/83] working

---
 examples/wave/wave-min.py           | 6 +++---
 grudge/execution.py                 | 5 +++--
 grudge/symbolic/mappers/__init__.py | 8 ++++++--
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/examples/wave/wave-min.py b/examples/wave/wave-min.py
index 6e2baa1b..bd3424bc 100644
--- a/examples/wave/wave-min.py
+++ b/examples/wave/wave-min.py
@@ -35,7 +35,7 @@ def main(write_output=True, order=4):
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
 
-    dims = 3
+    dims = 2
     from meshmode.mesh.generation import generate_regular_rect_mesh
     mesh = generate_regular_rect_mesh(
             a=(-0.5,)*dims,
@@ -84,8 +84,8 @@ def main(write_output=True, order=4):
 
     # print(sym.pretty(op.sym_operator()))
     bound_op = bind(discr, op.sym_operator())
-    # print(bound_op)
-    # 1/0
+    print(bound_op)
+    1/0
 
     def rhs(t, w):
         return bound_op(queue, t=t, w=w)
diff --git a/grudge/execution.py b/grudge/execution.py
index d6725f2f..ceb413aa 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -559,6 +559,9 @@ def process_sym_operator(sym_operator, post_bind_mapper=None,
     dumper("before-global-to-reference", sym_operator)
     sym_operator = mappers.GlobalToReferenceMapper(mesh.ambient_dim)(sym_operator)
 
+    dumper("before-distributed", sym_operator)
+    sys_operator = mappers.DistributedMapper()(sym_operator)
+
     # Ordering restriction:
     #
     # - Must specialize quadrature operators before performing inverse mass
@@ -575,8 +578,6 @@ def process_sym_operator(sym_operator, post_bind_mapper=None,
     # dumper("before-derivative-join", sym_operator)
     # sym_operator = mappers.DerivativeJoiner()(sym_operator)
 
-    sys_operator = mappers.DistributedMapper()(sym_operator)
-
     dumper("process-finished", sym_operator)
 
     return sym_operator
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index ae676628..94bf9734 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -147,6 +147,7 @@ class OperatorReducerMixin(LocalOpReducerMixin, FluxOpReducerMixin):
     map_ref_mass = _map_op_base
     map_ref_inverse_mass = _map_op_base
 
+    map_opposite_rank_face_swap = _map_op_base
     map_opposite_interior_face_swap = _map_op_base
     map_face_mass_operator = _map_op_base
     map_ref_face_mass_operator = _map_op_base
@@ -195,6 +196,7 @@ class IdentityMapperMixin(LocalOpReducerMixin, FluxOpReducerMixin):
     map_ref_mass = map_elementwise_linear
     map_ref_inverse_mass = map_elementwise_linear
 
+    map_opposite_rank_face_swap = map_elementwise_linear
     map_opposite_interior_face_swap = map_elementwise_linear
     map_face_mass_operator = map_elementwise_linear
     map_ref_face_mass_operator = map_elementwise_linear
@@ -338,8 +340,7 @@ class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
 
     def map_operator_binding(self, expr):
         if isinstance(expr.op, op.OppositeInteriorFaceSwap):
-            return 42
-            # return expr.op + op.OppositeRankFaceSwap()(self.rec(expr.field))
+            return op.OppositeRankFaceSwap()(self.rec(expr.field))
         else:
             return IdentityMapper.map_operator_binding(self, expr)
 
@@ -683,6 +684,9 @@ class StringifyMapper(pymbolic.mapper.stringifier.StringifyMapper):
     def map_ref_face_mass_operator(self, expr, enclosing_prec):
         return "RefFaceM" + self._format_op_dd(expr)
 
+    def map_opposite_rank_face_swap(self, expr, enclosing_prec):
+        return "RankSwap" + self._format_op_dd(expr)
+
     def map_opposite_interior_face_swap(self, expr, enclosing_prec):
         return "OppSwap" + self._format_op_dd(expr)
 
-- 
GitLab


From 6dae890941bbfcadc93be03e3004e479495b972b Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Fri, 20 Oct 2017 12:56:48 -0500
Subject: [PATCH 08/83] working

---
 grudge/execution.py                 |  3 ++-
 grudge/symbolic/mappers/__init__.py |  3 ++-
 grudge/symbolic/operators.py        | 14 ++++++++------
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index ceb413aa..402bb6b4 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -532,6 +532,7 @@ def process_sym_operator(sym_operator, post_bind_mapper=None,
         dumper("before-empty-flux-killer", sym_operator)
         sym_operator = mappers.EmptyFluxKiller(mesh)(sym_operator)
 
+
     dumper("before-cfold", sym_operator)
     sym_operator = mappers.CommutativeConstantFoldingMapper()(sym_operator)
 
@@ -560,7 +561,7 @@ def process_sym_operator(sym_operator, post_bind_mapper=None,
     sym_operator = mappers.GlobalToReferenceMapper(mesh.ambient_dim)(sym_operator)
 
     dumper("before-distributed", sym_operator)
-    sys_operator = mappers.DistributedMapper()(sym_operator)
+    sym_operator = mappers.DistributedMapper()(sym_operator)
 
     # Ordering restriction:
     #
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 94bf9734..323cc4ae 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -340,7 +340,8 @@ class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
 
     def map_operator_binding(self, expr):
         if isinstance(expr.op, op.OppositeInteriorFaceSwap):
-            return op.OppositeRankFaceSwap()(self.rec(expr.field))
+            return (op.OppositeInteriorFaceSwap()(self.rec(expr.field))
+                    + op.OppositeRankFaceSwap()(self.rec(expr.field)))
         else:
             return IdentityMapper.map_operator_binding(self, expr)
 
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 23ca69bc..05a23adf 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -383,16 +383,18 @@ class OppositeRankFaceSwap(Operator):
     def __init__(self, dd_in=None, dd_out=None):
         sym = _sym()
 
-        from meshmode.mesh import BTAG_PARTITION
+        # from meshmode.mesh import BTAG_PARTITION
         if dd_in is None:
-            dd_in = sym.DOFDesc(BTAG_PARTITION, None)
+            # FIXME: What is FRESTR_INTERIOR_FACES?
+            dd_in = sym.DOFDesc(sym.FRESTR_INTERIOR_FACES)
+            # dd_in = sym.DOFDesc(sym.BTAG_PARTITION)
         if dd_out is None:
             dd_out = dd_in
 
-        if dd_in.domain_tag is not BTAG_PARTITION:
-            raise ValueError("dd_in must be a rank boundary faces domain")
-        if dd_out != dd_in:
-            raise ValueError("dd_out and dd_in must be identical")
+        # if dd_in.domain_tag is not BTAG_PARTITION:
+        #     raise ValueError("dd_in must be a rank boundary faces domain")
+        # if dd_out != dd_in:
+        #     raise ValueError("dd_out and dd_in must be identical")
 
         super(OppositeRankFaceSwap, self).__init__(dd_in, dd_out)
 
-- 
GitLab


From 9fb8571907c620cc55aabfe80bed059ca290021a Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Sat, 21 Oct 2017 17:39:19 -0500
Subject: [PATCH 09/83] Fix whitespace

---
 grudge/execution.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 402bb6b4..345005e4 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -532,7 +532,6 @@ def process_sym_operator(sym_operator, post_bind_mapper=None,
         dumper("before-empty-flux-killer", sym_operator)
         sym_operator = mappers.EmptyFluxKiller(mesh)(sym_operator)
 
-
     dumper("before-cfold", sym_operator)
     sym_operator = mappers.CommutativeConstantFoldingMapper()(sym_operator)
 
-- 
GitLab


From 05fd17e15e9e4c74f02c86b40394abb346b28cd3 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Sat, 21 Oct 2017 18:29:25 -0500
Subject: [PATCH 10/83] new tests for mpi communication

---
 grudge/symbolic/operators.py   |   6 +-
 test/test_mpi_communication.py | 104 +++++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+), 3 deletions(-)
 create mode 100644 test/test_mpi_communication.py

diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 05a23adf..a1d0f210 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -387,14 +387,14 @@ class OppositeRankFaceSwap(Operator):
         if dd_in is None:
             # FIXME: What is FRESTR_INTERIOR_FACES?
             dd_in = sym.DOFDesc(sym.FRESTR_INTERIOR_FACES)
-            # dd_in = sym.DOFDesc(sym.BTAG_PARTITION)
+            # dd_in = sym.DOFDesc(BTAG_PARTITION)
         if dd_out is None:
             dd_out = dd_in
 
         # if dd_in.domain_tag is not BTAG_PARTITION:
         #     raise ValueError("dd_in must be a rank boundary faces domain")
-        # if dd_out != dd_in:
-        #     raise ValueError("dd_out and dd_in must be identical")
+        if dd_out != dd_in:
+            raise ValueError("dd_out and dd_in must be identical")
 
         super(OppositeRankFaceSwap, self).__init__(dd_in, dd_out)
 
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
new file mode 100644
index 00000000..a343beb0
--- /dev/null
+++ b/test/test_mpi_communication.py
@@ -0,0 +1,104 @@
+from __future__ import division, absolute_import, print_function
+
+__copyright__ = """
+Copyright (C) 2017 Ellis Hoag
+Copyright (C) 2017 Andreas Kloeckner
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import pytest
+import os
+
+import logging
+logger = logging.getLogger(__name__)
+
+import numpy as np
+
+
+def mpi_communication_entrypoint():
+    from meshmode.distributed import MPIMeshDistributor, MPIBoundaryCommunicator
+
+    from mpi4py import MPI
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    num_parts = comm.Get_size()
+
+    mesh_dist = MPIMeshDistributor(comm)
+
+    if mesh_dist.is_mananger_rank():
+        np.random.seed(42)
+        from meshmode.mesh.generation import generate_warped_rect_mesh
+        meshes = [generate_warped_rect_mesh(3, order=4, n=4) for _ in range(2)]
+
+        from meshmode.mesh.processing import merge_disjoint_meshes
+        mesh = merge_disjoint_meshes(meshes)
+
+        part_per_element = np.random.randint(num_parts, size=mesh.nelements)
+
+        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
+    else:
+        local_mesh = mesh_dist.receive_mesh_part()
+
+    from meshmode.discretization.poly_element\
+                    import PolynomialWarpAndBlendGroupFactory
+    group_factory = PolynomialWarpAndBlendGroupFactory(4)
+    import pyopencl as cl
+    cl_ctx = cl.create_some_context()
+    queue = cl.CommandQueue(cl_ctx)
+
+    from meshmode.discretization import Discretization
+    vol_discr = Discretization(cl_ctx, local_mesh, group_factory)
+
+    logger.debug("Rank %d exiting", rank)
+
+
+# {{{ MPI test pytest entrypoint
+
+@pytest.mark.mpi
+@pytest.mark.parametrize("num_partitions", [3, 4])
+def test_mpi_communication(num_partitions):
+    pytest.importorskip("mpi4py")
+
+    num_ranks = num_partitions
+    from subprocess import check_call
+    import sys
+    newenv = os.environ.copy()
+    newenv["RUN_WITHIN_MPI"] = "1"
+    check_call([
+        "mpiexec", "-np", str(num_ranks), "-x", "RUN_WITHIN_MPI",
+        sys.executable, __file__],
+        env=newenv)
+
+# }}}
+
+if __name__ == "__main__":
+    if "RUN_WITHIN_MPI" in os.environ:
+        mpi_communication_entrypoint()
+    else:
+        import sys
+        if len(sys.argv) > 1:
+            exec(sys.argv[1])
+        else:
+            from py.test.cmdline import main
+            main([__file__])
+
+# vim: fdm=marker
-- 
GitLab


From be284cfe75ead90ea5c84097495a6b2b63b138b6 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Sun, 22 Oct 2017 00:04:42 -0500
Subject: [PATCH 11/83] working

---
 test/test_mpi_communication.py | 107 +++++++++++++++++++++++++++------
 1 file changed, 89 insertions(+), 18 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index a343beb0..4d3026b3 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -27,15 +27,19 @@ THE SOFTWARE.
 
 import pytest
 import os
-
+import numpy as np
+import pyopencl as cl
 import logging
 logger = logging.getLogger(__name__)
 
-import numpy as np
+from grudge import sym, bind, Discretization
+from grudge.shortcuts import set_up_rk4
 
 
 def mpi_communication_entrypoint():
-    from meshmode.distributed import MPIMeshDistributor, MPIBoundaryCommunicator
+    cl_ctx = cl.create_some_context()
+    queue = cl.CommandQueue(cl_ctx)
+    from meshmode.distributed import MPIMeshDistributor
 
     from mpi4py import MPI
     comm = MPI.COMM_WORLD
@@ -44,30 +48,97 @@ def mpi_communication_entrypoint():
 
     mesh_dist = MPIMeshDistributor(comm)
 
-    if mesh_dist.is_mananger_rank():
-        np.random.seed(42)
-        from meshmode.mesh.generation import generate_warped_rect_mesh
-        meshes = [generate_warped_rect_mesh(3, order=4, n=4) for _ in range(2)]
+    dims = 2
+    dt = 0.04
+    order = 4
 
-        from meshmode.mesh.processing import merge_disjoint_meshes
-        mesh = merge_disjoint_meshes(meshes)
+    if mesh_dist.is_mananger_rank():
+        from meshmode.mesh.generation import generate_regular_rect_mesh
+        mesh = generate_regular_rect_mesh(a=(-0.5,)*dims,
+                                          b=(0.5,)*dims,
+                                          n=(16,)*dims)
 
-        part_per_element = np.random.randint(num_parts, size=mesh.nelements)
+        from pymetis import part_graph
+        _, p = part_graph(num_parts,
+                          xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
+                          adjncy=mesh.nodal_adjacency.neighbors.tolist())
+        part_per_element = np.array(p)
 
         local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
     else:
         local_mesh = mesh_dist.receive_mesh_part()
 
-    from meshmode.discretization.poly_element\
-                    import PolynomialWarpAndBlendGroupFactory
-    group_factory = PolynomialWarpAndBlendGroupFactory(4)
-    import pyopencl as cl
-    cl_ctx = cl.create_some_context()
-    queue = cl.CommandQueue(cl_ctx)
+    vol_discr = Discretization(cl_ctx, local_mesh, order=order)
+
+    source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim]
+    source_width = 0.05
+    source_omega = 3
+
+    sym_x = sym.nodes(local_mesh.dim)
+    sym_source_center_dist = sym_x - source_center
+    sym_t = sym.ScalarVariable("t")
+
+    from grudge.models.wave import StrongWaveOperator
+    from meshmode.mesh import BTAG_ALL, BTAG_NONE
+    op = StrongWaveOperator(-0.1, vol_discr.dim,
+            source_f=(
+                sym.sin(source_omega*sym_t)
+                * sym.exp(
+                    -np.dot(sym_source_center_dist, sym_source_center_dist)
+                    / source_width**2)),
+            dirichlet_tag=BTAG_NONE,
+            neumann_tag=BTAG_NONE,
+            radiation_tag=BTAG_ALL,
+            flux_type="upwind")
+
+    from pytools.obj_array import join_fields
+    fields = join_fields(vol_discr.zeros(queue),
+            [vol_discr.zeros(queue) for i in range(vol_discr.dim)])
+
+    # FIXME
+    #dt = op.estimate_rk4_timestep(vol_discr, fields=fields)
+
+    op.check_bc_coverage(local_mesh)
+
+    # print(sym.pretty(op.sym_operator()))
+    bound_op = bind(vol_discr, op.sym_operator())
+    # print(bound_op)
+    # 1/0
+
+    def rhs(t, w):
+        return bound_op(queue, t=t, w=w)
+
+    dt_stepper = set_up_rk4("w", dt, fields, rhs)
+
+    final_t = 10
+    nsteps = int(final_t/dt)
+    print("dt=%g nsteps=%d" % (dt, nsteps))
+
+    from grudge.shortcuts import make_visualizer
+    vis = make_visualizer(vol_discr, vis_order=order)
+
+    step = 0
+
+    norm = bind(vol_discr, sym.norm(2, sym.var("u")))
+
+    from time import time
+    t_last_step = time()
+
+    for event in dt_stepper.run(t_end=final_t):
+        if isinstance(event, dt_stepper.StateComputed):
+            assert event.component_id == "w"
 
-    from meshmode.discretization import Discretization
-    vol_discr = Discretization(cl_ctx, local_mesh, group_factory)
+            step += 1
 
+            print(step, event.t, norm(queue, u=event.state_component[0]),
+                    time()-t_last_step)
+            if step % 10 == 0:
+                vis.write_vtk_file("r%d-fld-%04d.vtu" % (rank, step),
+                        [
+                            ("u", event.state_component[0]),
+                            ("v", event.state_component[1:]),
+                            ])
+            t_last_step = time()
     logger.debug("Rank %d exiting", rank)
 
 
-- 
GitLab


From 618fdc46ba265f515909a8424e5ee2ad1b0cfeb3 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Tue, 24 Oct 2017 20:23:53 -0500
Subject: [PATCH 12/83] Working

---
 grudge/execution.py            | 12 +++++++++++-
 test/test_mpi_communication.py |  8 +++++---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 345005e4..e0dd2e93 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -280,7 +280,17 @@ class ExecutionMapper(mappers.Evaluator,
         return conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
 
     def map_opposite_rank_face_swap(self, op, field_expr):
-        raise NotImplementedError("map_opposite_rank_face_swap")
+        # raise NotImplementedError("map_opposite_rank_face_swap")
+        from mpi4py import MPI
+        mpi_comm = MPI.COMM_WORLD
+        from meshmode.discretization.poly_element\
+                        import PolynomialWarpAndBlendGroupFactory
+        group_factory = PolynomialWarpAndBlendGroupFactory(4)
+        vol_discr = self.discr.boundary_discr(sym.BTAG_PARTITION, sym.QTAG_NONE)
+
+        from meshmode.distributed import MPIBoundaryCommunicator
+        bdry_comm = MPIBoundaryCommunicator(mpi_comm, self.queue, vol_discr, group_factory)
+        return bdry_comm(self.queue, self.rec(field_expr)).with_queue(self.queue)
 
     def map_opposite_interior_face_swap(self, op, field_expr):
         dd = op.dd_in
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 4d3026b3..b9e0fe04 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -98,7 +98,9 @@ def mpi_communication_entrypoint():
     # FIXME
     #dt = op.estimate_rk4_timestep(vol_discr, fields=fields)
 
-    op.check_bc_coverage(local_mesh)
+    # FIXME: Should meshmode consider BTAG_PARTITION to be a boundary?
+    #           Fails because: "found faces without boundary conditions"
+    # op.check_bc_coverage(local_mesh)
 
     # print(sym.pretty(op.sym_operator()))
     bound_op = bind(vol_discr, op.sym_operator())
@@ -133,7 +135,7 @@ def mpi_communication_entrypoint():
             print(step, event.t, norm(queue, u=event.state_component[0]),
                     time()-t_last_step)
             if step % 10 == 0:
-                vis.write_vtk_file("r%d-fld-%04d.vtu" % (rank, step),
+                vis.write_vtk_file("rank%d-fld-%04d.vtu" % (rank, step),
                         [
                             ("u", event.state_component[0]),
                             ("v", event.state_component[1:]),
@@ -145,7 +147,7 @@ def mpi_communication_entrypoint():
 # {{{ MPI test pytest entrypoint
 
 @pytest.mark.mpi
-@pytest.mark.parametrize("num_partitions", [3, 4])
+@pytest.mark.parametrize("num_partitions", [3])
 def test_mpi_communication(num_partitions):
     pytest.importorskip("mpi4py")
 
-- 
GitLab


From f93e0efcafecf551d51dc648008c52a2d1e08781 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Tue, 24 Oct 2017 20:59:03 -0500
Subject: [PATCH 13/83] todo list

---
 grudge/execution.py                 | 15 ++++++++++-----
 grudge/symbolic/mappers/__init__.py |  1 +
 grudge/symbolic/primitives.py       |  5 +++--
 test/test_mpi_communication.py      |  1 +
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index e0dd2e93..b0f2dca6 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -280,17 +280,22 @@ class ExecutionMapper(mappers.Evaluator,
         return conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
 
     def map_opposite_rank_face_swap(self, op, field_expr):
-        # raise NotImplementedError("map_opposite_rank_face_swap")
         from mpi4py import MPI
         mpi_comm = MPI.COMM_WORLD
+
+        # TODO: Where can I find the group factory?
         from meshmode.discretization.poly_element\
                         import PolynomialWarpAndBlendGroupFactory
-        group_factory = PolynomialWarpAndBlendGroupFactory(4)
-        vol_discr = self.discr.boundary_discr(sym.BTAG_PARTITION, sym.QTAG_NONE)
+        group_factory = PolynomialWarpAndBlendGroupFactory(self.discr.order)
 
         from meshmode.distributed import MPIBoundaryCommunicator
-        bdry_comm = MPIBoundaryCommunicator(mpi_comm, self.queue, vol_discr, group_factory)
-        return bdry_comm(self.queue, self.rec(field_expr)).with_queue(self.queue)
+        bdry_comm = MPIBoundaryCommunicator(mpi_comm, self.queue,
+                                            self.discr.volume_discr,
+                                            group_factory)
+
+        raise NotImplementedError("map_opposite_rank_face_swap")
+        # TODO: How do we use bdry_comm.remote_to_local_bdry_conns to communicate
+        #       data?
 
     def map_opposite_interior_face_swap(self, op, field_expr):
         dd = op.dd_in
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 323cc4ae..a687482e 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -340,6 +340,7 @@ class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
 
     def map_operator_binding(self, expr):
         if isinstance(expr.op, op.OppositeInteriorFaceSwap):
+            # FIXME: I'm sure this is not right....but it's a start
             return (op.OppositeInteriorFaceSwap()(self.rec(expr.field))
                     + op.OppositeRankFaceSwap()(self.rec(expr.field)))
         else:
diff --git a/grudge/symbolic/primitives.py b/grudge/symbolic/primitives.py
index 4fe9b132..44eb7893 100644
--- a/grudge/symbolic/primitives.py
+++ b/grudge/symbolic/primitives.py
@@ -183,8 +183,9 @@ class DOFDesc(object):
             pass
         elif domain_tag in [BTAG_ALL, BTAG_REALLY_ALL, BTAG_NONE]:
             pass
-        elif domain_tag is BTAG_PARTITION:
-            pass
+        # FIXME: I think I need to pass BTAG_PARTITION from OppositeRankFaceSwap
+        # elif domain_tag is BTAG_PARTITION:
+        #     pass
         elif isinstance(domain_tag, DTAG_BOUNDARY):
             pass
         else:
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index b9e0fe04..f338423b 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -163,6 +163,7 @@ def test_mpi_communication(num_partitions):
 
 # }}}
 
+
 if __name__ == "__main__":
     if "RUN_WITHIN_MPI" in os.environ:
         mpi_communication_entrypoint()
-- 
GitLab


From 8d7758222657b5c752c9670581ec6ec84ae8f009 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Wed, 25 Oct 2017 15:26:43 -0500
Subject: [PATCH 14/83] working

---
 grudge/symbolic/operators.py  | 8 +++-----
 grudge/symbolic/primitives.py | 3 ---
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index a1d0f210..c1a006e8 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -383,16 +383,14 @@ class OppositeRankFaceSwap(Operator):
     def __init__(self, dd_in=None, dd_out=None):
         sym = _sym()
 
-        # from meshmode.mesh import BTAG_PARTITION
         if dd_in is None:
-            # FIXME: What is FRESTR_INTERIOR_FACES?
+            # FIXME: Is this correct?
             dd_in = sym.DOFDesc(sym.FRESTR_INTERIOR_FACES)
-            # dd_in = sym.DOFDesc(BTAG_PARTITION)
         if dd_out is None:
             dd_out = dd_in
 
-        # if dd_in.domain_tag is not BTAG_PARTITION:
-        #     raise ValueError("dd_in must be a rank boundary faces domain")
+        if dd_in.domain_tag is not sym.FRESTR_INTERIOR_FACES:
+            raise ValueError("dd_in must be an interior faces domain")
         if dd_out != dd_in:
             raise ValueError("dd_out and dd_in must be identical")
 
diff --git a/grudge/symbolic/primitives.py b/grudge/symbolic/primitives.py
index 44eb7893..173a1a2d 100644
--- a/grudge/symbolic/primitives.py
+++ b/grudge/symbolic/primitives.py
@@ -183,9 +183,6 @@ class DOFDesc(object):
             pass
         elif domain_tag in [BTAG_ALL, BTAG_REALLY_ALL, BTAG_NONE]:
             pass
-        # FIXME: I think I need to pass BTAG_PARTITION from OppositeRankFaceSwap
-        # elif domain_tag is BTAG_PARTITION:
-        #     pass
         elif isinstance(domain_tag, DTAG_BOUNDARY):
             pass
         else:
-- 
GitLab


From 6aefe27727ad516dcb9dd9ffc1f3019c6141ebcf Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Sun, 5 Nov 2017 18:42:12 -0600
Subject: [PATCH 15/83] Notes for myself

---
 grudge/execution.py                 | 7 +++++--
 grudge/symbolic/mappers/__init__.py | 3 +--
 grudge/symbolic/operators.py        | 2 +-
 test/test_mpi_communication.py      | 4 ++--
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index b0f2dca6..c84db90c 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -294,8 +294,11 @@ class ExecutionMapper(mappers.Evaluator,
                                             group_factory)
 
         raise NotImplementedError("map_opposite_rank_face_swap")
-        # TODO: How do we use bdry_comm.remote_to_local_bdry_conns to communicate
-        #       data?
+
+        # FIXME: One rank face swap should swap data between the local rank
+        #           and exactly one remote rank
+        return bdry_comm.remote_to_local_bdry_conns[0](
+                    self.queue, self.rec(field_expr)).with_queue(self.queue)
 
     def map_opposite_interior_face_swap(self, op, field_expr):
         dd = op.dd_in
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index a687482e..c528c01d 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -335,12 +335,11 @@ class OperatorBinder(CSECachingMapperMixin, IdentityMapper):
 
 class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
 
-    # FIXME: Not sure what this is
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
     def map_operator_binding(self, expr):
         if isinstance(expr.op, op.OppositeInteriorFaceSwap):
-            # FIXME: I'm sure this is not right....but it's a start
+            # FIXME: Add the sum of the rank face swaps over each rank
             return (op.OppositeInteriorFaceSwap()(self.rec(expr.field))
                     + op.OppositeRankFaceSwap()(self.rec(expr.field)))
         else:
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index c1a006e8..188b37c8 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -384,7 +384,7 @@ class OppositeRankFaceSwap(Operator):
         sym = _sym()
 
         if dd_in is None:
-            # FIXME: Is this correct?
+            # FIXME: Use BTAG_PARTITION instead
             dd_in = sym.DOFDesc(sym.FRESTR_INTERIOR_FACES)
         if dd_out is None:
             dd_out = dd_in
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index f338423b..55c364b6 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -96,7 +96,7 @@ def mpi_communication_entrypoint():
             [vol_discr.zeros(queue) for i in range(vol_discr.dim)])
 
     # FIXME
-    #dt = op.estimate_rk4_timestep(vol_discr, fields=fields)
+    # dt = op.estimate_rk4_timestep(vol_discr, fields=fields)
 
     # FIXME: Should meshmode consider BTAG_PARTITION to be a boundary?
     #           Fails because: "found faces without boundary conditions"
@@ -147,7 +147,7 @@ def mpi_communication_entrypoint():
 # {{{ MPI test pytest entrypoint
 
 @pytest.mark.mpi
-@pytest.mark.parametrize("num_partitions", [3])
+@pytest.mark.parametrize("num_partitions", [2])
 def test_mpi_communication(num_partitions):
     pytest.importorskip("mpi4py")
 
-- 
GitLab


From f5826c0da96501d235e08d57324804cb0b6259e1 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Mon, 13 Nov 2017 13:34:06 -0600
Subject: [PATCH 16/83] Working

---
 grudge/execution.py                 |  8 ++++++--
 grudge/symbolic/mappers/__init__.py | 11 ++++++++---
 grudge/symbolic/operators.py        | 13 ++++++++-----
 grudge/symbolic/primitives.py       |  2 +-
 test/test_mpi_communication.py      |  2 +-
 5 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index c84db90c..95f9af0d 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -293,11 +293,15 @@ class ExecutionMapper(mappers.Evaluator,
                                             self.discr.volume_discr,
                                             group_factory)
 
-        raise NotImplementedError("map_opposite_rank_face_swap")
+        # raise NotImplementedError("map_opposite_rank_face_swap")
+
+        if op.remote_rank not in bdry_comm.connected_parts:
+            # Perhaps this should be detected earlier
+            return 0
 
         # FIXME: One rank face swap should swap data between the local rank
         #           and exactly one remote rank
-        return bdry_comm.remote_to_local_bdry_conns[0](
+        return bdry_comm.remote_to_local_bdry_conns[op.remote_rank](
                     self.queue, self.rec(field_expr)).with_queue(self.queue)
 
     def map_opposite_interior_face_swap(self, op, field_expr):
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index c528c01d..9daab6da 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -339,9 +339,14 @@ class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
 
     def map_operator_binding(self, expr):
         if isinstance(expr.op, op.OppositeInteriorFaceSwap):
-            # FIXME: Add the sum of the rank face swaps over each rank
-            return (op.OppositeInteriorFaceSwap()(self.rec(expr.field))
-                    + op.OppositeRankFaceSwap()(self.rec(expr.field)))
+            result = op.OppositeInteriorFaceSwap()(self.rec(expr.field))
+            # FIXME: Maybe narrow this down
+            from mpi4py import MPI
+            num_ranks = MPI.COMM_WORLD.Get_size()
+            connected_ranks = range(num_ranks)
+            for remote_rank in connected_ranks:
+                result += op.OppositeRankFaceSwap(remote_rank)(self.rec(expr.field))
+            return result
         else:
             return IdentityMapper.map_operator_binding(self, expr)
 
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 188b37c8..f91a2206 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -380,22 +380,25 @@ class RefInverseMassOperator(RefMassOperatorBase):
 # {{{ boundary-related operators
 
 class OppositeRankFaceSwap(Operator):
-    def __init__(self, dd_in=None, dd_out=None):
+    def __init__(self, remote_rank, dd_in=None, dd_out=None):
         sym = _sym()
 
         if dd_in is None:
             # FIXME: Use BTAG_PARTITION instead
             dd_in = sym.DOFDesc(sym.FRESTR_INTERIOR_FACES)
+            # dd_in = sym.DOFDesc(sym.BTAG_PARTITION)
         if dd_out is None:
             dd_out = dd_in
 
-        if dd_in.domain_tag is not sym.FRESTR_INTERIOR_FACES:
-            raise ValueError("dd_in must be an interior faces domain")
-        if dd_out != dd_in:
-            raise ValueError("dd_out and dd_in must be identical")
+        # if dd_in.domain_tag is not sym.BTAG_PARTITION:
+        #     raise ValueError("dd_in must be an interior faces domain")
+        # if dd_out != dd_in:
+        #     raise ValueError("dd_out and dd_in must be identical")
 
         super(OppositeRankFaceSwap, self).__init__(dd_in, dd_out)
 
+        self.remote_rank = remote_rank
+
     mapper_method = intern("map_opposite_rank_face_swap")
 
 
diff --git a/grudge/symbolic/primitives.py b/grudge/symbolic/primitives.py
index 173a1a2d..5827805f 100644
--- a/grudge/symbolic/primitives.py
+++ b/grudge/symbolic/primitives.py
@@ -181,7 +181,7 @@ class DOFDesc(object):
             pass
         elif domain_tag is None:
             pass
-        elif domain_tag in [BTAG_ALL, BTAG_REALLY_ALL, BTAG_NONE]:
+        elif domain_tag in [BTAG_ALL, BTAG_REALLY_ALL, BTAG_NONE, BTAG_PARTITION]:
             pass
         elif isinstance(domain_tag, DTAG_BOUNDARY):
             pass
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 55c364b6..29aab0d9 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -112,7 +112,7 @@ def mpi_communication_entrypoint():
 
     dt_stepper = set_up_rk4("w", dt, fields, rhs)
 
-    final_t = 10
+    final_t = 1
     nsteps = int(final_t/dt)
     print("dt=%g nsteps=%d" % (dt, nsteps))
 
-- 
GitLab


From 3c69f46627f5ce7b4300f3c0f6088f762cbd5e78 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Sun, 19 Nov 2017 01:24:29 -0600
Subject: [PATCH 17/83] Working

---
 grudge/execution.py                 | 22 +++++++++-------------
 grudge/symbolic/mappers/__init__.py | 15 ++++++++-------
 grudge/symbolic/operators.py        | 17 +++++++----------
 3 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 95f9af0d..50ee50a2 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -280,6 +280,7 @@ class ExecutionMapper(mappers.Evaluator,
         return conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
 
     def map_opposite_rank_face_swap(self, op, field_expr):
+        # raise NotImplementedError("map_opposite_rank_face_swap")
         from mpi4py import MPI
         mpi_comm = MPI.COMM_WORLD
 
@@ -289,20 +290,15 @@ class ExecutionMapper(mappers.Evaluator,
         group_factory = PolynomialWarpAndBlendGroupFactory(self.discr.order)
 
         from meshmode.distributed import MPIBoundaryCommunicator
-        bdry_comm = MPIBoundaryCommunicator(mpi_comm, self.queue,
-                                            self.discr.volume_discr,
-                                            group_factory)
-
-        # raise NotImplementedError("map_opposite_rank_face_swap")
+        bdry_conn_future = MPIBoundaryCommunicator(mpi_comm, self.queue,
+                                                   self.discr.volume_discr,
+                                                   group_factory,
+                                                   op.i_remote_rank)
+        # TODO: How does this end up in execute_dynamic?
+        bdry_conn, _ = bdry_conn_future()
+        return bdry_conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
 
-        if op.remote_rank not in bdry_comm.connected_parts:
-            # Perhaps this should be detected earlier
-            return 0
 
-        # FIXME: One rank face swap should swap data between the local rank
-        #           and exactly one remote rank
-        return bdry_comm.remote_to_local_bdry_conns[op.remote_rank](
-                    self.queue, self.rec(field_expr)).with_queue(self.queue)
 
     def map_opposite_interior_face_swap(self, op, field_expr):
         dd = op.dd_in
@@ -582,7 +578,7 @@ def process_sym_operator(sym_operator, post_bind_mapper=None,
     sym_operator = mappers.GlobalToReferenceMapper(mesh.ambient_dim)(sym_operator)
 
     dumper("before-distributed", sym_operator)
-    sym_operator = mappers.DistributedMapper()(sym_operator)
+    sym_operator = mappers.DistributedMapper(mesh)(sym_operator)
 
     # Ordering restriction:
     #
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 9daab6da..933d1fbf 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -337,15 +337,16 @@ class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
 
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
+    def __init__(self, mesh):
+        from meshmode.distributed import get_connected_partitions
+        self.connected_parts = get_connected_partitions(mesh)
+
     def map_operator_binding(self, expr):
         if isinstance(expr.op, op.OppositeInteriorFaceSwap):
-            result = op.OppositeInteriorFaceSwap()(self.rec(expr.field))
-            # FIXME: Maybe narrow this down
-            from mpi4py import MPI
-            num_ranks = MPI.COMM_WORLD.Get_size()
-            connected_ranks = range(num_ranks)
-            for remote_rank in connected_ranks:
-                result += op.OppositeRankFaceSwap(remote_rank)(self.rec(expr.field))
+            field = self.rec(expr.field)
+            result = op.OppositeInteriorFaceSwap()(field)
+            for i_remote_rank in self.connected_parts:
+                result += op.OppositeRankFaceSwap(i_remote_rank)(field)
             return result
         else:
             return IdentityMapper.map_operator_binding(self, expr)
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index f91a2206..6570f27f 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -380,24 +380,21 @@ class RefInverseMassOperator(RefMassOperatorBase):
 # {{{ boundary-related operators
 
 class OppositeRankFaceSwap(Operator):
-    def __init__(self, remote_rank, dd_in=None, dd_out=None):
+    def __init__(self, i_remote_rank, dd_in=None, dd_out=None):
         sym = _sym()
 
         if dd_in is None:
-            # FIXME: Use BTAG_PARTITION instead
-            dd_in = sym.DOFDesc(sym.FRESTR_INTERIOR_FACES)
-            # dd_in = sym.DOFDesc(sym.BTAG_PARTITION)
+            dd_in = sym.DOFDesc(sym.BTAG_PARTITION)  # TODO: Throws an error later
         if dd_out is None:
             dd_out = dd_in
 
-        # if dd_in.domain_tag is not sym.BTAG_PARTITION:
-        #     raise ValueError("dd_in must be an interior faces domain")
-        # if dd_out != dd_in:
-        #     raise ValueError("dd_out and dd_in must be identical")
+        if dd_in.domain_tag is not sym.BTAG_PARTITION:
+            raise ValueError("dd_in must be a rank boundary faces domain")
+        if dd_out != dd_in:
+            raise ValueError("dd_out and dd_in must be identical")
 
         super(OppositeRankFaceSwap, self).__init__(dd_in, dd_out)
-
-        self.remote_rank = remote_rank
+        self.i_remote_rank = i_remote_rank
 
     mapper_method = intern("map_opposite_rank_face_swap")
 
-- 
GitLab


From 9e6870b280ffb880663dbcc0b2252b7837708fbb Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Mon, 20 Nov 2017 10:38:59 -0600
Subject: [PATCH 18/83] Working

---
 grudge/execution.py           | 4 +---
 grudge/symbolic/operators.py  | 9 +++++----
 grudge/symbolic/primitives.py | 2 +-
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 50ee50a2..be684c25 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -294,12 +294,11 @@ class ExecutionMapper(mappers.Evaluator,
                                                    self.discr.volume_discr,
                                                    group_factory,
                                                    op.i_remote_rank)
-        # TODO: How does this end up in execute_dynamic?
+        # TODO: Need to tell the future what boundary data to transfer
         bdry_conn, _ = bdry_conn_future()
         return bdry_conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
 
 
-
     def map_opposite_interior_face_swap(self, op, field_expr):
         dd = op.dd_in
 
@@ -307,7 +306,6 @@ class ExecutionMapper(mappers.Evaluator,
         if qtag is None:
             # FIXME: Remove once proper quadrature support arrives
             qtag = sym.QTAG_NONE
-
         return self.discr.opposite_face_connection(qtag)(
                 self.queue, self.rec(field_expr)).with_queue(self.queue)
 
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 6570f27f..8bde018d 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -384,14 +384,15 @@ class OppositeRankFaceSwap(Operator):
         sym = _sym()
 
         if dd_in is None:
+            # dd_in = sym.DOFDesc(sym.FRESTR_INTERIOR_FACES)
             dd_in = sym.DOFDesc(sym.BTAG_PARTITION)  # TODO: Throws an error later
         if dd_out is None:
             dd_out = dd_in
 
-        if dd_in.domain_tag is not sym.BTAG_PARTITION:
-            raise ValueError("dd_in must be a rank boundary faces domain")
-        if dd_out != dd_in:
-            raise ValueError("dd_out and dd_in must be identical")
+        # if dd_in.domain_tag is not sym.BTAG_PARTITION:
+        #     raise ValueError("dd_in must be a rank boundary faces domain")
+        # if dd_out != dd_in:
+        #     raise ValueError("dd_out and dd_in must be identical")
 
         super(OppositeRankFaceSwap, self).__init__(dd_in, dd_out)
         self.i_remote_rank = i_remote_rank
diff --git a/grudge/symbolic/primitives.py b/grudge/symbolic/primitives.py
index 5827805f..a6593e99 100644
--- a/grudge/symbolic/primitives.py
+++ b/grudge/symbolic/primitives.py
@@ -209,7 +209,7 @@ class DOFDesc(object):
     def is_boundary(self):
         return (
                 self.domain_tag in [
-                    BTAG_ALL, BTAG_NONE, BTAG_REALLY_ALL]
+                    BTAG_ALL, BTAG_NONE, BTAG_REALLY_ALL, BTAG_PARTITION]
                 or isinstance(self.domain_tag, DTAG_BOUNDARY))
 
     def is_trace(self):
-- 
GitLab


From fd3d60bd2f65014b72e566f97b8e40851b820752 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Mon, 20 Nov 2017 11:06:22 -0600
Subject: [PATCH 19/83] Whitespace fix

---
 grudge/execution.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index be684c25..c48cc392 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -298,7 +298,6 @@ class ExecutionMapper(mappers.Evaluator,
         bdry_conn, _ = bdry_conn_future()
         return bdry_conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
 
-
     def map_opposite_interior_face_swap(self, op, field_expr):
         dd = op.dd_in
 
-- 
GitLab


From 850dc6fe262250cbb718872d4e9418068734fb6b Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Mon, 20 Nov 2017 14:05:40 -0600
Subject: [PATCH 20/83] Fix binding bug

---
 grudge/symbolic/dofdesc_inference.py | 2 +-
 grudge/symbolic/mappers/__init__.py  | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/grudge/symbolic/dofdesc_inference.py b/grudge/symbolic/dofdesc_inference.py
index 9cb54357..832c6a03 100644
--- a/grudge/symbolic/dofdesc_inference.py
+++ b/grudge/symbolic/dofdesc_inference.py
@@ -171,7 +171,7 @@ class DOFDescInferenceMapper(RecursiveMapper, CSECachingMapperMixin):
                         " in '%s'"
                         % (
                             type(expr).__name__,
-                            op_dd, expr.dd_in,
+                            op_dd, expr.op.dd_in,
                             str(expr)))
 
         return operator.dd_out
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index f9cde6aa..a6298f31 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -592,6 +592,7 @@ class StringifyMapper(pymbolic.mapper.stringifier.StringifyMapper):
             else:
                 return repr(s)
 
+        from meshmode.mesh import BTAG_PARTITION
         from meshmode.discretization.connection import (
                 FACE_RESTR_ALL, FACE_RESTR_INTERIOR)
         if dd.domain_tag is None:
@@ -604,6 +605,8 @@ class StringifyMapper(pymbolic.mapper.stringifier.StringifyMapper):
             result = "all_faces"
         elif dd.domain_tag is FACE_RESTR_INTERIOR:
             result = "int_faces"
+        elif dd.domain_tag is FRESTR_INTERIOR_FACES:
+            result = "int_faces"
         else:
             result = fmt(dd.domain_tag)
 
-- 
GitLab


From 5c07792cc83f3b7842b6dc22762b8e7bdb36a9f5 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Wed, 22 Nov 2017 17:16:28 -0600
Subject: [PATCH 21/83] Fix error string typo

---
 grudge/symbolic/dofdesc_inference.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/grudge/symbolic/dofdesc_inference.py b/grudge/symbolic/dofdesc_inference.py
index 832c6a03..7e1de605 100644
--- a/grudge/symbolic/dofdesc_inference.py
+++ b/grudge/symbolic/dofdesc_inference.py
@@ -48,11 +48,11 @@ def unify_dofdescs(dd_a, dd_b, expr=None):
         elif dd_b.domain_tag == DTAG_SCALAR:
             return dd_a
         else:
-            raise ValueError("mismatched domain tags" + loc_str)
+            raise ValueError("mismatched domain tags " + loc_str)
 
     # domain tags match
     if dd_a.quadrature_tag != dd_b.quadrature_tag:
-        raise ValueError("mismatched quadrature tags" + loc_str)
+        raise ValueError("mismatched quadrature tags " + loc_str)
 
     return dd_a
 
-- 
GitLab


From 918d0184b2013ec298e76b81b7690b6b73dd8605 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Wed, 22 Nov 2017 17:25:09 -0600
Subject: [PATCH 22/83] Working

---
 grudge/execution.py                 |  3 ++-
 grudge/symbolic/mappers/__init__.py | 33 +++++++++++++++++++++++++----
 grudge/symbolic/operators.py        | 11 +++++-----
 grudge/symbolic/primitives.py       |  7 ++++--
 4 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index f33e2a1e..e5e01a60 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -284,10 +284,11 @@ class ExecutionMapper(mappers.Evaluator,
         from mpi4py import MPI
         mpi_comm = MPI.COMM_WORLD
 
-        # TODO: Where can I find the group factory?
         from meshmode.discretization.poly_element\
                         import PolynomialWarpAndBlendGroupFactory
         group_factory = PolynomialWarpAndBlendGroupFactory(self.discr.order)
+        # group_factory = self.discr.volume_discr.\
+        #                     get_group_factory_for_quadrature_tag(sym.QTAG_NONE)
 
         from meshmode.distributed import MPIBoundaryCommunicator
         bdry_conn_future = MPIBoundaryCommunicator(mpi_comm, self.queue,
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index a6298f31..866cd4da 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -343,15 +343,40 @@ class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
 
     def map_operator_binding(self, expr):
         if isinstance(expr.op, op.OppositeInteriorFaceSwap):
-            field = self.rec(expr.field)
-            result = op.OppositeInteriorFaceSwap()(field)
+            result = op.OppositeInteriorFaceSwap()(self.rec(expr.field))
             for i_remote_rank in self.connected_parts:
+                field = InterpolateToRankBoundariesMapper(i_remote_rank)(expr.field)
+                # FIXME: OppositeRankFaceSwap returns BTAG_PARTITION data
+                #       and we cannot add that to our FACE_RESTR_INTERIOR data
                 result += op.OppositeRankFaceSwap(i_remote_rank)(field)
+                # r = op.OppositeRankFaceSwap(i_remote_rank)(field)
+                # from meshmode.mesh import BTAG_PARTITION
+                # dd_in = BTAG_PARTITION(i_remote_rank)
+                # dd_out = result.op.dd_out
+                # print(dd_in, dd_out)
+                # result += op.InterpolationOperator(dd_in=dd_in, dd_out=dd_out)(r)
             return result
         else:
             return IdentityMapper.map_operator_binding(self, expr)
 
 
+class InterpolateToRankBoundariesMapper(CSECachingMapperMixin, IdentityMapper):
+
+    map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
+
+    def __init__(self, i_remote_rank):
+        from meshmode.mesh import BTAG_PARTITION
+        self.dd_out = BTAG_PARTITION(i_remote_rank)
+
+    def map_operator_binding(self, expr):
+        if isinstance(expr.op, op.InterpolationOperator):
+            dd_in = expr.op.dd_in
+            dd_out = self.dd_out
+            return op.InterpolationOperator(dd_in=dd_in, dd_out=dd_out)(expr.field)
+        else:
+            return IdentityMapper.map_operator_binding(self, expr)
+
+
 # {{{ operator specializer
 
 class OperatorSpecializer(CSECachingMapperMixin, IdentityMapper):
@@ -605,8 +630,8 @@ class StringifyMapper(pymbolic.mapper.stringifier.StringifyMapper):
             result = "all_faces"
         elif dd.domain_tag is FACE_RESTR_INTERIOR:
             result = "int_faces"
-        elif dd.domain_tag is FRESTR_INTERIOR_FACES:
-            result = "int_faces"
+        elif isinstance(dd.domain_tag, BTAG_PARTITION):
+            result = "rank%d_faces" % dd.domain_tag.part_nr
         else:
             result = fmt(dd.domain_tag)
 
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index efabdaa3..d073e06b 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -384,15 +384,14 @@ class OppositeRankFaceSwap(Operator):
         sym = _sym()
 
         if dd_in is None:
-            # dd_in = sym.DOFDesc(sym.FRESTR_INTERIOR_FACES)
-            dd_in = sym.DOFDesc(sym.BTAG_PARTITION)  # TODO: Throws an error later
+            dd_in = sym.DOFDesc(sym.BTAG_PARTITION(i_remote_rank))
         if dd_out is None:
             dd_out = dd_in
 
-        # if dd_in.domain_tag is not sym.BTAG_PARTITION:
-        #     raise ValueError("dd_in must be a rank boundary faces domain")
-        # if dd_out != dd_in:
-        #     raise ValueError("dd_out and dd_in must be identical")
+        if not isinstance(dd_in.domain_tag, sym.BTAG_PARTITION):
+            raise ValueError("dd_in must be a rank boundary faces domain")
+        if dd_out != dd_in:
+            raise ValueError("dd_out and dd_in must be identical")
 
         super(OppositeRankFaceSwap, self).__init__(dd_in, dd_out)
         self.i_remote_rank = i_remote_rank
diff --git a/grudge/symbolic/primitives.py b/grudge/symbolic/primitives.py
index 5b21b3d0..81a2ef9e 100644
--- a/grudge/symbolic/primitives.py
+++ b/grudge/symbolic/primitives.py
@@ -181,7 +181,9 @@ class DOFDesc(object):
             pass
         elif domain_tag is None:
             pass
-        elif domain_tag in [BTAG_ALL, BTAG_REALLY_ALL, BTAG_NONE, BTAG_PARTITION]:
+        elif isinstance(domain_tag, BTAG_PARTITION):
+            pass
+        elif domain_tag in [BTAG_ALL, BTAG_REALLY_ALL, BTAG_NONE]:
             pass
         elif isinstance(domain_tag, DTAG_BOUNDARY):
             pass
@@ -209,7 +211,8 @@ class DOFDesc(object):
     def is_boundary(self):
         return (
                 self.domain_tag in [
-                    BTAG_ALL, BTAG_NONE, BTAG_REALLY_ALL, BTAG_PARTITION]
+                    BTAG_ALL, BTAG_NONE, BTAG_REALLY_ALL]
+                or isinstance(self.domain_tag, BTAG_PARTITION)
                 or isinstance(self.domain_tag, DTAG_BOUNDARY))
 
     def is_trace(self):
-- 
GitLab


From 68d8d97d55e70287f9efa84f67d9de9ad3a18e49 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Sat, 9 Dec 2017 12:54:25 -0600
Subject: [PATCH 23/83] Add distributed mapper

---
 grudge/symbolic/mappers/__init__.py | 73 +++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 20 deletions(-)

diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 866cd4da..e196eef8 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -333,6 +333,8 @@ class OperatorBinder(CSECachingMapperMixin, IdentityMapper):
 # }}}
 
 
+# {{{ distributed mappers
+
 class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
 
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
@@ -342,40 +344,71 @@ class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
         self.connected_parts = get_connected_partitions(mesh)
 
     def map_operator_binding(self, expr):
-        if isinstance(expr.op, op.OppositeInteriorFaceSwap):
-            result = op.OppositeInteriorFaceSwap()(self.rec(expr.field))
+        if isinstance(expr.op, op.RefFaceMassOperator):
+            return expr.op(RankCommunicationMapper(self.connected_parts)(expr.field))
+        else:
+            return IdentityMapper.map_operator_binding(self, expr)
+
+
+class RankCommunicationMapper(CSECachingMapperMixin, IdentityMapper):
+
+    map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
+
+    def __init__(self, connected_parts):
+        self.connected_parts = connected_parts
+
+    def map_operator_binding(self, expr):
+        from meshmode.mesh import BTAG_PARTITION
+        from meshmode.discretization.connection import (FACE_RESTR_ALL,
+                                                        FACE_RESTR_INTERIOR)
+        if (isinstance(expr.op, op.InterpolationOperator)
+                and expr.op.dd_in.domain_tag is FACE_RESTR_INTERIOR
+                and expr.op.dd_out.domain_tag is FACE_RESTR_ALL):
+            distributed_work = 0
             for i_remote_rank in self.connected_parts:
-                field = InterpolateToRankBoundariesMapper(i_remote_rank)(expr.field)
-                # FIXME: OppositeRankFaceSwap returns BTAG_PARTITION data
-                #       and we cannot add that to our FACE_RESTR_INTERIOR data
-                result += op.OppositeRankFaceSwap(i_remote_rank)(field)
-                # r = op.OppositeRankFaceSwap(i_remote_rank)(field)
-                # from meshmode.mesh import BTAG_PARTITION
-                # dd_in = BTAG_PARTITION(i_remote_rank)
-                # dd_out = result.op.dd_out
-                # print(dd_in, dd_out)
-                # result += op.InterpolationOperator(dd_in=dd_in, dd_out=dd_out)(r)
-            return result
+                f1 = OppSwapToRankSwapMapper(i_remote_rank)(expr.field)
+                btag_rank = BTAG_PARTITION(i_remote_rank)
+                distributed_work += op.InterpolationOperator(dd_in=btag_rank,
+                                                             dd_out=expr.op.dd_out)(f1)
+            return expr + distributed_work
+
         else:
             return IdentityMapper.map_operator_binding(self, expr)
 
 
-class InterpolateToRankBoundariesMapper(CSECachingMapperMixin, IdentityMapper):
+class OppSwapToRankSwapMapper(CSECachingMapperMixin, IdentityMapper):
 
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
     def __init__(self, i_remote_rank):
-        from meshmode.mesh import BTAG_PARTITION
-        self.dd_out = BTAG_PARTITION(i_remote_rank)
+        self.i_remote_rank = i_remote_rank
 
     def map_operator_binding(self, expr):
-        if isinstance(expr.op, op.InterpolationOperator):
-            dd_in = expr.op.dd_in
-            dd_out = self.dd_out
-            return op.InterpolationOperator(dd_in=dd_in, dd_out=dd_out)(expr.field)
+        from meshmode.discretization.connection import (FACE_RESTR_ALL,
+                                                        FACE_RESTR_INTERIOR)
+        from meshmode.mesh import BTAG_PARTITION
+        from grudge.symbolic.primitives import NodeCoordinateComponent
+        btag_rank = BTAG_PARTITION(self.i_remote_rank)
+        if isinstance(expr.op, op.OppositeInteriorFaceSwap):
+            return op.OppositeRankFaceSwap(self.i_remote_rank)(self.rec(expr.field))
+        elif (isinstance(expr.op, op.InterpolationOperator)
+                    and expr.op.dd_out.domain_tag is FACE_RESTR_INTERIOR):
+            return op.InterpolationOperator(dd_in=expr.op.dd_in,
+                                            dd_out=btag_rank)(self.rec(expr.field))
+        elif (isinstance(expr.op, op.RefDiffOperator)
+                    and expr.op.dd_in.domain_tag is FACE_RESTR_INTERIOR
+                    and expr.op.dd_out.domain_tag is FACE_RESTR_INTERIOR):
+            dd = sym.as_dofdesc(btag_rank)
+            f = NodeCoordinateComponent(expr.field.axis, dd=dd)
+            return op.RefDiffOperator(expr.op.rst_axis,
+                                      dd_in=dd,
+                                      dd_out=dd)(f)
         else:
+            print(type(expr.op))
             return IdentityMapper.map_operator_binding(self, expr)
 
+# }}}
+
 
 # {{{ operator specializer
 
-- 
GitLab


From cdba51940401c165f408dc617e0666b2db81e415 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Sat, 9 Dec 2017 14:27:55 -0600
Subject: [PATCH 24/83] Fix formatting

---
 grudge/symbolic/mappers/__init__.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index e196eef8..e164e5b2 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -366,10 +366,10 @@ class RankCommunicationMapper(CSECachingMapperMixin, IdentityMapper):
                 and expr.op.dd_out.domain_tag is FACE_RESTR_ALL):
             distributed_work = 0
             for i_remote_rank in self.connected_parts:
-                f1 = OppSwapToRankSwapMapper(i_remote_rank)(expr.field)
+                mapped_field = OppSwapToRankSwapMapper(i_remote_rank)(expr.field)
                 btag_rank = BTAG_PARTITION(i_remote_rank)
                 distributed_work += op.InterpolationOperator(dd_in=btag_rank,
-                                                             dd_out=expr.op.dd_out)(f1)
+                                             dd_out=expr.op.dd_out)(mapped_field)
             return expr + distributed_work
 
         else:
@@ -384,8 +384,7 @@ class OppSwapToRankSwapMapper(CSECachingMapperMixin, IdentityMapper):
         self.i_remote_rank = i_remote_rank
 
     def map_operator_binding(self, expr):
-        from meshmode.discretization.connection import (FACE_RESTR_ALL,
-                                                        FACE_RESTR_INTERIOR)
+        from meshmode.discretization.connection import FACE_RESTR_INTERIOR
         from meshmode.mesh import BTAG_PARTITION
         from grudge.symbolic.primitives import NodeCoordinateComponent
         btag_rank = BTAG_PARTITION(self.i_remote_rank)
-- 
GitLab


From 95d3027fd30964e46b626a1f8f0b2e2ef0036db5 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Mon, 11 Dec 2017 14:05:39 -0600
Subject: [PATCH 25/83] Rename var

---
 grudge/execution.py                 | 2 +-
 grudge/symbolic/mappers/__init__.py | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index e5e01a60..282d75f9 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -280,13 +280,13 @@ class ExecutionMapper(mappers.Evaluator,
         return conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
 
     def map_opposite_rank_face_swap(self, op, field_expr):
-        # raise NotImplementedError("map_opposite_rank_face_swap")
         from mpi4py import MPI
         mpi_comm = MPI.COMM_WORLD
 
         from meshmode.discretization.poly_element\
                         import PolynomialWarpAndBlendGroupFactory
         group_factory = PolynomialWarpAndBlendGroupFactory(self.discr.order)
+        # TODO
         # group_factory = self.discr.volume_discr.\
         #                     get_group_factory_for_quadrature_tag(sym.QTAG_NONE)
 
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index e164e5b2..f405a70b 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -398,12 +398,11 @@ class OppSwapToRankSwapMapper(CSECachingMapperMixin, IdentityMapper):
                     and expr.op.dd_in.domain_tag is FACE_RESTR_INTERIOR
                     and expr.op.dd_out.domain_tag is FACE_RESTR_INTERIOR):
             dd = sym.as_dofdesc(btag_rank)
-            f = NodeCoordinateComponent(expr.field.axis, dd=dd)
+            rank_faces = NodeCoordinateComponent(expr.field.axis, dd=dd)
             return op.RefDiffOperator(expr.op.rst_axis,
                                       dd_in=dd,
-                                      dd_out=dd)(f)
+                                      dd_out=dd)(rank_faces)
         else:
-            print(type(expr.op))
             return IdentityMapper.map_operator_binding(self, expr)
 
 # }}}
-- 
GitLab


From 0cb5c11ea8595b44c355543775af7b5b87b71734 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Mon, 11 Dec 2017 16:03:45 -0600
Subject: [PATCH 26/83] Working

---
 grudge/execution.py                 | 6 +++++-
 grudge/symbolic/mappers/__init__.py | 5 ++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 282d75f9..e562cec2 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -576,7 +576,11 @@ def process_sym_operator(sym_operator, post_bind_mapper=None,
     sym_operator = mappers.GlobalToReferenceMapper(mesh.ambient_dim)(sym_operator)
 
     dumper("before-distributed", sym_operator)
-    sym_operator = mappers.DistributedMapper(mesh)(sym_operator)
+    from meshmode.distributed import get_connected_partitions
+    connected_parts = get_connected_partitions(mesh)
+    sym_operator = mappers.DistributedMapper(connected_parts)(sym_operator)
+    # print(sym.pretty(sym_operator))
+    # 1/0
 
     # Ordering restriction:
     #
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index f405a70b..501b0ee5 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -339,9 +339,8 @@ class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
 
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
-    def __init__(self, mesh):
-        from meshmode.distributed import get_connected_partitions
-        self.connected_parts = get_connected_partitions(mesh)
+    def __init__(self, connected_parts):
+        self.connected_parts = connected_parts
 
     def map_operator_binding(self, expr):
         if isinstance(expr.op, op.RefFaceMassOperator):
-- 
GitLab


From 15cf207e1469d01339e773a12db8d773d411084b Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Tue, 9 Jan 2018 12:39:25 -0600
Subject: [PATCH 27/83] Clean up code

---
 grudge/execution.py                 | 11 ++----
 grudge/symbolic/mappers/__init__.py | 57 +++++++++++++++++------------
 grudge/symbolic/operators.py        | 14 ++++---
 grudge/symbolic/primitives.py       |  1 +
 4 files changed, 45 insertions(+), 38 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index e562cec2..6df3029f 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -285,16 +285,13 @@ class ExecutionMapper(mappers.Evaluator,
 
         from meshmode.discretization.poly_element\
                         import PolynomialWarpAndBlendGroupFactory
-        group_factory = PolynomialWarpAndBlendGroupFactory(self.discr.order)
-        # TODO
-        # group_factory = self.discr.volume_discr.\
-        #                     get_group_factory_for_quadrature_tag(sym.QTAG_NONE)
+        grp_factory = self.discr.get_group_factory_for_quadrature_tag(sym.QTAG_NONE)
 
         from meshmode.distributed import MPIBoundaryCommunicator
         bdry_conn_future = MPIBoundaryCommunicator(mpi_comm, self.queue,
                                                    self.discr.volume_discr,
-                                                   group_factory,
-                                                   op.i_remote_rank)
+                                                   grp_factory,
+                                                   op.i_remote_part)
         # TODO: Need to tell the future what boundary data to transfer
         bdry_conn, _ = bdry_conn_future()
         return bdry_conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
@@ -579,8 +576,6 @@ def process_sym_operator(sym_operator, post_bind_mapper=None,
     from meshmode.distributed import get_connected_partitions
     connected_parts = get_connected_partitions(mesh)
     sym_operator = mappers.DistributedMapper(connected_parts)(sym_operator)
-    # print(sym.pretty(sym_operator))
-    # 1/0
 
     # Ordering restriction:
     #
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 501b0ee5..2b9b1a32 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -336,7 +336,6 @@ class OperatorBinder(CSECachingMapperMixin, IdentityMapper):
 # {{{ distributed mappers
 
 class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
-
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
     def __init__(self, connected_parts):
@@ -350,7 +349,6 @@ class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
 
 
 class RankCommunicationMapper(CSECachingMapperMixin, IdentityMapper):
-
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
     def __init__(self, connected_parts):
@@ -364,10 +362,10 @@ class RankCommunicationMapper(CSECachingMapperMixin, IdentityMapper):
                 and expr.op.dd_in.domain_tag is FACE_RESTR_INTERIOR
                 and expr.op.dd_out.domain_tag is FACE_RESTR_ALL):
             distributed_work = 0
-            for i_remote_rank in self.connected_parts:
-                mapped_field = OppSwapToRankSwapMapper(i_remote_rank)(expr.field)
-                btag_rank = BTAG_PARTITION(i_remote_rank)
-                distributed_work += op.InterpolationOperator(dd_in=btag_rank,
+            for i_remote_part in self.connected_parts:
+                mapped_field = RankGeometryChanger(i_remote_part)(expr.field)
+                btag_part = BTAG_PARTITION(i_remote_part)
+                distributed_work += op.InterpolationOperator(dd_in=btag_part,
                                              dd_out=expr.op.dd_out)(mapped_field)
             return expr + distributed_work
 
@@ -375,34 +373,45 @@ class RankCommunicationMapper(CSECachingMapperMixin, IdentityMapper):
             return IdentityMapper.map_operator_binding(self, expr)
 
 
-class OppSwapToRankSwapMapper(CSECachingMapperMixin, IdentityMapper):
-
+class RankGeometryChanger(CSECachingMapperMixin, IdentityMapper):
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
-    def __init__(self, i_remote_rank):
-        self.i_remote_rank = i_remote_rank
-
-    def map_operator_binding(self, expr):
+    def __init__(self, i_remote_part):
         from meshmode.discretization.connection import FACE_RESTR_INTERIOR
         from meshmode.mesh import BTAG_PARTITION
-        from grudge.symbolic.primitives import NodeCoordinateComponent
-        btag_rank = BTAG_PARTITION(self.i_remote_rank)
+        self.prev_dd = sym.as_dofdesc(FACE_RESTR_INTERIOR)
+        self.new_dd = sym.as_dofdesc(BTAG_PARTITION(i_remote_part))
+
+    def _raise_unable(self, expr):
+        raise ValueError("encountered '%s' in updating subexpression for "
+            "changed geometry (likely for distributed computation); "
+            "unable to adapt from '%s' to '%s'"
+            % (str(expr), self.prev_dd, self.new_dd))
+
+    def map_operator_binding(self, expr):
         if isinstance(expr.op, op.OppositeInteriorFaceSwap):
-            return op.OppositeRankFaceSwap(self.i_remote_rank)(self.rec(expr.field))
+            return op.OppositeRankFaceSwap(dd_in=self.new_dd)(self.rec(expr.field))
         elif (isinstance(expr.op, op.InterpolationOperator)
-                    and expr.op.dd_out.domain_tag is FACE_RESTR_INTERIOR):
+                    and expr.op.dd_out == self.prev_dd):
             return op.InterpolationOperator(dd_in=expr.op.dd_in,
-                                            dd_out=btag_rank)(self.rec(expr.field))
+                                            dd_out=self.new_dd)(expr.field)
         elif (isinstance(expr.op, op.RefDiffOperator)
-                    and expr.op.dd_in.domain_tag is FACE_RESTR_INTERIOR
-                    and expr.op.dd_out.domain_tag is FACE_RESTR_INTERIOR):
-            dd = sym.as_dofdesc(btag_rank)
-            rank_faces = NodeCoordinateComponent(expr.field.axis, dd=dd)
+                    and expr.op.dd_out == self.prev_dd
+                    and expr.op.dd_in == self.prev_dd):
             return op.RefDiffOperator(expr.op.rst_axis,
-                                      dd_in=dd,
-                                      dd_out=dd)(rank_faces)
+                                      dd_in=self.new_dd,
+                                      dd_out=self.new_dd)(self.rec(expr.field))
         else:
-            return IdentityMapper.map_operator_binding(self, expr)
+            self._raise_unable(expr)
+
+    def map_grudge_variable(self, expr):
+        self._raise_unable(expr)
+
+    def map_node_coordinate_component(self, expr):
+        if expr.dd == self.prev_dd:
+            return type(expr)(expr.axis, self.new_dd)
+        else:
+            self._raise_unable(expr)
 
 # }}}
 
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index d073e06b..3fe5658e 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -380,21 +380,23 @@ class RefInverseMassOperator(RefMassOperatorBase):
 # {{{ boundary-related operators
 
 class OppositeRankFaceSwap(Operator):
-    def __init__(self, i_remote_rank, dd_in=None, dd_out=None):
+    def __init__(self, dd_in=None, dd_out=None):
         sym = _sym()
 
-        if dd_in is None:
-            dd_in = sym.DOFDesc(sym.BTAG_PARTITION(i_remote_rank))
-        if dd_out is None:
+        if dd_in is None and dd_in is None:
+            raise ValueError("dd_in or dd_out must be specified")
+        elif dd_in is None:
+            dd_in = dd_out
+        elif dd_out is None:
             dd_out = dd_in
 
         if not isinstance(dd_in.domain_tag, sym.BTAG_PARTITION):
-            raise ValueError("dd_in must be a rank boundary faces domain")
+            raise ValueError("dd_in must be a partition boundary faces domain")
         if dd_out != dd_in:
             raise ValueError("dd_out and dd_in must be identical")
 
         super(OppositeRankFaceSwap, self).__init__(dd_in, dd_out)
-        self.i_remote_rank = i_remote_rank
+        self.i_remote_part = dd_in.domain_tag.part_nr
 
     mapper_method = intern("map_opposite_rank_face_swap")
 
diff --git a/grudge/symbolic/primitives.py b/grudge/symbolic/primitives.py
index 81a2ef9e..761fc7a7 100644
--- a/grudge/symbolic/primitives.py
+++ b/grudge/symbolic/primitives.py
@@ -154,6 +154,7 @@ class DOFDesc(object):
             :class:`meshmode.discretization.BTAG_ALL`,
             :class:`meshmode.discretization.BTAG_NONE`,
             :class:`meshmode.discretization.BTAG_REALLY_ALL`,
+            :class:`meshmode.discretization.PARTITION`,
             or :class
             or *None* to indicate that the geometry is not yet known.
 
-- 
GitLab


From 887e832351ffaa9fee7784258ed557cbaf0a3783 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Tue, 9 Jan 2018 12:42:22 -0600
Subject: [PATCH 28/83] Fix code style

---
 grudge/execution.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 6df3029f..695b83c9 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -283,8 +283,6 @@ class ExecutionMapper(mappers.Evaluator,
         from mpi4py import MPI
         mpi_comm = MPI.COMM_WORLD
 
-        from meshmode.discretization.poly_element\
-                        import PolynomialWarpAndBlendGroupFactory
         grp_factory = self.discr.get_group_factory_for_quadrature_tag(sym.QTAG_NONE)
 
         from meshmode.distributed import MPIBoundaryCommunicator
-- 
GitLab


From 7c640619f2dc6be4df097051ca759ebabdc83922 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Tue, 9 Jan 2018 12:50:24 -0600
Subject: [PATCH 29/83] Rename vars

---
 grudge/execution.py                 | 2 +-
 grudge/symbolic/mappers/__init__.py | 9 +++++----
 grudge/symbolic/operators.py        | 6 +++---
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 695b83c9..66d2cd53 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -279,7 +279,7 @@ class ExecutionMapper(mappers.Evaluator,
 
         return conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
 
-    def map_opposite_rank_face_swap(self, op, field_expr):
+    def map_opposite_partition_face_swap(self, op, field_expr):
         from mpi4py import MPI
         mpi_comm = MPI.COMM_WORLD
 
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 2b9b1a32..14049676 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -147,7 +147,7 @@ class OperatorReducerMixin(LocalOpReducerMixin, FluxOpReducerMixin):
     map_ref_mass = _map_op_base
     map_ref_inverse_mass = _map_op_base
 
-    map_opposite_rank_face_swap = _map_op_base
+    map_opposite_partition_face_swap = _map_op_base
     map_opposite_interior_face_swap = _map_op_base
     map_face_mass_operator = _map_op_base
     map_ref_face_mass_operator = _map_op_base
@@ -196,7 +196,7 @@ class IdentityMapperMixin(LocalOpReducerMixin, FluxOpReducerMixin):
     map_ref_mass = map_elementwise_linear
     map_ref_inverse_mass = map_elementwise_linear
 
-    map_opposite_rank_face_swap = map_elementwise_linear
+    map_opposite_partition_face_swap = map_elementwise_linear
     map_opposite_interior_face_swap = map_elementwise_linear
     map_face_mass_operator = map_elementwise_linear
     map_ref_face_mass_operator = map_elementwise_linear
@@ -390,7 +390,8 @@ class RankGeometryChanger(CSECachingMapperMixin, IdentityMapper):
 
     def map_operator_binding(self, expr):
         if isinstance(expr.op, op.OppositeInteriorFaceSwap):
-            return op.OppositeRankFaceSwap(dd_in=self.new_dd)(self.rec(expr.field))
+            return op.OppositePartitionFaceSwap(dd_in=self.new_dd)(
+                                                        self.rec(expr.field))
         elif (isinstance(expr.op, op.InterpolationOperator)
                     and expr.op.dd_out == self.prev_dd):
             return op.InterpolationOperator(dd_in=expr.op.dd_in,
@@ -758,7 +759,7 @@ class StringifyMapper(pymbolic.mapper.stringifier.StringifyMapper):
     def map_ref_face_mass_operator(self, expr, enclosing_prec):
         return "RefFaceM" + self._format_op_dd(expr)
 
-    def map_opposite_rank_face_swap(self, expr, enclosing_prec):
+    def map_opposite_partition_face_swap(self, expr, enclosing_prec):
         return "RankSwap" + self._format_op_dd(expr)
 
     def map_opposite_interior_face_swap(self, expr, enclosing_prec):
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 3fe5658e..3d9ddf16 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -379,7 +379,7 @@ class RefInverseMassOperator(RefMassOperatorBase):
 
 # {{{ boundary-related operators
 
-class OppositeRankFaceSwap(Operator):
+class OppositePartitionFaceSwap(Operator):
     def __init__(self, dd_in=None, dd_out=None):
         sym = _sym()
 
@@ -395,10 +395,10 @@ class OppositeRankFaceSwap(Operator):
         if dd_out != dd_in:
             raise ValueError("dd_out and dd_in must be identical")
 
-        super(OppositeRankFaceSwap, self).__init__(dd_in, dd_out)
+        super(OppositePartitionFaceSwap, self).__init__(dd_in, dd_out)
         self.i_remote_part = dd_in.domain_tag.part_nr
 
-    mapper_method = intern("map_opposite_rank_face_swap")
+    mapper_method = intern("map_opposite_partition_face_swap")
 
 
 class OppositeInteriorFaceSwap(Operator):
-- 
GitLab


From eaa76711c8f00813b3069f085a93c100a8ea4b28 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Thu, 18 Jan 2018 11:32:01 -0600
Subject: [PATCH 30/83] Bug fixes

---
 grudge/execution.py                 |   2 +-
 grudge/symbolic/mappers/__init__.py |  41 +++++---
 grudge/symbolic/operators.py        |  14 +--
 test/test_mpi_communication.py      | 152 +++++++++++++++++++++++++++-
 4 files changed, 185 insertions(+), 24 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 66d2cd53..5199f8fb 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -250,7 +250,7 @@ class ExecutionMapper(mappers.Evaluator,
 
         if dd_in.is_volume():
             if dd_out.domain_tag is sym.FACE_RESTR_ALL:
-                conn = self.discr.all_faces_connection(qtag)
+                conn = self.discr.all_faces_volume_connection(qtag)
             elif dd_out.domain_tag is sym.FACE_RESTR_INTERIOR:
                 conn = self.discr.interior_faces_connection(qtag)
             elif dd_out.is_boundary():
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 14049676..d2ef5c66 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -338,19 +338,6 @@ class OperatorBinder(CSECachingMapperMixin, IdentityMapper):
 class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
-    def __init__(self, connected_parts):
-        self.connected_parts = connected_parts
-
-    def map_operator_binding(self, expr):
-        if isinstance(expr.op, op.RefFaceMassOperator):
-            return expr.op(RankCommunicationMapper(self.connected_parts)(expr.field))
-        else:
-            return IdentityMapper.map_operator_binding(self, expr)
-
-
-class RankCommunicationMapper(CSECachingMapperMixin, IdentityMapper):
-    map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
-
     def __init__(self, connected_parts):
         self.connected_parts = connected_parts
 
@@ -368,11 +355,37 @@ class RankCommunicationMapper(CSECachingMapperMixin, IdentityMapper):
                 distributed_work += op.InterpolationOperator(dd_in=btag_part,
                                              dd_out=expr.op.dd_out)(mapped_field)
             return expr + distributed_work
-
+        # if isinstance(expr.op, op.RefFaceMassOperator):
+        #     return expr.op(RankCommunicationMapper(self.connected_parts)(expr.field))
         else:
             return IdentityMapper.map_operator_binding(self, expr)
 
 
+# class RankCommunicationMapper(CSECachingMapperMixin, IdentityMapper):
+#     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
+#
+#     def __init__(self, connected_parts):
+#         self.connected_parts = connected_parts
+#
+#     def map_operator_binding(self, expr):
+#         from meshmode.mesh import BTAG_PARTITION
+#         from meshmode.discretization.connection import (FACE_RESTR_ALL,
+#                                                         FACE_RESTR_INTERIOR)
+#         if (isinstance(expr.op, op.InterpolationOperator)
+#                 and expr.op.dd_in.domain_tag is FACE_RESTR_INTERIOR
+#                 and expr.op.dd_out.domain_tag is FACE_RESTR_ALL):
+#             distributed_work = 0
+#             for i_remote_part in self.connected_parts:
+#                 mapped_field = RankGeometryChanger(i_remote_part)(expr.field)
+#                 btag_part = BTAG_PARTITION(i_remote_part)
+#                 distributed_work += op.InterpolationOperator(dd_in=btag_part,
+#                                              dd_out=expr.op.dd_out)(mapped_field)
+#             return expr + distributed_work
+#
+#         else:
+#             return IdentityMapper.map_operator_binding(self, expr)
+
+
 class RankGeometryChanger(CSECachingMapperMixin, IdentityMapper):
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 3d9ddf16..7dc28669 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -383,19 +383,19 @@ class OppositePartitionFaceSwap(Operator):
     def __init__(self, dd_in=None, dd_out=None):
         sym = _sym()
 
-        if dd_in is None and dd_in is None:
+        if dd_in is None and dd_out is None:
             raise ValueError("dd_in or dd_out must be specified")
         elif dd_in is None:
             dd_in = dd_out
         elif dd_out is None:
             dd_out = dd_in
 
-        if not isinstance(dd_in.domain_tag, sym.BTAG_PARTITION):
+        super(OppositePartitionFaceSwap, self).__init__(dd_in, dd_out)
+        if not isinstance(self.dd_in.domain_tag, sym.BTAG_PARTITION):
             raise ValueError("dd_in must be a partition boundary faces domain")
-        if dd_out != dd_in:
+        if self.dd_out != self.dd_in:
             raise ValueError("dd_out and dd_in must be identical")
 
-        super(OppositePartitionFaceSwap, self).__init__(dd_in, dd_out)
         self.i_remote_part = dd_in.domain_tag.part_nr
 
     mapper_method = intern("map_opposite_partition_face_swap")
@@ -410,12 +410,12 @@ class OppositeInteriorFaceSwap(Operator):
         if dd_out is None:
             dd_out = dd_in
 
-        if dd_in.domain_tag is not sym.FACE_RESTR_INTERIOR:
+        super(OppositeInteriorFaceSwap, self).__init__(dd_in, dd_out)
+        if self.dd_in.domain_tag is not sym.FACE_RESTR_INTERIOR:
             raise ValueError("dd_in must be an interior faces domain")
-        if dd_out != dd_in:
+        if self.dd_out != self.dd_in:
             raise ValueError("dd_out and dd_in must be identical")
 
-        super(OppositeInteriorFaceSwap, self).__init__(dd_in, dd_out)
 
     mapper_method = intern("map_opposite_interior_face_swap")
 
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 29aab0d9..30710d67 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -36,6 +36,154 @@ from grudge import sym, bind, Discretization
 from grudge.shortcuts import set_up_rk4
 
 
+# TODO: Make new test
+# Create a partitioned mesh and apply sin(2x + 3y) to its field
+# If everything is working, the boundaries of the partitions should be continuous
+# Look at int_tpair
+# Interpolate volume to boundary, ask for the opposite partition at the boundary
+# then compare
+# def mpi_communication_entrypoint():
+#     cl_ctx = cl.create_some_context()
+#     queue = cl.CommandQueue(cl_ctx)
+#     from meshmode.distributed import MPIMeshDistributor
+#
+#     from mpi4py import MPI
+#     comm = MPI.COMM_WORLD
+#     rank = comm.Get_rank()
+#     num_parts = comm.Get_size()
+#
+#     mesh_dist = MPIMeshDistributor(comm)
+#
+#     dims = 2
+#     dt = 0.04
+#     order = 6
+#
+#     if mesh_dist.is_mananger_rank():
+#         from meshmode.mesh.generation import generate_regular_rect_mesh
+#         mesh = generate_regular_rect_mesh(a=(-0.5,)*dims,
+#                                           b=(0.5,)*dims,
+#                                           n=(16,)*dims)
+#
+#         from pymetis import part_graph
+#         _, p = part_graph(num_parts,
+#                           xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
+#                           adjncy=mesh.nodal_adjacency.neighbors.tolist())
+#         part_per_element = np.array(p)
+#
+#         local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
+#     else:
+#         local_mesh = mesh_dist.receive_mesh_part()
+#
+#     vol_discr = Discretization(cl_ctx, local_mesh, order=order)
+#
+#     if 0:
+#         sym_x = sym.nodes(local_mesh.dim)
+#         myfunc_symb = sym.sin(np.dot(sym_x, [2, 3]))
+#         myfunc = bind(vol_discr, myfunc_symb)(queue)
+#
+#         sym_all_faces_func = sym.cse(
+#             sym.interp("vol", "all_faces")(sym.var("myfunc")))
+#         sym_int_faces_func = sym.cse(
+#             sym.interp("vol", "int_faces")(sym.var("myfunc")))
+#         sym_bdry_faces_func = sym.cse(
+#             sym.interp(sym.BTAG_ALL, "all_faces")(
+#                 sym.interp("vol", sym.BTAG_ALL)(sym.var("myfunc"))))
+#
+#         bound_face_swap = bind(vol_discr,
+#             sym.interp("int_faces", "all_faces")(
+#                 sym.OppositeInteriorFaceSwap("int_faces")(
+#                     sym_int_faces_func)
+#                 ) - (sym_all_faces_func - sym_bdry_faces_func)
+#                 )
+#
+#         hopefully_zero = bound_face_swap(queue, myfunc=myfunc)
+#         np.set_printoptions(threshold=100000000, suppress=True)
+#         print(hopefully_zero)
+#
+#         import numpy.linalg as la
+#         print(la.norm(hopefully_zero.get()))
+#     else:
+#         sym_x = sym.nodes(local_mesh.dim)
+#         myfunc_symb = sym.sin(np.dot(sym_x, [2, 3]))
+#         myfunc = bind(vol_discr, myfunc_symb)(queue)
+#
+#         sym_all_faces_func = sym.cse(
+#             sym.interp("vol", "all_faces")(sym.var("myfunc"))
+#             - sym.interp(sym.BTAG_ALL, "all_faces")(
+#                 sym.interp("vol", sym.BTAG_ALL)(sym.var("myfunc")))
+#             )
+#         sym_int_faces_func = sym.cse(
+#             sym.interp("vol", "int_faces")(sym.var("myfunc")))
+#
+#         swapped = bind(vol_discr,
+#             sym.interp("int_faces", "all_faces")(
+#                 sym.OppositeInteriorFaceSwap("int_faces")(
+#                     sym_int_faces_func)
+#                 ))(queue, myfunc=myfunc)
+#         unswapped = bind(vol_discr, sym_all_faces_func)(queue, myfunc=myfunc)
+#
+#         together = np.zeros((3,)+swapped.shape)
+#         print(together.shape)
+#         together[0] = swapped.get()
+#         together[1] = unswapped.get()
+#         together[2] = together[1]-together[0]
+#
+#         np.set_printoptions(threshold=100000000, suppress=True, linewidth=150)
+#         print(together.T)
+#
+#         import numpy.linalg as la
+#         print(la.norm(hopefully_zero.get()))
+#     1/0
+#
+#     w = sym.make_sym_array("w", vol_discr.dim+1)
+#     operator = sym.InverseMassOperator()(
+#                     sym.FaceMassOperator()(sym.int_tpair(w)))
+#
+#     # print(sym.pretty(operator)
+#     bound_op = bind(vol_discr, operator)
+#     # print(bound_op)
+#     # 1/0
+#
+#     def rhs(t, w):
+#         return bound_op(queue, t=t, w=w)
+#
+#     from pytools.obj_array import join_fields
+#     fields = join_fields(vol_discr.zeros(queue),
+#             [vol_discr.zeros(queue) for i in range(vol_discr.dim)])
+#
+#     dt_stepper = set_up_rk4("w", dt, fields, rhs)
+#
+#     final_t = 10
+#     nsteps = int(final_t/dt)
+#     print("rank=%d dt=%g nsteps=%d" % (rank, dt, nsteps))
+#
+#     from grudge.shortcuts import make_visualizer
+#     vis = make_visualizer(vol_discr, vis_order=order)
+#
+#     step = 0
+#
+#     norm = bind(vol_discr, sym.norm(2, sym.var("u")))
+#
+#     from time import time
+#     t_last_step = time()
+#
+#     for event in dt_stepper.run(t_end=final_t):
+#         if isinstance(event, dt_stepper.StateComputed):
+#             assert event.component_id == "w"
+#
+#             step += 1
+#
+#             print(step, event.t, norm(queue, u=event.state_component[0]),
+#                     time()-t_last_step)
+#             if step % 10 == 0:
+#                 vis.write_vtk_file("rank%d-fld-%04d.vtu" % (rank, step),
+#                         [
+#                             ("u", event.state_component[0]),
+#                             ("v", event.state_component[1:]),
+#                             ])
+#             t_last_step = time()
+#     logger.debug("Rank %d exiting", rank)
+
 def mpi_communication_entrypoint():
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
@@ -112,9 +260,9 @@ def mpi_communication_entrypoint():
 
     dt_stepper = set_up_rk4("w", dt, fields, rhs)
 
-    final_t = 1
+    final_t = 10
     nsteps = int(final_t/dt)
-    print("dt=%g nsteps=%d" % (dt, nsteps))
+    print("rank=%d dt=%g nsteps=%d" % (rank, dt, nsteps))
 
     from grudge.shortcuts import make_visualizer
     vis = make_visualizer(vol_discr, vis_order=order)
-- 
GitLab


From 3791c78c52ef3c41d3a8a0daa59b5b4241899388 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Thu, 18 Jan 2018 12:20:24 -0600
Subject: [PATCH 31/83] Add simple test case

---
 examples/wave/wave-min.py      |   2 -
 test/test_mpi_communication.py | 221 +++++++++++----------------------
 2 files changed, 70 insertions(+), 153 deletions(-)

diff --git a/examples/wave/wave-min.py b/examples/wave/wave-min.py
index bd3424bc..aa119aa5 100644
--- a/examples/wave/wave-min.py
+++ b/examples/wave/wave-min.py
@@ -84,8 +84,6 @@ def main(write_output=True, order=4):
 
     # print(sym.pretty(op.sym_operator()))
     bound_op = bind(discr, op.sym_operator())
-    print(bound_op)
-    1/0
 
     def rhs(t, w):
         return bound_op(queue, t=t, w=w)
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 30710d67..05def1d2 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -36,153 +36,66 @@ from grudge import sym, bind, Discretization
 from grudge.shortcuts import set_up_rk4
 
 
-# TODO: Make new test
-# Create a partitioned mesh and apply sin(2x + 3y) to its field
-# If everything is working, the boundaries of the partitions should be continuous
-# Look at int_tpair
-# Interpolate volume to boundary, ask for the opposite partition at the boundary
-# then compare
-# def mpi_communication_entrypoint():
-#     cl_ctx = cl.create_some_context()
-#     queue = cl.CommandQueue(cl_ctx)
-#     from meshmode.distributed import MPIMeshDistributor
-#
-#     from mpi4py import MPI
-#     comm = MPI.COMM_WORLD
-#     rank = comm.Get_rank()
-#     num_parts = comm.Get_size()
-#
-#     mesh_dist = MPIMeshDistributor(comm)
-#
-#     dims = 2
-#     dt = 0.04
-#     order = 6
-#
-#     if mesh_dist.is_mananger_rank():
-#         from meshmode.mesh.generation import generate_regular_rect_mesh
-#         mesh = generate_regular_rect_mesh(a=(-0.5,)*dims,
-#                                           b=(0.5,)*dims,
-#                                           n=(16,)*dims)
-#
-#         from pymetis import part_graph
-#         _, p = part_graph(num_parts,
-#                           xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
-#                           adjncy=mesh.nodal_adjacency.neighbors.tolist())
-#         part_per_element = np.array(p)
-#
-#         local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
-#     else:
-#         local_mesh = mesh_dist.receive_mesh_part()
-#
-#     vol_discr = Discretization(cl_ctx, local_mesh, order=order)
-#
-#     if 0:
-#         sym_x = sym.nodes(local_mesh.dim)
-#         myfunc_symb = sym.sin(np.dot(sym_x, [2, 3]))
-#         myfunc = bind(vol_discr, myfunc_symb)(queue)
-#
-#         sym_all_faces_func = sym.cse(
-#             sym.interp("vol", "all_faces")(sym.var("myfunc")))
-#         sym_int_faces_func = sym.cse(
-#             sym.interp("vol", "int_faces")(sym.var("myfunc")))
-#         sym_bdry_faces_func = sym.cse(
-#             sym.interp(sym.BTAG_ALL, "all_faces")(
-#                 sym.interp("vol", sym.BTAG_ALL)(sym.var("myfunc"))))
-#
-#         bound_face_swap = bind(vol_discr,
-#             sym.interp("int_faces", "all_faces")(
-#                 sym.OppositeInteriorFaceSwap("int_faces")(
-#                     sym_int_faces_func)
-#                 ) - (sym_all_faces_func - sym_bdry_faces_func)
-#                 )
-#
-#         hopefully_zero = bound_face_swap(queue, myfunc=myfunc)
-#         np.set_printoptions(threshold=100000000, suppress=True)
-#         print(hopefully_zero)
-#
-#         import numpy.linalg as la
-#         print(la.norm(hopefully_zero.get()))
-#     else:
-#         sym_x = sym.nodes(local_mesh.dim)
-#         myfunc_symb = sym.sin(np.dot(sym_x, [2, 3]))
-#         myfunc = bind(vol_discr, myfunc_symb)(queue)
-#
-#         sym_all_faces_func = sym.cse(
-#             sym.interp("vol", "all_faces")(sym.var("myfunc"))
-#             - sym.interp(sym.BTAG_ALL, "all_faces")(
-#                 sym.interp("vol", sym.BTAG_ALL)(sym.var("myfunc")))
-#             )
-#         sym_int_faces_func = sym.cse(
-#             sym.interp("vol", "int_faces")(sym.var("myfunc")))
-#
-#         swapped = bind(vol_discr,
-#             sym.interp("int_faces", "all_faces")(
-#                 sym.OppositeInteriorFaceSwap("int_faces")(
-#                     sym_int_faces_func)
-#                 ))(queue, myfunc=myfunc)
-#         unswapped = bind(vol_discr, sym_all_faces_func)(queue, myfunc=myfunc)
-#
-#         together = np.zeros((3,)+swapped.shape)
-#         print(together.shape)
-#         together[0] = swapped.get()
-#         together[1] = unswapped.get()
-#         together[2] = together[1]-together[0]
-#
-#         np.set_printoptions(threshold=100000000, suppress=True, linewidth=150)
-#         print(together.T)
-#
-#         import numpy.linalg as la
-#         print(la.norm(hopefully_zero.get()))
-#     1/0
-#
-#     w = sym.make_sym_array("w", vol_discr.dim+1)
-#     operator = sym.InverseMassOperator()(
-#                     sym.FaceMassOperator()(sym.int_tpair(w)))
-#
-#     # print(sym.pretty(operator)
-#     bound_op = bind(vol_discr, operator)
-#     # print(bound_op)
-#     # 1/0
-#
-#     def rhs(t, w):
-#         return bound_op(queue, t=t, w=w)
-#
-#     from pytools.obj_array import join_fields
-#     fields = join_fields(vol_discr.zeros(queue),
-#             [vol_discr.zeros(queue) for i in range(vol_discr.dim)])
-#
-#     dt_stepper = set_up_rk4("w", dt, fields, rhs)
-#
-#     final_t = 10
-#     nsteps = int(final_t/dt)
-#     print("rank=%d dt=%g nsteps=%d" % (rank, dt, nsteps))
-#
-#     from grudge.shortcuts import make_visualizer
-#     vis = make_visualizer(vol_discr, vis_order=order)
-#
-#     step = 0
-#
-#     norm = bind(vol_discr, sym.norm(2, sym.var("u")))
-#
-#     from time import time
-#     t_last_step = time()
-#
-#     for event in dt_stepper.run(t_end=final_t):
-#         if isinstance(event, dt_stepper.StateComputed):
-#             assert event.component_id == "w"
-#
-#             step += 1
-#
-#             print(step, event.t, norm(queue, u=event.state_component[0]),
-#                     time()-t_last_step)
-#             if step % 10 == 0:
-#                 vis.write_vtk_file("rank%d-fld-%04d.vtu" % (rank, step),
-#                         [
-#                             ("u", event.state_component[0]),
-#                             ("v", event.state_component[1:]),
-#                             ])
-#             t_last_step = time()
-#     logger.debug("Rank %d exiting", rank)
+def boundary_communication_entrypoint():
+    cl_ctx = cl.create_some_context()
+    queue = cl.CommandQueue(cl_ctx)
+    from meshmode.distributed import MPIMeshDistributor
+
+    from mpi4py import MPI
+    comm = MPI.COMM_WORLD
+    num_parts = comm.Get_size()
+
+    mesh_dist = MPIMeshDistributor(comm)
+
+    order = 2
+
+    if mesh_dist.is_mananger_rank():
+        from meshmode.mesh.generation import generate_regular_rect_mesh
+        mesh = generate_regular_rect_mesh(a=(-0.5,)*2,
+                                          b=(0.5,)*2,
+                                          n=(3,)*2)
+
+        from pymetis import part_graph
+        _, p = part_graph(num_parts,
+                          xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
+                          adjncy=mesh.nodal_adjacency.neighbors.tolist())
+        part_per_element = np.array(p)
+
+        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
+    else:
+        local_mesh = mesh_dist.receive_mesh_part()
+
+    vol_discr = Discretization(cl_ctx, local_mesh, order=order)
+
+    sym_x = sym.nodes(local_mesh.dim)
+    myfunc_symb = sym.sin(np.dot(sym_x, [2, 3]))
+    myfunc = bind(vol_discr, myfunc_symb)(queue)
+
+    sym_all_faces_func = sym.cse(
+        sym.interp("vol", "all_faces")(sym.var("myfunc")))
+    sym_int_faces_func = sym.cse(
+        sym.interp("vol", "int_faces")(sym.var("myfunc")))
+    sym_bdry_faces_func = sym.cse(
+        sym.interp(sym.BTAG_ALL, "all_faces")(
+            sym.interp("vol", sym.BTAG_ALL)(sym.var("myfunc"))))
+
+    bound_face_swap = bind(vol_discr,
+        sym.interp("int_faces", "all_faces")(
+            sym.OppositeInteriorFaceSwap("int_faces")(
+                sym_int_faces_func)
+            ) - (sym_all_faces_func - sym_bdry_faces_func)
+            )
+
+    hopefully_zero = bound_face_swap(queue, myfunc=myfunc)
+    import numpy.linalg as la
+    error = la.norm(hopefully_zero.get())
+
+    np.set_printoptions(threshold=100000000, suppress=True)
+    print(hopefully_zero)
+    print(error)
+
+    assert error < 1e-14
+
 
 def mpi_communication_entrypoint():
     cl_ctx = cl.create_some_context()
@@ -295,15 +208,19 @@ def mpi_communication_entrypoint():
 # {{{ MPI test pytest entrypoint
 
 @pytest.mark.mpi
-@pytest.mark.parametrize("num_partitions", [2])
-def test_mpi_communication(num_partitions):
+@pytest.mark.parametrize("testcase", [
+    # "MPI_COMMUNICATION",
+    "BOUNDARY_COMMUNICATION"
+    ])
+@pytest.mark.parametrize("num_ranks", [2])
+def test_mpi(testcase, num_ranks):
     pytest.importorskip("mpi4py")
 
-    num_ranks = num_partitions
     from subprocess import check_call
     import sys
     newenv = os.environ.copy()
     newenv["RUN_WITHIN_MPI"] = "1"
+    newenv[testcase] = "1"
     check_call([
         "mpiexec", "-np", str(num_ranks), "-x", "RUN_WITHIN_MPI",
         sys.executable, __file__],
@@ -313,8 +230,10 @@ def test_mpi_communication(num_partitions):
 
 
 if __name__ == "__main__":
-    if "RUN_WITHIN_MPI" in os.environ:
+    if "MPI_COMMUNICATION" in os.environ:
         mpi_communication_entrypoint()
+    elif "BOUNDARY_COMMUNICATION" in os.environ:
+        boundary_communication_entrypoint()
     else:
         import sys
         if len(sys.argv) > 1:
-- 
GitLab


From e90514e08be53e277838b6f7a45cd7e4135398ed Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Thu, 18 Jan 2018 12:31:10 -0600
Subject: [PATCH 32/83] Small fixes

---
 grudge/symbolic/mappers/__init__.py | 34 +++++------------------------
 grudge/symbolic/operators.py        |  2 +-
 2 files changed, 6 insertions(+), 30 deletions(-)

diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index d2ef5c66..a0d16d42 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -355,37 +355,10 @@ class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
                 distributed_work += op.InterpolationOperator(dd_in=btag_part,
                                              dd_out=expr.op.dd_out)(mapped_field)
             return expr + distributed_work
-        # if isinstance(expr.op, op.RefFaceMassOperator):
-        #     return expr.op(RankCommunicationMapper(self.connected_parts)(expr.field))
         else:
             return IdentityMapper.map_operator_binding(self, expr)
 
 
-# class RankCommunicationMapper(CSECachingMapperMixin, IdentityMapper):
-#     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
-#
-#     def __init__(self, connected_parts):
-#         self.connected_parts = connected_parts
-#
-#     def map_operator_binding(self, expr):
-#         from meshmode.mesh import BTAG_PARTITION
-#         from meshmode.discretization.connection import (FACE_RESTR_ALL,
-#                                                         FACE_RESTR_INTERIOR)
-#         if (isinstance(expr.op, op.InterpolationOperator)
-#                 and expr.op.dd_in.domain_tag is FACE_RESTR_INTERIOR
-#                 and expr.op.dd_out.domain_tag is FACE_RESTR_ALL):
-#             distributed_work = 0
-#             for i_remote_part in self.connected_parts:
-#                 mapped_field = RankGeometryChanger(i_remote_part)(expr.field)
-#                 btag_part = BTAG_PARTITION(i_remote_part)
-#                 distributed_work += op.InterpolationOperator(dd_in=btag_part,
-#                                              dd_out=expr.op.dd_out)(mapped_field)
-#             return expr + distributed_work
-#
-#         else:
-#             return IdentityMapper.map_operator_binding(self, expr)
-
-
 class RankGeometryChanger(CSECachingMapperMixin, IdentityMapper):
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
@@ -402,8 +375,11 @@ class RankGeometryChanger(CSECachingMapperMixin, IdentityMapper):
             % (str(expr), self.prev_dd, self.new_dd))
 
     def map_operator_binding(self, expr):
-        if isinstance(expr.op, op.OppositeInteriorFaceSwap):
-            return op.OppositePartitionFaceSwap(dd_in=self.new_dd)(
+        if (isinstance(expr.op, op.OppositeInteriorFaceSwap)
+                    and expr.op.dd_in == self.prev_dd
+                    and expr.op.dd_out == self.prev_dd):
+            return op.OppositePartitionFaceSwap(dd_in=self.new_dd,
+                                                dd_out=self.new_dd)(
                                                         self.rec(expr.field))
         elif (isinstance(expr.op, op.InterpolationOperator)
                     and expr.op.dd_out == self.prev_dd):
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 7dc28669..739e7b6c 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -396,7 +396,7 @@ class OppositePartitionFaceSwap(Operator):
         if self.dd_out != self.dd_in:
             raise ValueError("dd_out and dd_in must be identical")
 
-        self.i_remote_part = dd_in.domain_tag.part_nr
+        self.i_remote_part = self.dd_in.domain_tag.part_nr
 
     mapper_method = intern("map_opposite_partition_face_swap")
 
-- 
GitLab


From dd233f4d2041738d07b280d85b9e04fb78c1bf5b Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Mon, 22 Jan 2018 13:22:38 -0600
Subject: [PATCH 33/83] Refine test cases

---
 test/test_mpi_communication.py | 35 +++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 05def1d2..db14dd13 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -36,7 +36,7 @@ from grudge import sym, bind, Discretization
 from grudge.shortcuts import set_up_rk4
 
 
-def boundary_communication_entrypoint():
+def simple_communication_entrypoint():
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
     from meshmode.distributed import MPIMeshDistributor
@@ -208,19 +208,31 @@ def mpi_communication_entrypoint():
 # {{{ MPI test pytest entrypoint
 
 @pytest.mark.mpi
-@pytest.mark.parametrize("testcase", [
-    # "MPI_COMMUNICATION",
-    "BOUNDARY_COMMUNICATION"
-    ])
 @pytest.mark.parametrize("num_ranks", [2])
-def test_mpi(testcase, num_ranks):
+def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
 
     from subprocess import check_call
     import sys
     newenv = os.environ.copy()
     newenv["RUN_WITHIN_MPI"] = "1"
-    newenv[testcase] = "1"
+    newenv["TEST_MPI_COMMUNICATION"] = "1"
+    check_call([
+        "mpiexec", "-np", str(num_ranks), "-x", "RUN_WITHIN_MPI",
+        sys.executable, __file__],
+        env=newenv)
+
+
+@pytest.mark.mpi
+@pytest.mark.parametrize("num_ranks", [2])
+def test_simple_mpi(num_ranks):
+    pytest.importorskip("mpi4py")
+
+    from subprocess import check_call
+    import sys
+    newenv = os.environ.copy()
+    newenv["RUN_WITHIN_MPI"] = "1"
+    newenv["TEST_SIMPLE_COMMUNICATION"] = "1"
     check_call([
         "mpiexec", "-np", str(num_ranks), "-x", "RUN_WITHIN_MPI",
         sys.executable, __file__],
@@ -230,10 +242,11 @@ def test_mpi(testcase, num_ranks):
 
 
 if __name__ == "__main__":
-    if "MPI_COMMUNICATION" in os.environ:
-        mpi_communication_entrypoint()
-    elif "BOUNDARY_COMMUNICATION" in os.environ:
-        boundary_communication_entrypoint()
+    if "RUN_WITHIN_MPI" in os.environ:
+        if "TEST_MPI_COMMUNICATION" in os.environ:
+            mpi_communication_entrypoint()
+        elif "TEST_SIMPLE_COMMUNICATION" in os.environ:
+            simple_communication_entrypoint()
     else:
         import sys
         if len(sys.argv) > 1:
-- 
GitLab


From 4b6c9f9c31afd222477e1df952a6be62047d21cd Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Mon, 22 Jan 2018 13:24:09 -0600
Subject: [PATCH 34/83] Refine names

---
 test/test_mpi_communication.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index db14dd13..f3a81181 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -36,7 +36,7 @@ from grudge import sym, bind, Discretization
 from grudge.shortcuts import set_up_rk4
 
 
-def simple_communication_entrypoint():
+def simple_mpi_communication_entrypoint():
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
     from meshmode.distributed import MPIMeshDistributor
@@ -232,7 +232,7 @@ def test_simple_mpi(num_ranks):
     import sys
     newenv = os.environ.copy()
     newenv["RUN_WITHIN_MPI"] = "1"
-    newenv["TEST_SIMPLE_COMMUNICATION"] = "1"
+    newenv["TEST_SIMPLE_MPI_COMMUNICATION"] = "1"
     check_call([
         "mpiexec", "-np", str(num_ranks), "-x", "RUN_WITHIN_MPI",
         sys.executable, __file__],
@@ -245,8 +245,8 @@ if __name__ == "__main__":
     if "RUN_WITHIN_MPI" in os.environ:
         if "TEST_MPI_COMMUNICATION" in os.environ:
             mpi_communication_entrypoint()
-        elif "TEST_SIMPLE_COMMUNICATION" in os.environ:
-            simple_communication_entrypoint()
+        elif "TEST_SIMPLE_MPI_COMMUNICATION" in os.environ:
+            simple_mpi_communication_entrypoint()
     else:
         import sys
         if len(sys.argv) > 1:
-- 
GitLab


From 322eacbd990420d69c4e7e5ac5a8bb8fafaefa49 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Mon, 22 Jan 2018 15:25:07 -0600
Subject: [PATCH 35/83] Fix whitespace

---
 grudge/symbolic/operators.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 739e7b6c..c4f6ed65 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -416,7 +416,6 @@ class OppositeInteriorFaceSwap(Operator):
         if self.dd_out != self.dd_in:
             raise ValueError("dd_out and dd_in must be identical")
 
-
     mapper_method = intern("map_opposite_interior_face_swap")
 
 
-- 
GitLab


From 924bf21ade8cda6de4bba619b397a82d3bb544da Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Thu, 25 Jan 2018 11:51:30 -0600
Subject: [PATCH 36/83] Add simple mpi test

---
 test/test_mpi_communication.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index f3a81181..6244dcc8 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -36,7 +36,7 @@ from grudge import sym, bind, Discretization
 from grudge.shortcuts import set_up_rk4
 
 
-def simple_mpi_communication_entrypoint():
+def simple_mpi_communication_entrypoint(order):
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
     from meshmode.distributed import MPIMeshDistributor
@@ -47,19 +47,19 @@ def simple_mpi_communication_entrypoint():
 
     mesh_dist = MPIMeshDistributor(comm)
 
-    order = 2
-
     if mesh_dist.is_mananger_rank():
         from meshmode.mesh.generation import generate_regular_rect_mesh
-        mesh = generate_regular_rect_mesh(a=(-0.5,)*2,
-                                          b=(0.5,)*2,
+        mesh = generate_regular_rect_mesh(a=(-1,)*2,
+                                          b=(1,)*2,
                                           n=(3,)*2)
 
-        from pymetis import part_graph
-        _, p = part_graph(num_parts,
-                          xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
-                          adjncy=mesh.nodal_adjacency.neighbors.tolist())
-        part_per_element = np.array(p)
+        # This gives [0, 0, 0, 1, 0, 1, 1, 1]
+        # from pymetis import part_graph
+        # _, p = part_graph(num_parts,
+        #                   xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
+        #                   adjncy=mesh.nodal_adjacency.neighbors.tolist())
+        # part_per_element = np.array(p)
+        part_per_element = np.array([0, 0, 0, 1, 0, 1, 1, 1])
 
         local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
     else:
@@ -225,7 +225,8 @@ def test_mpi(num_ranks):
 
 @pytest.mark.mpi
 @pytest.mark.parametrize("num_ranks", [2])
-def test_simple_mpi(num_ranks):
+@pytest.mark.parametrize("order", [2])
+def test_simple_mpi(num_ranks, order):
     pytest.importorskip("mpi4py")
 
     from subprocess import check_call
@@ -233,6 +234,7 @@ def test_simple_mpi(num_ranks):
     newenv = os.environ.copy()
     newenv["RUN_WITHIN_MPI"] = "1"
     newenv["TEST_SIMPLE_MPI_COMMUNICATION"] = "1"
+    newenv["order"] = str(order)
     check_call([
         "mpiexec", "-np", str(num_ranks), "-x", "RUN_WITHIN_MPI",
         sys.executable, __file__],
@@ -246,7 +248,8 @@ if __name__ == "__main__":
         if "TEST_MPI_COMMUNICATION" in os.environ:
             mpi_communication_entrypoint()
         elif "TEST_SIMPLE_MPI_COMMUNICATION" in os.environ:
-            simple_mpi_communication_entrypoint()
+            order = int(os.environ["order"])
+            simple_mpi_communication_entrypoint(order)
     else:
         import sys
         if len(sys.argv) > 1:
-- 
GitLab


From af2b38d1a35c61674bb3ef46678d25ad1f44d4b0 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 31 Jan 2018 00:56:52 -0600
Subject: [PATCH 37/83] Fixes for interface changes after merging master into
 mpi-communication

---
 grudge/execution.py            | 9 ++++++---
 test/test_mpi_communication.py | 6 +++---
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 1f7e60b8..56ae7c1c 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -249,11 +249,12 @@ class ExecutionMapper(mappers.Evaluator,
         from mpi4py import MPI
         mpi_comm = MPI.COMM_WORLD
 
-        grp_factory = self.discr.get_group_factory_for_quadrature_tag(sym.QTAG_NONE)
+        grp_factory = self.discrwb.group_factory_for_quadrature_tag(sym.QTAG_NONE)
 
+        volume_discr = self.discrwb.discr_from_dd("vol")
         from meshmode.distributed import MPIBoundaryCommunicator
         bdry_conn_future = MPIBoundaryCommunicator(mpi_comm, self.queue,
-                                                   self.discr.volume_discr,
+                                                   volume_discr,
                                                    grp_factory,
                                                    op.i_remote_part)
         # TODO: Need to tell the future what boundary data to transfer
@@ -549,8 +550,10 @@ def process_sym_operator(discrwb, sym_operator, post_bind_mapper=None,
     sym_operator = mappers.GlobalToReferenceMapper(discrwb.ambient_dim)(sym_operator)
 
     dumper("before-distributed", sym_operator)
+
+    volume_mesh = discrwb.discr_from_dd("vol").mesh
     from meshmode.distributed import get_connected_partitions
-    connected_parts = get_connected_partitions(mesh)
+    connected_parts = get_connected_partitions(volume_mesh)
     sym_operator = mappers.DistributedMapper(connected_parts)(sym_operator)
 
     # Ordering restriction:
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 6244dcc8..68901da5 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -32,7 +32,7 @@ import pyopencl as cl
 import logging
 logger = logging.getLogger(__name__)
 
-from grudge import sym, bind, Discretization
+from grudge import sym, bind, DGDiscretizationWithBoundaries
 from grudge.shortcuts import set_up_rk4
 
 
@@ -65,7 +65,7 @@ def simple_mpi_communication_entrypoint(order):
     else:
         local_mesh = mesh_dist.receive_mesh_part()
 
-    vol_discr = Discretization(cl_ctx, local_mesh, order=order)
+    vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order)
 
     sym_x = sym.nodes(local_mesh.dim)
     myfunc_symb = sym.sin(np.dot(sym_x, [2, 3]))
@@ -129,7 +129,7 @@ def mpi_communication_entrypoint():
     else:
         local_mesh = mesh_dist.receive_mesh_part()
 
-    vol_discr = Discretization(cl_ctx, local_mesh, order=order)
+    vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order)
 
     source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim]
     source_width = 0.05
-- 
GitLab


From 202e431a235fbd026abe19a142cce2295d07a9f9 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Thu, 1 Feb 2018 18:03:18 -0600
Subject: [PATCH 38/83] Restructure MPI comm into setup/send+receive combo

---
 grudge/discretization.py            | 49 ++++++++++++++++++++++++++++-
 grudge/execution.py                 | 39 ++++++++++++++---------
 grudge/symbolic/mappers/__init__.py |  2 +-
 test/test_mpi_communication.py      |  6 ++--
 4 files changed, 78 insertions(+), 18 deletions(-)

diff --git a/grudge/discretization.py b/grudge/discretization.py
index 6f39762c..bc59299f 100644
--- a/grudge/discretization.py
+++ b/grudge/discretization.py
@@ -23,7 +23,9 @@ THE SOFTWARE.
 """
 
 
+import six
 from pytools import memoize_method
+import pyopencl as cl
 from grudge import sym
 import numpy as np
 
@@ -47,7 +49,8 @@ class DGDiscretizationWithBoundaries(DiscretizationBase):
     .. automethod :: zeros
     """
 
-    def __init__(self, cl_ctx, mesh, order, quad_min_degrees=None):
+    def __init__(self, cl_ctx, mesh, order, quad_min_degrees=None,
+            mpi_communicator=None):
         """
         :param quad_min_degrees: A mapping from quadrature tags to the degrees
             to which the desired quadrature is supposed to be exact.
@@ -74,6 +77,50 @@ class DGDiscretizationWithBoundaries(DiscretizationBase):
 
         # }}}
 
+        with cl.CommandQueue(cl_ctx) as queue:
+            self._dist_boundary_connections = \
+                    self._set_up_distributed_communication(mpi_communicator, queue)
+
+        self.mpi_communicator = mpi_communicator
+
+    def _set_up_distributed_communication(self, mpi_communicator, queue):
+        from_dd = sym.DOFDesc("vol", sym.QTAG_NONE)
+
+        from meshmode.distributed import get_connected_partitions
+        connected_parts = get_connected_partitions(self._volume_discr.mesh)
+
+        if mpi_communicator is None and connected_parts:
+            raise RuntimeError("must supply an MPI communicator when using a "
+                    "distributed mesh")
+
+        grp_factory = self.group_factory_for_quadrature_tag(sym.QTAG_NONE)
+
+        setup_helpers = {}
+        boundary_connections = {}
+
+        from meshmode.distributed import MPIBoundaryCommSetupHelper
+        for i_remote_part in connected_parts:
+            conn = self.connection_from_dds(
+                    from_dd,
+                    sym.DOFDesc(sym.BTAG_PARTITION(i_remote_part), sym.QTAG_NONE))
+            setup_helper = setup_helpers[i_remote_part] = MPIBoundaryCommSetupHelper(
+                    mpi_communicator, queue, conn, i_remote_part, grp_factory)
+            setup_helper.post_sends()
+
+        for i_remote_part, setup_helper in six.iteritems(setup_helpers):
+            boundary_connections[i_remote_part] = setup_helper.complete_setup()
+
+        return boundary_connections
+
+    def get_distributed_boundary_swap_connection(self, dd):
+        if dd.quadrature_tag != sym.QTAG_NONE:
+            # FIXME
+            raise NotImplementedError("Distributed communication with quadrature")
+
+        assert isinstance(dd.domain_tag, sym.BTAG_PARTITION)
+
+        return self._dist_boundary_connections[dd.domain_tag.part_nr]
+
     @memoize_method
     def discr_from_dd(self, dd):
         dd = sym.as_dofdesc(dd)
diff --git a/grudge/execution.py b/grudge/execution.py
index 56ae7c1c..1fdec1b9 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -36,6 +36,9 @@ import logging
 logger = logging.getLogger(__name__)
 
 
+MPI_TAG_GRUDGE_DATA = 0x3700d3e
+
+
 # {{{ exec mapper
 
 class ExecutionMapper(mappers.Evaluator,
@@ -246,20 +249,28 @@ class ExecutionMapper(mappers.Evaluator,
         return conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
 
     def map_opposite_partition_face_swap(self, op, field_expr):
-        from mpi4py import MPI
-        mpi_comm = MPI.COMM_WORLD
-
-        grp_factory = self.discrwb.group_factory_for_quadrature_tag(sym.QTAG_NONE)
-
-        volume_discr = self.discrwb.discr_from_dd("vol")
-        from meshmode.distributed import MPIBoundaryCommunicator
-        bdry_conn_future = MPIBoundaryCommunicator(mpi_comm, self.queue,
-                                                   volume_discr,
-                                                   grp_factory,
-                                                   op.i_remote_part)
-        # TODO: Need to tell the future what boundary data to transfer
-        bdry_conn, _ = bdry_conn_future()
-        return bdry_conn(self.queue, self.rec(field_expr)).with_queue(self.queue)
+        assert op.dd_in == op.dd_out
+
+        bdry_conn = self.discrwb.get_distributed_boundary_swap_connection(op.dd_in)
+        loc_bdry_vec = self.rec(field_expr).get(self.queue)
+
+        comm = self.discrwb.mpi_communicator
+
+        remote_rank = op.dd_in.domain_tag.part_nr
+
+        send_req = comm.Isend(loc_bdry_vec, remote_rank,
+                tag=MPI_TAG_GRUDGE_DATA)
+
+        recv_vec_host = np.empty_like(loc_bdry_vec)
+        comm.Recv(recv_vec_host, source=remote_rank, tag=MPI_TAG_GRUDGE_DATA)
+        send_req.wait()
+
+        recv_vec_dev = cl.array.to_device(self.queue, recv_vec_host)
+
+        shuffled_recv_vec = bdry_conn(self.queue, recv_vec_dev) \
+                .with_queue(self.queue)
+
+        return shuffled_recv_vec
 
     def map_opposite_interior_face_swap(self, op, field_expr):
         dd = op.dd_in
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index ddba6c8d..a810a335 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -334,7 +334,7 @@ class OperatorBinder(CSECachingMapperMixin, IdentityMapper):
 # }}}
 
 
-# {{{ distributed mappers
+# {{{ mappers for distributed computation
 
 class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 68901da5..208de1af 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -65,7 +65,8 @@ def simple_mpi_communication_entrypoint(order):
     else:
         local_mesh = mesh_dist.receive_mesh_part()
 
-    vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order)
+    vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order,
+            mpi_communicator=comm)
 
     sym_x = sym.nodes(local_mesh.dim)
     myfunc_symb = sym.sin(np.dot(sym_x, [2, 3]))
@@ -129,7 +130,8 @@ def mpi_communication_entrypoint():
     else:
         local_mesh = mesh_dist.receive_mesh_part()
 
-    vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order)
+    vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order,
+            mpi_communicator=comm)
 
     source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim]
     source_width = 0.05
-- 
GitLab


From 665072636a507cd18013d915c0d198ee20abaebd Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Thu, 1 Feb 2018 20:03:17 -0600
Subject: [PATCH 39/83] Point CIs at meshmode partition branch

---
 requirements.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index deb09394..ee4c5287 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,7 @@ git+https://gitlab.tiker.net/inducer/dagrt.git
 git+https://gitlab.tiker.net/inducer/leap.git
 git+https://github.com/inducer/meshpy.git
 git+https://github.com/inducer/modepy.git
-git+https://github.com/inducer/meshmode.git
+
+# FIXME: Revert to this when merged
+#git+https://github.com/inducer/meshmode.git
+git+https://gitlab.tiker.net/eshoag2/meshmode.git@partition
-- 
GitLab


From e0000155761ba6783433870ab2a8d2c181d7c06b Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Thu, 1 Feb 2018 20:29:09 -0600
Subject: [PATCH 40/83] Install MPI for CI

---
 .gitlab-ci.yml | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a865565e..98eb9c5d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,13 +1,28 @@
-Python 2.7 POCL:
+Python 2.7 POCL MPI:
   script:
   - export PY_EXE=python2.7
   - export PYOPENCL_TEST=portable
-  - export EXTRA_INSTALL="numpy mako"
+  - export EXTRA_INSTALL="numpy mako mpi4py"
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
   - ". ./build-and-test-py-project.sh"
   tags:
   - python2.7
   - pocl
+  - mpi
+  except:
+  - tags
+
+Python 3.5 POCL MPI:
+  script:
+  - export PY_EXE=python3.5
+  - export PYOPENCL_TEST=portable
+  - export EXTRA_INSTALL="numpy mako mpi4py"
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
+  - ". ./build-and-test-py-project.sh"
+  tags:
+  - python3.5
+  - pocl
+  - mpi
   except:
   - tags
 
-- 
GitLab


From 6aacc7482c26851aba6ad4b41040b586ecdbde2c Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 7 Feb 2018 19:25:45 -0600
Subject: [PATCH 41/83] Improve a section comment

---
 grudge/execution.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index cc8703db..7b27390c 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -336,7 +336,8 @@ class ExecutionMapper(mappers.Evaluator,
 
     # }}}
 
-    # {{{ code execution functions
+    # {{{ instruction execution functions
+
     def map_insn_loopy_kernel(self, insn):
         kwargs = {}
         kdescr = insn.kernel_descriptor
-- 
GitLab


From 89a5a86cd56a5f1643a040a201467c9a25cac9f9 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Thu, 15 Feb 2018 16:10:40 -0600
Subject: [PATCH 42/83] grudge mpi communication

---
 grudge/execution.py                  | 53 ++++++++++++++---------
 grudge/symbolic/compiler.py          | 65 ++++++++++++++++++++++++++++
 grudge/symbolic/dofdesc_inference.py |  3 ++
 grudge/symbolic/mappers/__init__.py  |  2 +-
 grudge/symbolic/operators.py         |  2 +
 test/test_mpi_communication.py       | 26 +++++------
 6 files changed, 115 insertions(+), 36 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 7b27390c..cdc7579f 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -36,7 +36,8 @@ import logging
 logger = logging.getLogger(__name__)
 
 
-MPI_TAG_GRUDGE_DATA = 0x3700d3e
+# TODO: Maybe we should move this somewhere else.
+# MPI_TAG_GRUDGE_DATA = 0x3700d3e
 
 
 # {{{ exec mapper
@@ -251,27 +252,9 @@ class ExecutionMapper(mappers.Evaluator,
 
     def map_opposite_partition_face_swap(self, op, field_expr):
         assert op.dd_in == op.dd_out
-
         bdry_conn = self.discrwb.get_distributed_boundary_swap_connection(op.dd_in)
-        loc_bdry_vec = self.rec(field_expr).get(self.queue)
-
-        comm = self.discrwb.mpi_communicator
-
-        remote_rank = op.dd_in.domain_tag.part_nr
-
-        send_req = comm.Isend(loc_bdry_vec, remote_rank,
-                tag=MPI_TAG_GRUDGE_DATA)
-
-        recv_vec_host = np.empty_like(loc_bdry_vec)
-        comm.Recv(recv_vec_host, source=remote_rank, tag=MPI_TAG_GRUDGE_DATA)
-        send_req.wait()
-
-        recv_vec_dev = cl.array.to_device(self.queue, recv_vec_host)
-
-        shuffled_recv_vec = bdry_conn(self.queue, recv_vec_dev) \
-                .with_queue(self.queue)
-
-        return shuffled_recv_vec
+        remote_bdry_vec = self.rec(field_expr)  # swapped by RankDataSwapAssign
+        return bdry_conn(self.queue, remote_bdry_vec).with_queue(self.queue)
 
     def map_opposite_interior_face_swap(self, op, field_expr):
         return self.discrwb.opposite_face_connection()(
@@ -338,6 +321,34 @@ class ExecutionMapper(mappers.Evaluator,
 
     # {{{ instruction execution functions
 
+    def map_insn_rank_data_swap(self, insn):
+        local_data = self.rec(insn.field).get(self.queue)
+        comm = self.discrwb.mpi_communicator
+
+        send_req = comm.Isend(local_data, insn.i_remote_rank, tag=insn.tag)
+
+        remote_data_host = np.empty_like(local_data)
+        comm.Recv(remote_data_host, source=insn.i_remote_rank, tag=insn.tag)
+        send_req.wait()
+        remote_data = cl.array.to_device(self.queue, remote_data_host)
+
+        return [(insn.name, remote_data)], []
+
+        # class Future:
+        #     def is_ready(self):
+        #         return comm.improbe(source=insn.i_remote_rank, tag=insn.tag)
+        #
+        #     def __call__(self):
+        #         remote_data_host = np.empty_like(local_data)
+        #         comm.Recv(remote_data_host, source=insn.i_remote_rank, tag=insn.tag)
+        #         send_req.wait()
+        #
+        #         remote_data = cl.array.to_device(queue, remote_data_host)
+        #         return [(insn.name, remote_data)], []
+        #
+        # return [], [Future()]
+
+
     def map_insn_loopy_kernel(self, insn):
         kwargs = {}
         kdescr = insn.kernel_descriptor
diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index c555cea0..450b3cd4 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -198,6 +198,50 @@ class Assign(AssignBase):
     mapper_method = intern("map_insn_assign")
 
 
+class RankDataSwapAssign(Instruction):
+    """
+    .. attribute:: name
+    .. attribute:: field
+    .. attribute:: i_remote_rank
+
+        The number of the remote rank that this instruction swaps data with.
+
+    .. attribute:: mpi_tag_offset
+
+        A tag offset for mpi that should be unique for each instance within
+        a particular rank.
+
+    .. attribute:: dd_out
+    .. attribute:: comment
+    """
+    # TODO: Is this number ok? We probably want it to be global.
+    MPI_TAG_GRUDGE_DATA = 0x3700d3e
+
+    def __init__(self, name, field, op):
+        self.name = name
+        self.field = field
+        self.i_remote_rank = op.i_remote_part
+        self.dd_out = op.dd_out
+        self.tag = self.MPI_TAG_GRUDGE_DATA + op.mpi_tag_offset
+        self.comment = "Swap data with rank %02d" % self.i_remote_rank
+
+    @memoize_method
+    def get_assignees(self):
+        return set([self.name])
+
+    @memoize_method
+    def get_dependencies(self):
+        return _make_dep_mapper(include_subscripts=False)(self.field)
+
+    def __str__(self):
+        return ("{\n"
+                "    /* %s */\n"
+                "    %s <- %s\n"
+                "}\n" % (self.comment, self.name, self.field))
+
+    mapper_method = intern("map_insn_rank_data_swap")
+
+
 class ToDiscretizationScopedAssign(Assign):
     scope_indicator = "(to discr)-"
 
@@ -933,6 +977,9 @@ class ToLoopyInstructionMapper(object):
                 governing_dd=governing_dd)
             )
 
+    def map_insn_rank_data_swap(self, insn):
+        return insn
+
     def map_insn_assign_to_discr_scoped(self, insn):
         return insn
 
@@ -1122,6 +1169,8 @@ class OperatorCompiler(mappers.IdentityMapper):
     def map_operator_binding(self, expr, codegen_state, name_hint=None):
         if isinstance(expr.op, sym.RefDiffOperatorBase):
             return self.map_ref_diff_op_binding(expr, codegen_state)
+        elif isinstance(expr.op, sym.OppositePartitionFaceSwap):
+            return self.map_rank_data_swap_binding(expr, codegen_state)
         else:
             # make sure operator assignments stand alone and don't get muddled
             # up in vector math
@@ -1180,6 +1229,22 @@ class OperatorCompiler(mappers.IdentityMapper):
 
             return self.expr_to_var[expr]
 
+    def map_rank_data_swap_binding(self, expr, codegen_state):
+        try:
+            return self.expr_to_var[expr]
+        except KeyError:
+            field = self.rec(expr.field, codegen_state)
+            name = self.name_gen("raw_rank%02d_bdry_data" % expr.op.i_remote_part)
+            field_insn = RankDataSwapAssign(name=name, field=field, op=expr.op)
+            codegen_state.get_code_list(self).append(field_insn)
+            field_var = Variable(field_insn.name)
+            # TODO: Do I need this?
+            # self.expr_to_var[field] = field_var
+            self.expr_to_var[expr] = self.assign_to_new_var(codegen_state,
+                                                            expr.op(field_var),
+                                                            prefix="other")
+            return self.expr_to_var[expr]
+
     # }}}
 
 # }}}
diff --git a/grudge/symbolic/dofdesc_inference.py b/grudge/symbolic/dofdesc_inference.py
index 7e1de605..92be126f 100644
--- a/grudge/symbolic/dofdesc_inference.py
+++ b/grudge/symbolic/dofdesc_inference.py
@@ -201,6 +201,9 @@ class DOFDescInferenceMapper(RecursiveMapper, CSECachingMapperMixin):
                 for name, expr in zip(insn.names, insn.exprs)
                 ]
 
+    def map_insn_rank_data_swap(self, insn):
+        return [(insn.name, insn.dd_out)]
+
     map_insn_assign_to_discr_scoped = map_insn_assign
 
     def map_insn_diff_batch_assign(self, insn):
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 2ddd6f5d..9db1ab31 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -661,7 +661,7 @@ class StringifyMapper(pymbolic.mapper.stringifier.StringifyMapper):
         elif dd.domain_tag is FACE_RESTR_INTERIOR:
             result = "int_faces"
         elif isinstance(dd.domain_tag, BTAG_PARTITION):
-            result = "rank%d_faces" % dd.domain_tag.part_nr
+            result = "part%d_faces" % dd.domain_tag.part_nr
         else:
             result = fmt(dd.domain_tag)
 
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 294c4374..7cdb3d2b 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -427,6 +427,8 @@ class OppositePartitionFaceSwap(Operator):
             raise ValueError("dd_out and dd_in must be identical")
 
         self.i_remote_part = self.dd_in.domain_tag.part_nr
+        # FIXME: We should have a unique offset for each instance on a particular rank
+        self.mpi_tag_offset = 0
 
     mapper_method = intern("map_opposite_partition_face_swap")
 
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 208de1af..3bf012f3 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -36,7 +36,7 @@ from grudge import sym, bind, DGDiscretizationWithBoundaries
 from grudge.shortcuts import set_up_rk4
 
 
-def simple_mpi_communication_entrypoint(order):
+def simple_mpi_communication_entrypoint():
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
     from meshmode.distributed import MPIMeshDistributor
@@ -53,19 +53,17 @@ def simple_mpi_communication_entrypoint(order):
                                           b=(1,)*2,
                                           n=(3,)*2)
 
-        # This gives [0, 0, 0, 1, 0, 1, 1, 1]
-        # from pymetis import part_graph
-        # _, p = part_graph(num_parts,
-        #                   xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
-        #                   adjncy=mesh.nodal_adjacency.neighbors.tolist())
-        # part_per_element = np.array(p)
-        part_per_element = np.array([0, 0, 0, 1, 0, 1, 1, 1])
+        from pymetis import part_graph
+        _, p = part_graph(num_parts,
+                          xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
+                          adjncy=mesh.nodal_adjacency.neighbors.tolist())
+        part_per_element = np.array(p)
 
         local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
     else:
         local_mesh = mesh_dist.receive_mesh_part()
 
-    vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order,
+    vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=5,
             mpi_communicator=comm)
 
     sym_x = sym.nodes(local_mesh.dim)
@@ -87,6 +85,9 @@ def simple_mpi_communication_entrypoint(order):
             ) - (sym_all_faces_func - sym_bdry_faces_func)
             )
 
+    print(bound_face_swap)
+    # 1/0
+
     hopefully_zero = bound_face_swap(queue, myfunc=myfunc)
     import numpy.linalg as la
     error = la.norm(hopefully_zero.get())
@@ -227,8 +228,7 @@ def test_mpi(num_ranks):
 
 @pytest.mark.mpi
 @pytest.mark.parametrize("num_ranks", [2])
-@pytest.mark.parametrize("order", [2])
-def test_simple_mpi(num_ranks, order):
+def test_simple_mpi(num_ranks):
     pytest.importorskip("mpi4py")
 
     from subprocess import check_call
@@ -236,7 +236,6 @@ def test_simple_mpi(num_ranks, order):
     newenv = os.environ.copy()
     newenv["RUN_WITHIN_MPI"] = "1"
     newenv["TEST_SIMPLE_MPI_COMMUNICATION"] = "1"
-    newenv["order"] = str(order)
     check_call([
         "mpiexec", "-np", str(num_ranks), "-x", "RUN_WITHIN_MPI",
         sys.executable, __file__],
@@ -250,8 +249,7 @@ if __name__ == "__main__":
         if "TEST_MPI_COMMUNICATION" in os.environ:
             mpi_communication_entrypoint()
         elif "TEST_SIMPLE_MPI_COMMUNICATION" in os.environ:
-            order = int(os.environ["order"])
-            simple_mpi_communication_entrypoint(order)
+            simple_mpi_communication_entrypoint()
     else:
         import sys
         if len(sys.argv) > 1:
-- 
GitLab


From 3b8ea9f43d7b4ff3e159d47333c11f8e33ffdf78 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Mon, 26 Feb 2018 10:01:48 -0600
Subject: [PATCH 43/83] Add tag distribution

---
 grudge/execution.py                 | 91 ++++++++++++++++++++++-------
 grudge/symbolic/compiler.py         | 19 +++---
 grudge/symbolic/mappers/__init__.py | 50 +++++++++++++++-
 grudge/symbolic/operators.py        |  9 ++-
 test/test_mpi_communication.py      | 42 ++++++-------
 5 files changed, 158 insertions(+), 53 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index cdc7579f..a12c6dbe 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -325,29 +325,49 @@ class ExecutionMapper(mappers.Evaluator,
         local_data = self.rec(insn.field).get(self.queue)
         comm = self.discrwb.mpi_communicator
 
-        send_req = comm.Isend(local_data, insn.i_remote_rank, tag=insn.tag)
+        # print("Sending data to rank %d with tag %d"
+        #             % (insn.i_remote_rank, insn.send_tag))
+        send_req = comm.Isend(local_data, insn.i_remote_rank, tag=insn.send_tag)
 
         remote_data_host = np.empty_like(local_data)
-        comm.Recv(remote_data_host, source=insn.i_remote_rank, tag=insn.tag)
-        send_req.wait()
-        remote_data = cl.array.to_device(self.queue, remote_data_host)
-
-        return [(insn.name, remote_data)], []
-
-        # class Future:
-        #     def is_ready(self):
-        #         return comm.improbe(source=insn.i_remote_rank, tag=insn.tag)
-        #
-        #     def __call__(self):
-        #         remote_data_host = np.empty_like(local_data)
-        #         comm.Recv(remote_data_host, source=insn.i_remote_rank, tag=insn.tag)
-        #         send_req.wait()
-        #
-        #         remote_data = cl.array.to_device(queue, remote_data_host)
-        #         return [(insn.name, remote_data)], []
-        #
-        # return [], [Future()]
+        recv_req = comm.Irecv(remote_data_host, insn.i_remote_rank, insn.recv_tag)
 
+        # Do all instructions complete before futures?
+        # FIXME: We CANNOT have any possibility of deadlock
+        # One option is to add an attribute that tells the scheduler that this should not be foreced
+
+        class RecvFuture:
+            def __init__(self, recv_req, insn_name, remote_data_host, queue):
+                self.receive_request = recv_req
+                self.insn_name = insn_name
+                self.remote_data_host = remote_data_host
+                self.queue = queue
+
+            def is_ready(self):
+                return self.receive_request.Test()
+
+            def __call__(self):
+                # assert self.is_ready(), "RecvFuture was not ready to be called!"
+                self.receive_request.Wait()
+                remote_data = cl.array.to_device(self.queue, self.remote_data_host)
+                return [(self.insn_name, remote_data)], []
+
+
+        class SendFuture:
+            def __init__(self, send_request):
+                self.send_request = send_request
+
+            def is_ready(self):
+                return self.send_request.Test()
+
+            def __call__(self):
+                # assert self.is_ready(), "SendFuture was not ready to be called!"
+                self.send_request.wait()
+                return [], []
+
+
+        return [], [RecvFuture(recv_req, insn.name, remote_data_host, self.queue),
+                    SendFuture(send_req)]
 
     def map_insn_loopy_kernel(self, insn):
         kwargs = {}
@@ -558,6 +578,37 @@ def process_sym_operator(discrwb, sym_operator, post_bind_mapper=None,
     connected_parts = get_connected_partitions(volume_mesh)
     sym_operator = mappers.DistributedMapper(connected_parts)(sym_operator)
 
+    # TODO
+    # This MPI communication my not be necessary. The goal is to define unique and
+    # consistent tags for each OppSwap. This could be achieved by defining some
+    # ordering of these opperators and assigning tags accordingly.
+    comm = discrwb.mpi_communicator
+    i_local_rank = comm.Get_rank()
+
+    # NOTE: MPITagCollector does not modify sym_operator
+    tag_mapper = mappers.MPITagCollector(i_local_rank)
+    sym_operator = tag_mapper(sym_operator)
+
+    if len(tag_mapper.send_tag_lookups) > 0:
+        # TODO: Tag should probably be global
+        MPI_TAG_SEND_TAGS = 1729
+        send_reqs = []
+        for i_remote_rank in connected_parts:
+            send_tags = tag_mapper.send_tag_lookups[i_remote_rank]
+            send_reqs.append(comm.isend(send_tags, source=i_remote_rank,
+                                                   tag=MPI_TAG_SEND_TAGS))
+
+        recv_tag_lookups = {}
+        for i_remote_rank in connected_parts:
+            recv_tags = comm.recv(source=i_remote_rank, tag=MPI_TAG_SEND_TAGS)
+            recv_tag_lookups[i_remote_rank] = recv_tags
+
+        for req in send_reqs:
+            req.wait()
+
+        sym_operator = mappers.MPITagDistributor(recv_tag_lookups,
+                                                 i_local_rank)(sym_operator)
+
     dumper("before-imass", sym_operator)
     sym_operator = mappers.InverseMassContractor()(sym_operator)
 
diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index 450b3cd4..340ffb3a 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -222,7 +222,8 @@ class RankDataSwapAssign(Instruction):
         self.field = field
         self.i_remote_rank = op.i_remote_part
         self.dd_out = op.dd_out
-        self.tag = self.MPI_TAG_GRUDGE_DATA + op.mpi_tag_offset
+        self.send_tag = self.MPI_TAG_GRUDGE_DATA + op.send_tag_offset
+        self.recv_tag = self.MPI_TAG_GRUDGE_DATA + op.recv_tag_offset
         self.comment = "Swap data with rank %02d" % self.i_remote_rank
 
     @memoize_method
@@ -235,9 +236,11 @@ class RankDataSwapAssign(Instruction):
 
     def __str__(self):
         return ("{\n"
-                "    /* %s */\n"
-                "    %s <- %s\n"
-                "}\n" % (self.comment, self.name, self.field))
+              + "   /* %s */\n" % self.comment
+              + "   send_tag = %s\n" % self.send_tag
+              + "   recv_tag = %s\n" % self.recv_tag
+              + "   %s <- %s\n" % (self.name, self.field)
+              + "}")
 
     mapper_method = intern("map_insn_rank_data_swap")
 
@@ -520,7 +523,9 @@ class Code(object):
                 except self.NoInstructionAvailable:
                     if futures:
                         # no insn ready: we need a future to complete to continue
+                        # FIXME: May induce deadlock in RankDataSwapAssign
                         force_future = True
+                        # pass
                     else:
                         # no futures, no available instructions: we're done
                         break
@@ -1170,7 +1175,7 @@ class OperatorCompiler(mappers.IdentityMapper):
         if isinstance(expr.op, sym.RefDiffOperatorBase):
             return self.map_ref_diff_op_binding(expr, codegen_state)
         elif isinstance(expr.op, sym.OppositePartitionFaceSwap):
-            return self.map_rank_data_swap_binding(expr, codegen_state)
+            return self.map_rank_data_swap_binding(expr, codegen_state, name_hint)
         else:
             # make sure operator assignments stand alone and don't get muddled
             # up in vector math
@@ -1229,7 +1234,7 @@ class OperatorCompiler(mappers.IdentityMapper):
 
             return self.expr_to_var[expr]
 
-    def map_rank_data_swap_binding(self, expr, codegen_state):
+    def map_rank_data_swap_binding(self, expr, codegen_state, name_hint):
         try:
             return self.expr_to_var[expr]
         except KeyError:
@@ -1242,7 +1247,7 @@ class OperatorCompiler(mappers.IdentityMapper):
             # self.expr_to_var[field] = field_var
             self.expr_to_var[expr] = self.assign_to_new_var(codegen_state,
                                                             expr.op(field_var),
-                                                            prefix="other")
+                                                            prefix=name_hint)
             return self.expr_to_var[expr]
 
     # }}}
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 9db1ab31..27713a48 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -336,6 +336,50 @@ class OperatorBinder(CSECachingMapperMixin, IdentityMapper):
 
 # {{{ mappers for distributed computation
 
+class MPITagCollector(CSECachingMapperMixin, IdentityMapper):
+    map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
+
+    def __init__(self, i_local_rank):
+        self.i_local_rank = i_local_rank
+        self.send_tag_lookups = {}
+
+    def map_operator_binding(self, expr):
+        if isinstance(expr.op, op.OppositePartitionFaceSwap):
+            field = self.rec(expr.field)
+            i_remote_rank = expr.op.i_remote_part
+            # FIXME: Come up with a better key
+            # We MUST be sure that tags are UNIQUE for each pair of neighboring ranks
+            key = (field.field.index, self.i_local_rank, i_remote_rank)
+            tag = expr.op.send_tag_offset
+            if i_remote_rank not in self.send_tag_lookups:
+                self.send_tag_lookups[i_remote_rank] = {key: tag}
+            else:
+                assert key not in self.send_tag_lookups[i_remote_rank],\
+                            "Duplicate keys found in tag lookup"
+                self.send_tag_lookups[i_remote_rank][key] = tag
+            return expr
+        else:
+            return IdentityMapper.map_operator_binding(self, expr)
+
+
+class MPITagDistributor(CSECachingMapperMixin, IdentityMapper):
+    map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
+
+    def __init__(self, recv_tag_lookups, i_local_rank):
+        self.recv_tag_lookups = recv_tag_lookups
+        self.i_local_rank = i_local_rank
+
+    def map_operator_binding(self, expr):
+        if isinstance(expr.op, op.OppositePartitionFaceSwap):
+            field = self.rec(expr.field)
+            i_remote_rank = expr.op.i_remote_part
+            key = (field.field.index, i_remote_rank, self.i_local_rank)
+            expr.op.recv_tag_offset = self.recv_tag_lookups[i_remote_rank][key]
+            return expr
+        else:
+            return IdentityMapper.map_operator_binding(self, expr)
+
+
 class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
@@ -379,9 +423,9 @@ class RankGeometryChanger(CSECachingMapperMixin, IdentityMapper):
         if (isinstance(expr.op, op.OppositeInteriorFaceSwap)
                     and expr.op.dd_in == self.prev_dd
                     and expr.op.dd_out == self.prev_dd):
+            field = self.rec(expr.field)
             return op.OppositePartitionFaceSwap(dd_in=self.new_dd,
-                                                dd_out=self.new_dd)(
-                                                        self.rec(expr.field))
+                                                dd_out=self.new_dd)(field)
         elif (isinstance(expr.op, op.InterpolationOperator)
                     and expr.op.dd_out == self.prev_dd):
             return op.InterpolationOperator(dd_in=expr.op.dd_in,
@@ -750,7 +794,7 @@ class StringifyMapper(pymbolic.mapper.stringifier.StringifyMapper):
         return "RefFaceM" + self._format_op_dd(expr)
 
     def map_opposite_partition_face_swap(self, expr, enclosing_prec):
-        return "RankSwap" + self._format_op_dd(expr)
+        return "PartSwap" + self._format_op_dd(expr)
 
     def map_opposite_interior_face_swap(self, expr, enclosing_prec):
         return "OppSwap" + self._format_op_dd(expr)
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 7cdb3d2b..041cac39 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -408,8 +408,11 @@ class RefInverseMassOperator(RefMassOperatorBase):
 
 
 # {{{ boundary-related operators
-
 class OppositePartitionFaceSwap(Operator):
+    # FIXME: Static attribute, super hacky
+    from itertools import count
+    _num_instances = count(0)
+
     def __init__(self, dd_in=None, dd_out=None):
         sym = _sym()
 
@@ -427,8 +430,8 @@ class OppositePartitionFaceSwap(Operator):
             raise ValueError("dd_out and dd_in must be identical")
 
         self.i_remote_part = self.dd_in.domain_tag.part_nr
-        # FIXME: We should have a unique offset for each instance on a particular rank
-        self.mpi_tag_offset = 0
+        self.send_tag_offset = next(self._num_instances)
+        # self.recv_tag_offset = -0x3700d3e # Some magic bad value
 
     mapper_method = intern("map_opposite_partition_face_swap")
 
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 3bf012f3..96c460a3 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -78,6 +78,9 @@ def simple_mpi_communication_entrypoint():
         sym.interp(sym.BTAG_ALL, "all_faces")(
             sym.interp("vol", sym.BTAG_ALL)(sym.var("myfunc"))))
 
+    # FIXME: Since this is the second call to bind, something wierd happens with MPITagCollector
+    # and MPITagDistributor. I think it has distributed mesh but does not have any
+    # OppositePartitionFaceSwap operators
     bound_face_swap = bind(vol_discr,
         sym.interp("int_faces", "all_faces")(
             sym.OppositeInteriorFaceSwap("int_faces")(
@@ -85,7 +88,7 @@ def simple_mpi_communication_entrypoint():
             ) - (sym_all_faces_func - sym_bdry_faces_func)
             )
 
-    print(bound_face_swap)
+    # print(bound_face_swap)
     # 1/0
 
     hopefully_zero = bound_face_swap(queue, myfunc=myfunc)
@@ -102,24 +105,24 @@ def simple_mpi_communication_entrypoint():
 def mpi_communication_entrypoint():
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
-    from meshmode.distributed import MPIMeshDistributor
 
     from mpi4py import MPI
     comm = MPI.COMM_WORLD
-    rank = comm.Get_rank()
+    i_local_rank = comm.Get_rank()
     num_parts = comm.Get_size()
 
+    from meshmode.distributed import MPIMeshDistributor
     mesh_dist = MPIMeshDistributor(comm)
 
-    dims = 2
+    dim = 2
     dt = 0.04
     order = 4
 
     if mesh_dist.is_mananger_rank():
         from meshmode.mesh.generation import generate_regular_rect_mesh
-        mesh = generate_regular_rect_mesh(a=(-0.5,)*dims,
-                                          b=(0.5,)*dims,
-                                          n=(16,)*dims)
+        mesh = generate_regular_rect_mesh(a=(-0.5,)*dim,
+                                          b=(0.5,)*dim,
+                                          n=(16,)*dim)
 
         from pymetis import part_graph
         _, p = part_graph(num_parts,
@@ -132,7 +135,7 @@ def mpi_communication_entrypoint():
         local_mesh = mesh_dist.receive_mesh_part()
 
     vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order,
-            mpi_communicator=comm)
+                                               mpi_communicator=comm)
 
     source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim]
     source_width = 0.05
@@ -176,9 +179,9 @@ def mpi_communication_entrypoint():
 
     dt_stepper = set_up_rk4("w", dt, fields, rhs)
 
-    final_t = 10
+    final_t = 4
     nsteps = int(final_t/dt)
-    print("rank=%d dt=%g nsteps=%d" % (rank, dt, nsteps))
+    print("rank=%d dt=%g nsteps=%d" % (i_local_rank, dt, nsteps))
 
     from grudge.shortcuts import make_visualizer
     vis = make_visualizer(vol_discr, vis_order=order)
@@ -197,21 +200,20 @@ def mpi_communication_entrypoint():
             step += 1
 
             print(step, event.t, norm(queue, u=event.state_component[0]),
-                    time()-t_last_step)
+                  time()-t_last_step)
+
             if step % 10 == 0:
-                vis.write_vtk_file("rank%d-fld-%04d.vtu" % (rank, step),
-                        [
-                            ("u", event.state_component[0]),
-                            ("v", event.state_component[1:]),
-                            ])
+                vis.write_vtk_file("rank%d-fld-%04d.vtu" % (i_local_rank, step),
+                                   [("u", event.state_component[0]),
+                                    ("v", event.state_component[1:])])
             t_last_step = time()
-    logger.debug("Rank %d exiting", rank)
+    logger.debug("Rank %d exiting", i_local_rank)
 
 
 # {{{ MPI test pytest entrypoint
 
 @pytest.mark.mpi
-@pytest.mark.parametrize("num_ranks", [2])
+@pytest.mark.parametrize("num_ranks", [3])
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
 
@@ -227,8 +229,7 @@ def test_mpi(num_ranks):
 
 
 @pytest.mark.mpi
-@pytest.mark.parametrize("num_ranks", [2])
-def test_simple_mpi(num_ranks):
+def test_simple_mpi():
     pytest.importorskip("mpi4py")
 
     from subprocess import check_call
@@ -236,6 +237,7 @@ def test_simple_mpi(num_ranks):
     newenv = os.environ.copy()
     newenv["RUN_WITHIN_MPI"] = "1"
     newenv["TEST_SIMPLE_MPI_COMMUNICATION"] = "1"
+    num_ranks = 2
     check_call([
         "mpiexec", "-np", str(num_ranks), "-x", "RUN_WITHIN_MPI",
         sys.executable, __file__],
-- 
GitLab


From ae2e2e1d322202b729d6a4d59fbd297c7cdf90cb Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Mon, 26 Feb 2018 10:05:39 -0600
Subject: [PATCH 44/83] Fix whitespace

---
 grudge/execution.py            | 5 ++---
 test/test_mpi_communication.py | 3 ---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index a12c6dbe..a7aaf28c 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -334,7 +334,8 @@ class ExecutionMapper(mappers.Evaluator,
 
         # Do all instructions complete before futures?
         # FIXME: We CANNOT have any possibility of deadlock
-        # One option is to add an attribute that tells the scheduler that this should not be foreced
+        # One option is to add an attribute that tells the scheduler that this
+        # should not be foreced
 
         class RecvFuture:
             def __init__(self, recv_req, insn_name, remote_data_host, queue):
@@ -352,7 +353,6 @@ class ExecutionMapper(mappers.Evaluator,
                 remote_data = cl.array.to_device(self.queue, self.remote_data_host)
                 return [(self.insn_name, remote_data)], []
 
-
         class SendFuture:
             def __init__(self, send_request):
                 self.send_request = send_request
@@ -365,7 +365,6 @@ class ExecutionMapper(mappers.Evaluator,
                 self.send_request.wait()
                 return [], []
 
-
         return [], [RecvFuture(recv_req, insn.name, remote_data_host, self.queue),
                     SendFuture(send_req)]
 
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 96c460a3..db2b8fc8 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -78,9 +78,6 @@ def simple_mpi_communication_entrypoint():
         sym.interp(sym.BTAG_ALL, "all_faces")(
             sym.interp("vol", sym.BTAG_ALL)(sym.var("myfunc"))))
 
-    # FIXME: Since this is the second call to bind, something wierd happens with MPITagCollector
-    # and MPITagDistributor. I think it has distributed mesh but does not have any
-    # OppositePartitionFaceSwap operators
     bound_face_swap = bind(vol_discr,
         sym.interp("int_faces", "all_faces")(
             sym.OppositeInteriorFaceSwap("int_faces")(
-- 
GitLab


From aa4b9a56184e609c38598be14136f7b4a1017b60 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Tue, 27 Feb 2018 09:25:04 -0600
Subject: [PATCH 45/83] Fix tag distribution

---
 grudge/execution.py                 | 11 ++---
 grudge/symbolic/mappers/__init__.py | 64 +++++++++++++++++++++++------
 grudge/symbolic/operators.py        |  8 +---
 grudge/symbolic/primitives.py       |  3 ++
 4 files changed, 60 insertions(+), 26 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index a7aaf28c..daf3eb3e 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -577,25 +577,20 @@ def process_sym_operator(discrwb, sym_operator, post_bind_mapper=None,
     connected_parts = get_connected_partitions(volume_mesh)
     sym_operator = mappers.DistributedMapper(connected_parts)(sym_operator)
 
-    # TODO
-    # This MPI communication my not be necessary. The goal is to define unique and
-    # consistent tags for each OppSwap. This could be achieved by defining some
-    # ordering of these opperators and assigning tags accordingly.
+    # Communicate send and recv tags between ranks
     comm = discrwb.mpi_communicator
     i_local_rank = comm.Get_rank()
 
-    # NOTE: MPITagCollector does not modify sym_operator
     tag_mapper = mappers.MPITagCollector(i_local_rank)
     sym_operator = tag_mapper(sym_operator)
 
     if len(tag_mapper.send_tag_lookups) > 0:
-        # TODO: Tag should probably be global
+        # TODO: Tag should be global
         MPI_TAG_SEND_TAGS = 1729
         send_reqs = []
         for i_remote_rank in connected_parts:
             send_tags = tag_mapper.send_tag_lookups[i_remote_rank]
-            send_reqs.append(comm.isend(send_tags, source=i_remote_rank,
-                                                   tag=MPI_TAG_SEND_TAGS))
+            send_reqs.append(comm.isend(send_tags, i_remote_rank, MPI_TAG_SEND_TAGS))
 
         recv_tag_lookups = {}
         for i_remote_rank in connected_parts:
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 27713a48..6b251252 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -336,6 +336,45 @@ class OperatorBinder(CSECachingMapperMixin, IdentityMapper):
 
 # {{{ mappers for distributed computation
 
+def make_key_from_expr(expr, i_send_rank, i_recv_rank, clean_btag):
+    from copy import deepcopy
+    expr = deepcopy(expr)
+
+    class BTAGCleaner(IdentityMapper):
+        def __init__(self):
+            from meshmode.mesh import BTAG_PARTITION
+            self.prev_dd = sym.as_dofdesc(BTAG_PARTITION(i_recv_rank))
+            self.new_dd = sym.as_dofdesc(BTAG_PARTITION(i_send_rank))
+
+        def map_operator_binding(self, expr):
+            if (isinstance(expr.op, op.OppositeInteriorFaceSwap)
+                        and expr.op.dd_in == self.prev_dd
+                        and expr.op.dd_out == self.prev_dd):
+                field = self.rec(expr.field)
+                return op.OppositePartitionFaceSwap(dd_in=self.new_dd,
+                                                    dd_out=self.new_dd)(field)
+            elif (isinstance(expr.op, op.InterpolationOperator)
+                        and expr.op.dd_out == self.prev_dd):
+                return op.InterpolationOperator(dd_in=expr.op.dd_in,
+                                                dd_out=self.new_dd)(expr.field)
+            elif (isinstance(expr.op, op.RefDiffOperator)
+                        and expr.op.dd_out == self.prev_dd
+                        and expr.op.dd_in == self.prev_dd):
+                return op.RefDiffOperator(expr.op.rst_axis,
+                                          dd_in=self.new_dd,
+                                          dd_out=self.new_dd)(self.rec(expr.field))
+
+        def map_node_coordinate_component(self, expr):
+            if expr.dd == self.prev_dd:
+                return type(expr)(expr.axis, self.new_dd)
+    if clean_btag:
+        # FIXME: Maybe there is a better way to do this
+        # We need to change BTAG_PARTITION so that when expr is sent over to the
+        # other rank, it matches one of its own expressions
+        expr = BTAGCleaner()(expr)
+    return (expr, i_send_rank, i_recv_rank)
+
+
 class MPITagCollector(CSECachingMapperMixin, IdentityMapper):
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
@@ -345,18 +384,17 @@ class MPITagCollector(CSECachingMapperMixin, IdentityMapper):
 
     def map_operator_binding(self, expr):
         if isinstance(expr.op, op.OppositePartitionFaceSwap):
-            field = self.rec(expr.field)
             i_remote_rank = expr.op.i_remote_part
-            # FIXME: Come up with a better key
-            # We MUST be sure that tags are UNIQUE for each pair of neighboring ranks
-            key = (field.field.index, self.i_local_rank, i_remote_rank)
-            tag = expr.op.send_tag_offset
+            key = make_key_from_expr(self.rec(expr.field),
+                                     i_send_rank=self.i_local_rank,
+                                     i_recv_rank=i_remote_rank,
+                                     clean_btag=True)
             if i_remote_rank not in self.send_tag_lookups:
-                self.send_tag_lookups[i_remote_rank] = {key: tag}
-            else:
-                assert key not in self.send_tag_lookups[i_remote_rank],\
-                            "Duplicate keys found in tag lookup"
-                self.send_tag_lookups[i_remote_rank][key] = tag
+                self.send_tag_lookups[i_remote_rank] = {}
+            assert key not in self.send_tag_lookups[i_remote_rank],\
+                        "Duplicate keys found in tag lookup"
+            tag = expr.op.send_tag_offset = len(self.send_tag_lookups[i_remote_rank])
+            self.send_tag_lookups[i_remote_rank][key] = tag
             return expr
         else:
             return IdentityMapper.map_operator_binding(self, expr)
@@ -371,9 +409,11 @@ class MPITagDistributor(CSECachingMapperMixin, IdentityMapper):
 
     def map_operator_binding(self, expr):
         if isinstance(expr.op, op.OppositePartitionFaceSwap):
-            field = self.rec(expr.field)
             i_remote_rank = expr.op.i_remote_part
-            key = (field.field.index, i_remote_rank, self.i_local_rank)
+            key = make_key_from_expr(self.rec(expr.field),
+                                     i_send_rank=i_remote_rank,
+                                     i_recv_rank=self.i_local_rank,
+                                     clean_btag=False)
             expr.op.recv_tag_offset = self.recv_tag_lookups[i_remote_rank][key]
             return expr
         else:
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 041cac39..41b057d3 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -97,6 +97,8 @@ class ElementwiseLinearOperator(Operator):
 
 
 class InterpolationOperator(Operator):
+    init_arg_names = ("dd_in", "dd_out")
+
     def __init__(self, dd_in, dd_out):
         official_dd_in = _sym().as_dofdesc(dd_in)
         official_dd_out = _sym().as_dofdesc(dd_out)
@@ -409,10 +411,6 @@ class RefInverseMassOperator(RefMassOperatorBase):
 
 # {{{ boundary-related operators
 class OppositePartitionFaceSwap(Operator):
-    # FIXME: Static attribute, super hacky
-    from itertools import count
-    _num_instances = count(0)
-
     def __init__(self, dd_in=None, dd_out=None):
         sym = _sym()
 
@@ -430,8 +428,6 @@ class OppositePartitionFaceSwap(Operator):
             raise ValueError("dd_out and dd_in must be identical")
 
         self.i_remote_part = self.dd_in.domain_tag.part_nr
-        self.send_tag_offset = next(self._num_instances)
-        # self.recv_tag_offset = -0x3700d3e # Some magic bad value
 
     mapper_method = intern("map_opposite_partition_face_swap")
 
diff --git a/grudge/symbolic/primitives.py b/grudge/symbolic/primitives.py
index 974833c2..6f3661a0 100644
--- a/grudge/symbolic/primitives.py
+++ b/grudge/symbolic/primitives.py
@@ -307,6 +307,7 @@ class cse_scope(cse_scope_base):  # noqa
 class Variable(HasDOFDesc, ExpressionBase, pymbolic.primitives.Variable):
     """A user-supplied input variable with a known :class:`DOFDesc`.
     """
+    init_arg_names = ("name", "dd")
 
     def __init__(self, name, dd=None):
         if dd is None:
@@ -370,6 +371,8 @@ cos = CFunction("cos")
 # {{{ technical helpers
 
 class OperatorBinding(ExpressionBase):
+    init_arg_names = ("op", "field")
+
     def __init__(self, op, field):
         self.op = op
         self.field = field
-- 
GitLab


From fb2117b377d72b478842ec1de05b342bdb3c4d92 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Thu, 1 Mar 2018 20:38:03 -0600
Subject: [PATCH 46/83] Comment print statements

---
 grudge/execution.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index daf3eb3e..da1113fb 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -332,11 +332,6 @@ class ExecutionMapper(mappers.Evaluator,
         remote_data_host = np.empty_like(local_data)
         recv_req = comm.Irecv(remote_data_host, insn.i_remote_rank, insn.recv_tag)
 
-        # Do all instructions complete before futures?
-        # FIXME: We CANNOT have any possibility of deadlock
-        # One option is to add an attribute that tells the scheduler that this
-        # should not be foreced
-
         class RecvFuture:
             def __init__(self, recv_req, insn_name, remote_data_host, queue):
                 self.receive_request = recv_req
@@ -587,11 +582,13 @@ def process_sym_operator(discrwb, sym_operator, post_bind_mapper=None,
     if len(tag_mapper.send_tag_lookups) > 0:
         # TODO: Tag should be global
         MPI_TAG_SEND_TAGS = 1729
+        # print("Rank %d distributing tags" % i_local_rank)
         send_reqs = []
         for i_remote_rank in connected_parts:
             send_tags = tag_mapper.send_tag_lookups[i_remote_rank]
             send_reqs.append(comm.isend(send_tags, i_remote_rank, MPI_TAG_SEND_TAGS))
 
+        # print("Rank %d receiving tags" % i_local_rank)
         recv_tag_lookups = {}
         for i_remote_rank in connected_parts:
             recv_tags = comm.recv(source=i_remote_rank, tag=MPI_TAG_SEND_TAGS)
-- 
GitLab


From 3d76ebf304759e4580a744fb32337ad00ee2363a Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Sun, 4 Mar 2018 11:28:35 -0600
Subject: [PATCH 47/83] Add benchmark testing

---
 test/benchmark_mpi.py | 102 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 test/benchmark_mpi.py

diff --git a/test/benchmark_mpi.py b/test/benchmark_mpi.py
new file mode 100644
index 00000000..49ba0cac
--- /dev/null
+++ b/test/benchmark_mpi.py
@@ -0,0 +1,102 @@
+import os
+import numpy as np
+import pyopencl as cl
+
+from grudge import sym, bind, DGDiscretizationWithBoundaries
+from grudge.shortcuts import set_up_rk4
+
+
+def simple_wave_entrypoint(dim=2, order=4, n=16):
+    cl_ctx = cl.create_some_context()
+    queue = cl.CommandQueue(cl_ctx)
+
+    from mpi4py import MPI
+    comm = MPI.COMM_WORLD
+    i_local_rank = comm.Get_rank()
+    num_parts = comm.Get_size()
+
+    from meshmode.distributed import MPIMeshDistributor
+    mesh_dist = MPIMeshDistributor(comm)
+
+    if mesh_dist.is_mananger_rank():
+        from meshmode.mesh.generation import generate_regular_rect_mesh
+        mesh = generate_regular_rect_mesh(a=(-0.5,)*dim,
+                                          b=(0.5,)*dim,
+                                          n=(n,)*dim)
+
+        from pymetis import part_graph
+        _, p = part_graph(num_parts,
+                          xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
+                          adjncy=mesh.nodal_adjacency.neighbors.tolist())
+        part_per_element = np.array(p)
+
+        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
+    else:
+        local_mesh = mesh_dist.receive_mesh_part()
+
+    vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order,
+                                               mpi_communicator=comm)
+
+    source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim]
+    source_width = 0.05
+    source_omega = 3
+
+    sym_x = sym.nodes(local_mesh.dim)
+    sym_source_center_dist = sym_x - source_center
+    sym_t = sym.ScalarVariable("t")
+
+    from grudge.models.wave import StrongWaveOperator
+    from meshmode.mesh import BTAG_ALL, BTAG_NONE
+    op = StrongWaveOperator(-0.1, vol_discr.dim,
+            source_f=(
+                sym.sin(source_omega*sym_t)
+                * sym.exp(
+                    -np.dot(sym_source_center_dist, sym_source_center_dist)
+                    / source_width**2)),
+            dirichlet_tag=BTAG_NONE,
+            neumann_tag=BTAG_NONE,
+            radiation_tag=BTAG_ALL,
+            flux_type="upwind")
+
+    from pytools.obj_array import join_fields
+    fields = join_fields(vol_discr.zeros(queue),
+            [vol_discr.zeros(queue) for i in range(vol_discr.dim)])
+
+    bound_op = bind(vol_discr, op.sym_operator())
+
+    def rhs(t, w):
+        return bound_op(queue, t=t, w=w)
+
+    dt = 0.04
+    dt_stepper = set_up_rk4("w", dt, fields, rhs)
+
+    final_t = 4
+    nsteps = int(final_t/dt)
+
+    for event in dt_stepper.run(t_end=final_t):
+        pass
+
+
+def benchmark_mpi():
+    import time
+    from subprocess import check_call
+    import sys
+    newenv = os.environ.copy()
+    newenv["RUN_WITHIN_MPI"] = "1"
+    newenv["PYOPENCL_CTX"] = "0"
+    for num_ranks in [1, 2]:
+        start_time = time.time()
+        check_call(["mpiexec", "-np", str(num_ranks),
+                    "-x", "RUN_WITHIN_MPI",
+                    "-x", "PYOPENCL_CTX",
+                    sys.executable, __file__],
+                    env=newenv)
+        print("Execution time with %d rank(s): %f"
+                    % (num_ranks, time.time() - start_time))
+
+
+if __name__ == "__main__":
+    if "RUN_WITHIN_MPI" in os.environ:
+        simple_wave_entrypoint()
+    else:
+        benchmark_mpi()
-- 
GitLab


From cda61fe6a5e7b35b6790564d36b5b9fa154d0048 Mon Sep 17 00:00:00 2001
From: Ellis <eshoag@illinois.edu>
Date: Wed, 7 Mar 2018 23:32:51 -0600
Subject: [PATCH 48/83] Remove static execution

---
 grudge/execution.py         |   2 -
 grudge/symbolic/compiler.py | 152 ++++++++----------------------------
 2 files changed, 31 insertions(+), 123 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index da1113fb..f91b5b59 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -343,7 +343,6 @@ class ExecutionMapper(mappers.Evaluator,
                 return self.receive_request.Test()
 
             def __call__(self):
-                # assert self.is_ready(), "RecvFuture was not ready to be called!"
                 self.receive_request.Wait()
                 remote_data = cl.array.to_device(self.queue, self.remote_data_host)
                 return [(self.insn_name, remote_data)], []
@@ -356,7 +355,6 @@ class ExecutionMapper(mappers.Evaluator,
                 return self.send_request.Test()
 
             def __call__(self):
-                # assert self.is_ready(), "SendFuture was not ready to be called!"
                 self.send_request.wait()
                 return [], []
 
diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index 340ffb3a..4a6a58d0 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -381,7 +381,7 @@ class Code(object):
     def __init__(self, instructions, result):
         self.instructions = instructions
         self.result = result
-        self.last_schedule = None
+        # self.last_schedule = None
         self.static_schedule_attempts = 5
 
     def dump_dataflow_graph(self):
@@ -477,80 +477,53 @@ class Code(object):
 
         return argmax2(available_insns), discardable_vars
 
-    def execute_dynamic(self, exec_mapper, pre_assign_check=None):
-        """Execute the instruction stream, make all scheduling decisions
-        dynamically. Record the schedule in *self.last_schedule*.
-        """
-        schedule = []
-
+    def execute(self, exec_mapper, pre_assign_check=None):
         context = exec_mapper.context
 
-        next_future_id = 0
         futures = []
         done_insns = set()
 
-        force_future = False
-
-        while True:
-            insn = None
-            discardable_vars = []
-
-            # check futures for completion
-
-            i = 0
-            while i < len(futures):
-                future = futures[i]
-                if force_future or future.is_ready():
-                    futures.pop(i)
+        def try_evaluate_future():
+            for i in range(len(futures)):
+                if futures[i].is_ready():
+                    future = futures.pop(i)
+                    assignments, new_futures = future()
 
-                    insn = self.EvaluateFuture(future.id)
+                    for target, value in assignments:
+                        if pre_assign_check is not None:
+                            pre_assign_check(target, value)
+                        context[target] = value
 
-                    assignments, new_futures = future()
-                    force_future = False
-                    break
-                else:
-                    i += 1
+                    futures.extend(new_futures)
+                    return True
+            return False
 
-                del future
+        while True:
+            try:
+                insn, discardable_vars = self.get_next_step(
+                    frozenset(list(context.keys())),
+                    frozenset(done_insns))
 
-            # if no future got processed, pick the next insn
-            if insn is None:
-                try:
-                    insn, discardable_vars = self.get_next_step(
-                            frozenset(list(context.keys())),
-                            frozenset(done_insns))
-
-                except self.NoInstructionAvailable:
-                    if futures:
-                        # no insn ready: we need a future to complete to continue
-                        # FIXME: May induce deadlock in RankDataSwapAssign
-                        force_future = True
-                        # pass
-                    else:
-                        # no futures, no available instructions: we're done
-                        break
-                else:
-                    for name in discardable_vars:
-                        del context[name]
+                done_insns.add(insn)
+                for name in discardable_vars:
+                    del context[name]
 
-                    done_insns.add(insn)
-                    mapper_method = getattr(exec_mapper, insn.mapper_method)
-                    assignments, new_futures = mapper_method(insn)
+                mapper_method = getattr(exec_mapper, insn.mapper_method)
+                assignments, new_futures = mapper_method(insn)
 
-            if insn is not None:
                 for target, value in assignments:
                     if pre_assign_check is not None:
                         pre_assign_check(target, value)
-
                     context[target] = value
 
                 futures.extend(new_futures)
-
-                schedule.append((discardable_vars, insn, len(new_futures)))
-
-                for future in new_futures:
-                    future.id = next_future_id
-                    next_future_id += 1
+            except self.NoInstructionAvailable:
+                if not futures:
+                    # No more instructions or futures. We are done.
+                    break
+                # Busy wait for a new future
+                while not try_evaluate_future():
+                    pass
 
         if len(done_insns) < len(self.instructions):
             print("Unreachable instructions:")
@@ -560,72 +533,9 @@ class Code(object):
             raise RuntimeError("not all instructions are reachable"
                     "--did you forget to pass a value for a placeholder?")
 
-        if self.static_schedule_attempts:
-            self.last_schedule = schedule
-
         from pytools.obj_array import with_object_array_or_scalar
         return with_object_array_or_scalar(exec_mapper, self.result)
 
-    # }}}
-
-    # {{{ static schedule execution
-
-    class EvaluateFuture(object):
-        """A fake 'instruction' that represents evaluation of a future."""
-        def __init__(self, future_id):
-            self.future_id = future_id
-
-    def execute(self, exec_mapper, pre_assign_check=None):
-        """If we have a saved, static schedule for this instruction stream,
-        execute it. Otherwise, punt to the dynamic scheduler below.
-        """
-
-        if self.last_schedule is None:
-            return self.execute_dynamic(exec_mapper, pre_assign_check)
-
-        context = exec_mapper.context
-        id_to_future = {}
-        next_future_id = 0
-
-        schedule_is_delay_free = True
-
-        for discardable_vars, insn, new_future_count in self.last_schedule:
-            for name in discardable_vars:
-                del context[name]
-
-            if isinstance(insn, self.EvaluateFuture):
-                future = id_to_future.pop(insn.future_id)
-                if not future.is_ready():
-                    schedule_is_delay_free = False
-                assignments, new_futures = future()
-                del future
-            else:
-                mapper_method = getattr(exec_mapper, insn.mapper_method)
-                assignments, new_futures = mapper_method(insn)
-
-            for target, value in assignments:
-                if pre_assign_check is not None:
-                    pre_assign_check(target, value)
-
-                context[target] = value
-
-            if len(new_futures) != new_future_count:
-                raise RuntimeError("static schedule got an unexpected number "
-                        "of futures")
-
-            for future in new_futures:
-                id_to_future[next_future_id] = future
-                next_future_id += 1
-
-        if not schedule_is_delay_free:
-            self.last_schedule = None
-            self.static_schedule_attempts -= 1
-
-        from pytools.obj_array import with_object_array_or_scalar
-        return with_object_array_or_scalar(exec_mapper, self.result)
-
-    # }}}
-
 # }}}
 
 
-- 
GitLab


From f4b2b3352921dad3c0f68e0debe8b398c77770bb Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@porter.cs.illinois.edu>
Date: Sun, 11 Mar 2018 17:06:14 -0500
Subject: [PATCH 49/83] Improve set env vars

---
 test/benchmark_mpi.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/test/benchmark_mpi.py b/test/benchmark_mpi.py
index 49ba0cac..3ecfd4fe 100644
--- a/test/benchmark_mpi.py
+++ b/test/benchmark_mpi.py
@@ -6,6 +6,7 @@ from grudge import sym, bind, DGDiscretizationWithBoundaries
 from grudge.shortcuts import set_up_rk4
 
 
+
 def simple_wave_entrypoint(dim=2, order=4, n=16):
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
@@ -81,16 +82,22 @@ def benchmark_mpi():
     import time
     from subprocess import check_call
     import sys
+    environment_vars = [
+        ("RUN_WITHIN_MPI", "1"),
+        ("PYOPENCL_CTX", "0"),
+        ("POCL_AFFINITY", "1")
+    ]
     newenv = os.environ.copy()
-    newenv["RUN_WITHIN_MPI"] = "1"
-    newenv["PYOPENCL_CTX"] = "0"
+    for var, val in environment_vars:
+        newenv[var] = val
     for num_ranks in [1, 2]:
+        sys_call = ["mpiexec", "-np", str(num_ranks),
+                    *sum([["-x", var] for var, _ in environment_vars], []),
+                    sys.executable, __file__]
+        print("Running command:")
+        print(*sys_call)
         start_time = time.time()
-        check_call(["mpiexec", "-np", str(num_ranks),
-                    "-x", "RUN_WITHIN_MPI",
-                    "-x", "PYOPENCL_CTX",
-                    sys.executable, __file__],
-                    env=newenv)
+        check_call(sys_call, env=newenv)
         print("Execution time with %d rank(s): %f"
                     % (num_ranks, time.time() - start_time))
 
-- 
GitLab


From 99d52ef3c60eb76cf7a30a094aa437107950d726 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@illinois.edu>
Date: Sun, 18 Mar 2018 19:53:16 -0500
Subject: [PATCH 50/83] Working

---
 test/benchmark_mpi.py | 35 +++++++----------------------------
 1 file changed, 7 insertions(+), 28 deletions(-)

diff --git a/test/benchmark_mpi.py b/test/benchmark_mpi.py
index 3ecfd4fe..95ad8a4e 100644
--- a/test/benchmark_mpi.py
+++ b/test/benchmark_mpi.py
@@ -7,7 +7,7 @@ from grudge.shortcuts import set_up_rk4
 
 
-def simple_wave_entrypoint(dim=2, order=4, n=16):
+def simple_wave_entrypoint(dim=2, order=4, n=256):
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
 
@@ -77,33 +77,12 @@ def simple_wave_entrypoint(dim=2, order=4, n=16):
     for event in dt_stepper.run(t_end=final_t):
         pass
 
-
-def benchmark_mpi():
-    import time
-    from subprocess import check_call
-    import sys
-    environment_vars = [
-        ("RUN_WITHIN_MPI", "1"),
-        ("PYOPENCL_CTX", "0"),
-        ("POCL_AFFINITY", "1")
-    ]
-    newenv = os.environ.copy()
-    for var, val in environment_vars:
-        newenv[var] = val
-    for num_ranks in [1, 2]:
-        sys_call = ["mpiexec", "-np", str(num_ranks),
-                    *sum([["-x", var] for var, _ in environment_vars], []),
-                    sys.executable, __file__]
-        print("Running command:")
-        print(*sys_call)
-        start_time = time.time()
-        check_call(sys_call, env=newenv)
-        print("Execution time with %d rank(s): %f"
-                    % (num_ranks, time.time() - start_time))
-
-
 if __name__ == "__main__":
     if "RUN_WITHIN_MPI" in os.environ:
-        simple_wave_entrypoint()
+        import sys
+        mesh_size = 64
+        if len(sys.argv) == 2:
+            mesh_size = int(sys.argv[1])
+        simple_wave_entrypoint(n=mesh_size)
     else:
-        benchmark_mpi()
+        assert 0, "Must run within mpi"
-- 
GitLab


From 11603c9eef5d2bb666c087b5b2686ce8cb76f668 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@illinois.edu>
Date: Sun, 18 Mar 2018 20:13:35 -0500
Subject: [PATCH 51/83] Fix non-distributed test cases

---
 grudge/execution.py         | 63 ++++++++++++++++++-------------------
 grudge/symbolic/compiler.py | 10 +++---
 test/benchmark_mpi.py       |  4 +--
 3 files changed, 36 insertions(+), 41 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index f91b5b59..4b270fef 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -35,9 +35,7 @@ from grudge import sym
 import logging
 logger = logging.getLogger(__name__)
 
-
-# TODO: Maybe we should move this somewhere else.
-# MPI_TAG_GRUDGE_DATA = 0x3700d3e
+MPI_TAG_SEND_TAGS = 1729
 
 
 # {{{ exec mapper
@@ -568,35 +566,36 @@ def process_sym_operator(discrwb, sym_operator, post_bind_mapper=None,
     volume_mesh = discrwb.discr_from_dd("vol").mesh
     from meshmode.distributed import get_connected_partitions
     connected_parts = get_connected_partitions(volume_mesh)
-    sym_operator = mappers.DistributedMapper(connected_parts)(sym_operator)
-
-    # Communicate send and recv tags between ranks
-    comm = discrwb.mpi_communicator
-    i_local_rank = comm.Get_rank()
-
-    tag_mapper = mappers.MPITagCollector(i_local_rank)
-    sym_operator = tag_mapper(sym_operator)
-
-    if len(tag_mapper.send_tag_lookups) > 0:
-        # TODO: Tag should be global
-        MPI_TAG_SEND_TAGS = 1729
-        # print("Rank %d distributing tags" % i_local_rank)
-        send_reqs = []
-        for i_remote_rank in connected_parts:
-            send_tags = tag_mapper.send_tag_lookups[i_remote_rank]
-            send_reqs.append(comm.isend(send_tags, i_remote_rank, MPI_TAG_SEND_TAGS))
-
-        # print("Rank %d receiving tags" % i_local_rank)
-        recv_tag_lookups = {}
-        for i_remote_rank in connected_parts:
-            recv_tags = comm.recv(source=i_remote_rank, tag=MPI_TAG_SEND_TAGS)
-            recv_tag_lookups[i_remote_rank] = recv_tags
-
-        for req in send_reqs:
-            req.wait()
-
-        sym_operator = mappers.MPITagDistributor(recv_tag_lookups,
-                                                 i_local_rank)(sym_operator)
+    if connected_parts:
+        sym_operator = mappers.DistributedMapper(connected_parts)(sym_operator)
+
+        # Communicate send and recv tags between ranks
+        comm = discrwb.mpi_communicator
+        i_local_rank = comm.Get_rank()
+
+        tag_mapper = mappers.MPITagCollector(i_local_rank)
+        sym_operator = tag_mapper(sym_operator)
+
+        if len(tag_mapper.send_tag_lookups) > 0:
+            # print("Rank %d distributing tags" % i_local_rank)
+            send_reqs = []
+            for i_remote_rank in connected_parts:
+                send_tags = tag_mapper.send_tag_lookups[i_remote_rank]
+                send_reqs.append(comm.isend(send_tags,
+                                            i_remote_rank,
+                                            MPI_TAG_SEND_TAGS))
+
+            # print("Rank %d receiving tags" % i_local_rank)
+            recv_tag_lookups = {}
+            for i_remote_rank in connected_parts:
+                recv_tags = comm.recv(source=i_remote_rank, tag=MPI_TAG_SEND_TAGS)
+                recv_tag_lookups[i_remote_rank] = recv_tags
+
+            for req in send_reqs:
+                req.wait()
+
+            sym_operator = mappers.MPITagDistributor(recv_tag_lookups,
+                                                     i_local_rank)(sym_operator)
 
     dumper("before-imass", sym_operator)
     sym_operator = mappers.InverseMassContractor()(sym_operator)
diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index 4a6a58d0..e74b69f1 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -214,16 +214,16 @@ class RankDataSwapAssign(Instruction):
     .. attribute:: dd_out
     .. attribute:: comment
     """
-    # TODO: Is this number ok? We probably want it to be global.
-    MPI_TAG_GRUDGE_DATA = 0x3700d3e
+    # TODO: We need to be sure this does not conflict with some other tag.
+    MPI_TAG_GRUDGE_DATA_BASE = 0x3700d3e
 
     def __init__(self, name, field, op):
         self.name = name
         self.field = field
         self.i_remote_rank = op.i_remote_part
         self.dd_out = op.dd_out
-        self.send_tag = self.MPI_TAG_GRUDGE_DATA + op.send_tag_offset
-        self.recv_tag = self.MPI_TAG_GRUDGE_DATA + op.recv_tag_offset
+        self.send_tag = self.MPI_TAG_GRUDGE_DATA_BASE + op.send_tag_offset
+        self.recv_tag = self.MPI_TAG_GRUDGE_DATA_BASE + op.recv_tag_offset
         self.comment = "Swap data with rank %02d" % self.i_remote_rank
 
     @memoize_method
@@ -1153,8 +1153,6 @@ class OperatorCompiler(mappers.IdentityMapper):
             field_insn = RankDataSwapAssign(name=name, field=field, op=expr.op)
             codegen_state.get_code_list(self).append(field_insn)
             field_var = Variable(field_insn.name)
-            # TODO: Do I need this?
-            # self.expr_to_var[field] = field_var
             self.expr_to_var[expr] = self.assign_to_new_var(codegen_state,
                                                             expr.op(field_var),
                                                             prefix=name_hint)
diff --git a/test/benchmark_mpi.py b/test/benchmark_mpi.py
index 95ad8a4e..3e99246d 100644
--- a/test/benchmark_mpi.py
+++ b/test/benchmark_mpi.py
@@ -6,14 +6,12 @@ from grudge import sym, bind, DGDiscretizationWithBoundaries
 from grudge.shortcuts import set_up_rk4
 
 
-
 def simple_wave_entrypoint(dim=2, order=4, n=256):
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
 
     from mpi4py import MPI
     comm = MPI.COMM_WORLD
-    i_local_rank = comm.Get_rank()
     num_parts = comm.Get_size()
 
     from meshmode.distributed import MPIMeshDistributor
@@ -72,11 +70,11 @@ def simple_wave_entrypoint(dim=2, order=4, n=256):
     dt_stepper = set_up_rk4("w", dt, fields, rhs)
 
     final_t = 4
-    nsteps = int(final_t/dt)
 
     for event in dt_stepper.run(t_end=final_t):
         pass
 
+
 if __name__ == "__main__":
     if "RUN_WITHIN_MPI" in os.environ:
         import sys
-- 
GitLab


From 950af9d20909282a743e00b6caf958fc84519dc8 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@illinois.edu>
Date: Mon, 19 Mar 2018 23:53:31 -0500
Subject: [PATCH 52/83] Fix tests

---
 test/test_mpi_communication.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index db2b8fc8..8cc06686 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -199,10 +199,10 @@ def mpi_communication_entrypoint():
             print(step, event.t, norm(queue, u=event.state_component[0]),
                   time()-t_last_step)
 
-            if step % 10 == 0:
-                vis.write_vtk_file("rank%d-fld-%04d.vtu" % (i_local_rank, step),
-                                   [("u", event.state_component[0]),
-                                    ("v", event.state_component[1:])])
+            # if step % 10 == 0:
+            #     vis.write_vtk_file("rank%d-fld-%04d.vtu" % (i_local_rank, step),
+            #                        [("u", event.state_component[0]),
+            #                         ("v", event.state_component[1:])])
             t_last_step = time()
     logger.debug("Rank %d exiting", i_local_rank)
 
-- 
GitLab


From 0c7ea587f12f8b02091cef020d5ef2bcbea76c96 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@illinois.edu>
Date: Mon, 19 Mar 2018 23:57:04 -0500
Subject: [PATCH 53/83] Fix flake8

---
 test/test_mpi_communication.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 8cc06686..a439aacf 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -180,8 +180,8 @@ def mpi_communication_entrypoint():
     nsteps = int(final_t/dt)
     print("rank=%d dt=%g nsteps=%d" % (i_local_rank, dt, nsteps))
 
-    from grudge.shortcuts import make_visualizer
-    vis = make_visualizer(vol_discr, vis_order=order)
+    # from grudge.shortcuts import make_visualizer
+    # vis = make_visualizer(vol_discr, vis_order=order)
 
     step = 0
 
-- 
GitLab


From 12a75c484f2136351afad969ca5eecbdff3db827 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@illinois.edu>
Date: Tue, 20 Mar 2018 00:25:47 -0500
Subject: [PATCH 54/83] Skip bad test

---
 test/test_mpi_communication.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index a439aacf..6635b917 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -36,6 +36,7 @@ from grudge import sym, bind, DGDiscretizationWithBoundaries
 from grudge.shortcuts import set_up_rk4
 
 
+@pytest.mark.skip()
 def simple_mpi_communication_entrypoint():
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
@@ -99,6 +100,7 @@ def simple_mpi_communication_entrypoint():
     assert error < 1e-14
 
 
+@pytest.mark.skip()
 def mpi_communication_entrypoint():
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
@@ -209,8 +211,9 @@ def mpi_communication_entrypoint():
 
 # {{{ MPI test pytest entrypoint
 
-@pytest.mark.mpi
-@pytest.mark.parametrize("num_ranks", [3])
+# @pytest.mark.mpi
+# @pytest.mark.parametrize("num_ranks", [3])
+@pytest.mark.skip()
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
 
-- 
GitLab


From 55a346f3f2f8b69e8db8446deba3e983a46e6f7b Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@illinois.edu>
Date: Mon, 2 Apr 2018 17:50:12 -0500
Subject: [PATCH 55/83] Move benchmark code

---
 test/benchmark_mpi.py | 86 -------------------------------------------
 1 file changed, 86 deletions(-)
 delete mode 100644 test/benchmark_mpi.py

diff --git a/test/benchmark_mpi.py b/test/benchmark_mpi.py
deleted file mode 100644
index 3e99246d..00000000
--- a/test/benchmark_mpi.py
+++ /dev/null
@@ -1,86 +0,0 @@
-import os
-import numpy as np
-import pyopencl as cl
-
-from grudge import sym, bind, DGDiscretizationWithBoundaries
-from grudge.shortcuts import set_up_rk4
-
-
-def simple_wave_entrypoint(dim=2, order=4, n=256):
-    cl_ctx = cl.create_some_context()
-    queue = cl.CommandQueue(cl_ctx)
-
-    from mpi4py import MPI
-    comm = MPI.COMM_WORLD
-    num_parts = comm.Get_size()
-
-    from meshmode.distributed import MPIMeshDistributor
-    mesh_dist = MPIMeshDistributor(comm)
-
-    if mesh_dist.is_mananger_rank():
-        from meshmode.mesh.generation import generate_regular_rect_mesh
-        mesh = generate_regular_rect_mesh(a=(-0.5,)*dim,
-                                          b=(0.5,)*dim,
-                                          n=(n,)*dim)
-
-        from pymetis import part_graph
-        _, p = part_graph(num_parts,
-                          xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
-                          adjncy=mesh.nodal_adjacency.neighbors.tolist())
-        part_per_element = np.array(p)
-
-        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
-    else:
-        local_mesh = mesh_dist.receive_mesh_part()
-
-    vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order,
-                                               mpi_communicator=comm)
-
-    source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim]
-    source_width = 0.05
-    source_omega = 3
-
-    sym_x = sym.nodes(local_mesh.dim)
-    sym_source_center_dist = sym_x - source_center
-    sym_t = sym.ScalarVariable("t")
-
-    from grudge.models.wave import StrongWaveOperator
-    from meshmode.mesh import BTAG_ALL, BTAG_NONE
-    op = StrongWaveOperator(-0.1, vol_discr.dim,
-            source_f=(
-                sym.sin(source_omega*sym_t)
-                * sym.exp(
-                    -np.dot(sym_source_center_dist, sym_source_center_dist)
-                    / source_width**2)),
-            dirichlet_tag=BTAG_NONE,
-            neumann_tag=BTAG_NONE,
-            radiation_tag=BTAG_ALL,
-            flux_type="upwind")
-
-    from pytools.obj_array import join_fields
-    fields = join_fields(vol_discr.zeros(queue),
-            [vol_discr.zeros(queue) for i in range(vol_discr.dim)])
-
-    bound_op = bind(vol_discr, op.sym_operator())
-
-    def rhs(t, w):
-        return bound_op(queue, t=t, w=w)
-
-    dt = 0.04
-    dt_stepper = set_up_rk4("w", dt, fields, rhs)
-
-    final_t = 4
-
-    for event in dt_stepper.run(t_end=final_t):
-        pass
-
-
-if __name__ == "__main__":
-    if "RUN_WITHIN_MPI" in os.environ:
-        import sys
-        mesh_size = 64
-        if len(sys.argv) == 2:
-            mesh_size = int(sys.argv[1])
-        simple_wave_entrypoint(n=mesh_size)
-    else:
-        assert 0, "Must run within mpi"
-- 
GitLab


From 7a79576dbea16ff03717e8393fc03c0645af3613 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@illinois.edu>
Date: Tue, 3 Apr 2018 21:47:00 -0500
Subject: [PATCH 56/83] Try to fix gitlab

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index ee4c5287..55dc3a81 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@ git+https://github.com/inducer/modepy.git
 # FIXME: Revert to this when merged
 #git+https://github.com/inducer/meshmode.git
 git+https://gitlab.tiker.net/eshoag2/meshmode.git@partition
+pymetis
-- 
GitLab


From 7214f5c554fff83b42e5c46496bf5b9cbb74a35d Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@illinois.edu>
Date: Tue, 3 Apr 2018 21:55:15 -0500
Subject: [PATCH 57/83] Fix gitlab.....for now

---
 requirements.txt               | 1 -
 test/test_mpi_communication.py | 5 ++++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 55dc3a81..ee4c5287 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,4 +11,3 @@ git+https://github.com/inducer/modepy.git
 # FIXME: Revert to this when merged
 #git+https://github.com/inducer/meshmode.git
 git+https://gitlab.tiker.net/eshoag2/meshmode.git@partition
-pymetis
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 6635b917..fb468c74 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -213,6 +213,7 @@ def mpi_communication_entrypoint():
 
 # @pytest.mark.mpi
 # @pytest.mark.parametrize("num_ranks", [3])
+# FIXME: gitlab runs forever on this.
 @pytest.mark.skip()
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
@@ -228,7 +229,9 @@ def test_mpi(num_ranks):
         env=newenv)
 
 
-@pytest.mark.mpi
+# @pytest.mark.mpi
+# FIXME: gitlab runs forever on this.
+@pytest.mark.skip()
 def test_simple_mpi():
     pytest.importorskip("mpi4py")
 
-- 
GitLab


From 14a0c0cb177d4a7c02850822832ab815745bfe1b Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Thu, 5 Apr 2018 15:36:11 -0500
Subject: [PATCH 58/83] Add profile tool to execute

---
 grudge/execution.py            |  4 +--
 grudge/symbolic/compiler.py    | 62 ++++++++++++++++++++++++----------
 test/test_mpi_communication.py | 30 +++++++++++-----
 3 files changed, 68 insertions(+), 28 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 4b270fef..5be80e9f 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -483,7 +483,7 @@ class BoundOperator(object):
                 + sep
                 + str(self.eval_code))
 
-    def __call__(self, queue, **context):
+    def __call__(self, queue, profile_data=None, **context):
         import pyopencl.array as cl_array
 
         def replace_queue(a):
@@ -512,7 +512,7 @@ class BoundOperator(object):
             new_context[name] = with_object_array_or_scalar(replace_queue, var)
 
         return self.eval_code.execute(
-                ExecutionMapper(queue, new_context, self))
+                ExecutionMapper(queue, new_context, self), profile_data=profile_data)
 
 # }}}
 
diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index e74b69f1..a312709c 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -477,33 +477,29 @@ class Code(object):
 
         return argmax2(available_insns), discardable_vars
 
-    def execute(self, exec_mapper, pre_assign_check=None):
+    def execute(self, exec_mapper, pre_assign_check=None, profile_data=None):
+        if profile_data is not None:
+            from time import time
+            start_time = time()
+            if profile_data == {}:
+                profile_data['insn_eval_time'] = 0
+                profile_data['future_eval_time'] = 0
+                profile_data['busy_wait_time'] = 0
+                profile_data['total_time'] = 0
         context = exec_mapper.context
 
         futures = []
         done_insns = set()
 
-        def try_evaluate_future():
-            for i in range(len(futures)):
-                if futures[i].is_ready():
-                    future = futures.pop(i)
-                    assignments, new_futures = future()
-
-                    for target, value in assignments:
-                        if pre_assign_check is not None:
-                            pre_assign_check(target, value)
-                        context[target] = value
-
-                    futures.extend(new_futures)
-                    return True
-            return False
-
         while True:
             try:
                 insn, discardable_vars = self.get_next_step(
                     frozenset(list(context.keys())),
                     frozenset(done_insns))
 
+                if profile_data is not None:
+                    insn_start_time = time()
+
                 done_insns.add(insn)
                 for name in discardable_vars:
                     del context[name]
@@ -517,13 +513,38 @@ class Code(object):
                     context[target] = value
 
                 futures.extend(new_futures)
+                if profile_data is not None:
+                    profile_data['insn_eval_time'] += time() - insn_start_time
             except self.NoInstructionAvailable:
                 if not futures:
                     # No more instructions or futures. We are done.
                     break
                 # Busy wait for a new future
-                while not try_evaluate_future():
-                    pass
+                if profile_data is not None:
+                    busy_wait_start_time = time()
+
+                did_eval_future = False
+                while not did_eval_future:
+                    for i in range(len(futures)):
+                        if futures[i].is_ready():
+                            if profile_data is not None:
+                                profile_data['busy_wait_time'] += time() - busy_wait_start_time
+                                future_start_time = time()
+
+                            future = futures.pop(i)
+                            assignments, new_futures = future()
+
+                            for target, value in assignments:
+                                if pre_assign_check is not None:
+                                    pre_assign_check(target, value)
+                                context[target] = value
+
+                            futures.extend(new_futures)
+                            did_eval_future = True
+
+                            if profile_data is not None:
+                                profile_data['future_eval_time'] += time() - future_start_time
+                            break
 
         if len(done_insns) < len(self.instructions):
             print("Unreachable instructions:")
@@ -533,7 +554,12 @@ class Code(object):
             raise RuntimeError("not all instructions are reachable"
                     "--did you forget to pass a value for a placeholder?")
 
+        if profile_data is not None:
+            profile_data['total_time'] += time() - start_time
+
         from pytools.obj_array import with_object_array_or_scalar
+        if profile_data is not None:
+            return with_object_array_or_scalar(exec_mapper, self.result), profile_data
         return with_object_array_or_scalar(exec_mapper, self.result)
 
 # }}}
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index fb468c74..c8aa48c3 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -192,14 +192,18 @@ def mpi_communication_entrypoint():
     from time import time
     t_last_step = time()
 
+    profile_data = {}
+
     for event in dt_stepper.run(t_end=final_t):
         if isinstance(event, dt_stepper.StateComputed):
             assert event.component_id == "w"
 
             step += 1
-
-            print(step, event.t, norm(queue, u=event.state_component[0]),
-                  time()-t_last_step)
+            n, profile_data = norm(queue, profile_data=profile_data, u=event.state_component[0])
+            if i_local_rank == 0:
+                print(step, event.t, n,
+                      time()-t_last_step)
+                print(profile_data)
 
             # if step % 10 == 0:
             #     vis.write_vtk_file("rank%d-fld-%04d.vtu" % (i_local_rank, step),
@@ -208,13 +212,23 @@ def mpi_communication_entrypoint():
             t_last_step = time()
     logger.debug("Rank %d exiting", i_local_rank)
 
+    print("""execute() for rank %d:
+            \tInstruction Evaluation: %f%%
+            \tFuture Evaluation: %f%%
+            \tBusy Wait: %f%%
+            \tTotal: %f seconds""" % (i_local_rank,
+                                      profile_data['insn_eval_time'] / profile_data['total_time'] * 100,
+                                      profile_data['future_eval_time'] / profile_data['total_time'] * 100,
+                                      profile_data['busy_wait_time'] / profile_data['total_time'] * 100,
+                                      profile_data['total_time']))
+
 
 # {{{ MPI test pytest entrypoint
 
-# @pytest.mark.mpi
-# @pytest.mark.parametrize("num_ranks", [3])
+@pytest.mark.mpi
+@pytest.mark.parametrize("num_ranks", [3])
 # FIXME: gitlab runs forever on this.
-@pytest.mark.skip()
+# @pytest.mark.skip()
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
 
@@ -229,9 +243,9 @@ def test_mpi(num_ranks):
         env=newenv)
 
 
-# @pytest.mark.mpi
+@pytest.mark.mpi
 # FIXME: gitlab runs forever on this.
-@pytest.mark.skip()
+# @pytest.mark.skip()
 def test_simple_mpi():
     pytest.importorskip("mpi4py")
 
-- 
GitLab


From dec6f1fda16d810d7f87763913a340cfcf7f0146 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Thu, 5 Apr 2018 21:57:06 -0500
Subject: [PATCH 59/83] Print profile data and test funtion on gitlab

---
 grudge/symbolic/compiler.py    |  9 ++++++---
 test/test_mpi_communication.py | 37 +++++++++++++++++++---------------
 2 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index a312709c..0b47d685 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -528,7 +528,8 @@ class Code(object):
                     for i in range(len(futures)):
                         if futures[i].is_ready():
                             if profile_data is not None:
-                                profile_data['busy_wait_time'] += time() - busy_wait_start_time
+                                profile_data['busy_wait_time'] +=\
+                                        time() - busy_wait_start_time
                                 future_start_time = time()
 
                             future = futures.pop(i)
@@ -543,7 +544,8 @@ class Code(object):
                             did_eval_future = True
 
                             if profile_data is not None:
-                                profile_data['future_eval_time'] += time() - future_start_time
+                                profile_data['future_eval_time'] +=\
+                                        time() - future_start_time
                             break
 
         if len(done_insns) < len(self.instructions):
@@ -559,7 +561,8 @@ class Code(object):
 
         from pytools.obj_array import with_object_array_or_scalar
         if profile_data is not None:
-            return with_object_array_or_scalar(exec_mapper, self.result), profile_data
+            return (with_object_array_or_scalar(exec_mapper, self.result),
+                    profile_data)
         return with_object_array_or_scalar(exec_mapper, self.result)
 
 # }}}
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index c8aa48c3..e6bdef13 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -174,14 +174,17 @@ def mpi_communication_entrypoint():
     # 1/0
 
     def rhs(t, w):
-        return bound_op(queue, t=t, w=w)
+        val, rhs.profile_data = bound_op(queue, profile_data=rhs.profile_data, t=t, w=w)
+        return val
+    rhs.profile_data = {}
 
     dt_stepper = set_up_rk4("w", dt, fields, rhs)
 
     final_t = 4
     nsteps = int(final_t/dt)
     print("rank=%d dt=%g nsteps=%d" % (i_local_rank, dt, nsteps))
-
+    # NOTE: Testing function in getlab....
+    return
     # from grudge.shortcuts import make_visualizer
     # vis = make_visualizer(vol_discr, vis_order=order)
 
@@ -192,35 +195,37 @@ def mpi_communication_entrypoint():
     from time import time
     t_last_step = time()
 
-    profile_data = {}
-
     for event in dt_stepper.run(t_end=final_t):
         if isinstance(event, dt_stepper.StateComputed):
             assert event.component_id == "w"
 
             step += 1
-            n, profile_data = norm(queue, profile_data=profile_data, u=event.state_component[0])
-            if i_local_rank == 0:
-                print(step, event.t, n,
-                      time()-t_last_step)
-                print(profile_data)
+            print(step, event.t, norm(queue, u=event.state_component[0]),
+                  time()-t_last_step)
+            # if mesh_dist.is_mananger_rank():
+            #     print(rhs.profile_data)
 
             # if step % 10 == 0:
             #     vis.write_vtk_file("rank%d-fld-%04d.vtu" % (i_local_rank, step),
             #                        [("u", event.state_component[0]),
             #                         ("v", event.state_component[1:])])
             t_last_step = time()
-    logger.debug("Rank %d exiting", i_local_rank)
 
-    print("""execute() for rank %d:
+    def print_profile_data(data):
+        print("""execute() for rank %d:
             \tInstruction Evaluation: %f%%
             \tFuture Evaluation: %f%%
             \tBusy Wait: %f%%
-            \tTotal: %f seconds""" % (i_local_rank,
-                                      profile_data['insn_eval_time'] / profile_data['total_time'] * 100,
-                                      profile_data['future_eval_time'] / profile_data['total_time'] * 100,
-                                      profile_data['busy_wait_time'] / profile_data['total_time'] * 100,
-                                      profile_data['total_time']))
+            \tTotal: %f seconds""" %
+            (i_local_rank,
+             data['insn_eval_time'] / data['total_time'] * 100,
+             data['future_eval_time'] / data['total_time'] * 100,
+             data['busy_wait_time'] / data['total_time'] * 100,
+             data['total_time']))
+
+    # if mesh_dist.is_mananger_rank():
+    print_profile_data(rhs.profile_data)
+    logger.debug("Rank %d exiting", i_local_rank)
 
 
 # {{{ MPI test pytest entrypoint
-- 
GitLab


From 282c92847b82b9b55abf209fcafe38151057ee53 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Thu, 5 Apr 2018 23:43:36 -0500
Subject: [PATCH 60/83] Remove commented lines

---
 test/test_mpi_communication.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index e6bdef13..10e69726 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -183,8 +183,7 @@ def mpi_communication_entrypoint():
     final_t = 4
     nsteps = int(final_t/dt)
     print("rank=%d dt=%g nsteps=%d" % (i_local_rank, dt, nsteps))
-    # NOTE: Testing function in getlab....
-    return
+
     # from grudge.shortcuts import make_visualizer
     # vis = make_visualizer(vol_discr, vis_order=order)
 
@@ -202,8 +201,6 @@ def mpi_communication_entrypoint():
             step += 1
             print(step, event.t, norm(queue, u=event.state_component[0]),
                   time()-t_last_step)
-            # if mesh_dist.is_mananger_rank():
-            #     print(rhs.profile_data)
 
             # if step % 10 == 0:
             #     vis.write_vtk_file("rank%d-fld-%04d.vtu" % (i_local_rank, step),
@@ -223,7 +220,6 @@ def mpi_communication_entrypoint():
              data['busy_wait_time'] / data['total_time'] * 100,
              data['total_time']))
 
-    # if mesh_dist.is_mananger_rank():
     print_profile_data(rhs.profile_data)
     logger.debug("Rank %d exiting", i_local_rank)
 
-- 
GitLab


From 7549b22c87dd58d2696a677610bb087114b33e14 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@illinois.edu>
Date: Fri, 6 Apr 2018 00:08:28 -0500
Subject: [PATCH 61/83] Working

---
 grudge/symbolic/compiler.py    | 10 ++++------
 test/test_mpi_communication.py | 13 +++++++------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index 0b47d685..d6d0e3fb 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -493,13 +493,13 @@ class Code(object):
 
         while True:
             try:
+                if profile_data is not None:
+                    insn_start_time = time()
+                    
                 insn, discardable_vars = self.get_next_step(
                     frozenset(list(context.keys())),
                     frozenset(done_insns))
 
-                if profile_data is not None:
-                    insn_start_time = time()
-
                 done_insns.add(insn)
                 for name in discardable_vars:
                     del context[name]
@@ -556,11 +556,9 @@ class Code(object):
             raise RuntimeError("not all instructions are reachable"
                     "--did you forget to pass a value for a placeholder?")
 
-        if profile_data is not None:
-            profile_data['total_time'] += time() - start_time
-
         from pytools.obj_array import with_object_array_or_scalar
         if profile_data is not None:
+            profile_data['total_time'] += time() - start_time
             return (with_object_array_or_scalar(exec_mapper, self.result),
                     profile_data)
         return with_object_array_or_scalar(exec_mapper, self.result)
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 10e69726..46070302 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -174,7 +174,8 @@ def mpi_communication_entrypoint():
     # 1/0
 
     def rhs(t, w):
-        val, rhs.profile_data = bound_op(queue, profile_data=rhs.profile_data, t=t, w=w)
+        val, rhs.profile_data = bound_op(queue, profile_data=rhs.profile_data,
+                                         t=t, w=w)
         return val
     rhs.profile_data = {}
 
@@ -226,10 +227,10 @@ def mpi_communication_entrypoint():
 
 # {{{ MPI test pytest entrypoint
 
-@pytest.mark.mpi
-@pytest.mark.parametrize("num_ranks", [3])
+# @pytest.mark.mpi
+# @pytest.mark.parametrize("num_ranks", [3])
 # FIXME: gitlab runs forever on this.
-# @pytest.mark.skip()
+@pytest.mark.skip()
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
 
@@ -244,9 +245,9 @@ def test_mpi(num_ranks):
         env=newenv)
 
 
-@pytest.mark.mpi
+# @pytest.mark.mpi
 # FIXME: gitlab runs forever on this.
-# @pytest.mark.skip()
+@pytest.mark.skip()
 def test_simple_mpi():
     pytest.importorskip("mpi4py")
 
-- 
GitLab


From 5a447d779b880224fc01c59e5a898d644523bc0e Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@illinois.edu>
Date: Fri, 6 Apr 2018 10:21:27 -0500
Subject: [PATCH 62/83] Fix whitespace

---
 grudge/symbolic/compiler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index d6d0e3fb..a85c8926 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -495,7 +495,7 @@ class Code(object):
             try:
                 if profile_data is not None:
                     insn_start_time = time()
-                    
+
                 insn, discardable_vars = self.get_next_step(
                     frozenset(list(context.keys())),
                     frozenset(done_insns))
-- 
GitLab


From c91e4f8620ad836e0fa4daf2b56c8b0377cbfd22 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Mon, 16 Apr 2018 23:48:13 -0500
Subject: [PATCH 63/83] Working

---
 test/test_mpi_communication.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 46070302..7777d14d 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -168,6 +168,14 @@ def mpi_communication_entrypoint():
     #           Fails because: "found faces without boundary conditions"
     # op.check_bc_coverage(local_mesh)
 
+    from pytools.log import LogManager, \
+            add_general_quantities, \
+            add_run_info
+    log_filename = None
+    logmgr = LogManager(log_filename, "w", comm)
+    add_run_info(logmgr)
+    add_general_quantities(logmgr)
+
     # print(sym.pretty(op.sym_operator()))
     bound_op = bind(vol_discr, op.sym_operator())
     # print(bound_op)
@@ -196,6 +204,8 @@ def mpi_communication_entrypoint():
     t_last_step = time()
 
     for event in dt_stepper.run(t_end=final_t):
+        logmgr.tick_before()
+        logmgr.tick_after()
         if isinstance(event, dt_stepper.StateComputed):
             assert event.component_id == "w"
 
@@ -222,15 +232,16 @@ def mpi_communication_entrypoint():
              data['total_time']))
 
     print_profile_data(rhs.profile_data)
+    logmgr.close()
     logger.debug("Rank %d exiting", i_local_rank)
 
 
 # {{{ MPI test pytest entrypoint
 
-# @pytest.mark.mpi
-# @pytest.mark.parametrize("num_ranks", [3])
+@pytest.mark.mpi
+@pytest.mark.parametrize("num_ranks", [3])
 # FIXME: gitlab runs forever on this.
-@pytest.mark.skip()
+# @pytest.mark.skip()
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
 
@@ -245,9 +256,9 @@ def test_mpi(num_ranks):
         env=newenv)
 
 
-# @pytest.mark.mpi
+@pytest.mark.mpi
 # FIXME: gitlab runs forever on this.
-@pytest.mark.skip()
+# @pytest.mark.skip()
 def test_simple_mpi():
     pytest.importorskip("mpi4py")
 
-- 
GitLab


From 478f6c705ee2e32d4afac3eb11f3a5be7489dd90 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Mon, 23 Apr 2018 13:59:21 -0500
Subject: [PATCH 64/83] Working

---
 grudge/execution.py            |  5 +++--
 grudge/symbolic/compiler.py    |  7 ++++++-
 test/test_mpi_communication.py | 13 +++++++++++--
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 5be80e9f..fae28410 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -483,7 +483,7 @@ class BoundOperator(object):
                 + sep
                 + str(self.eval_code))
 
-    def __call__(self, queue, profile_data=None, **context):
+    def __call__(self, queue, profile_data=None, log_quantities=None, **context):
         import pyopencl.array as cl_array
 
         def replace_queue(a):
@@ -512,7 +512,8 @@ class BoundOperator(object):
             new_context[name] = with_object_array_or_scalar(replace_queue, var)
 
         return self.eval_code.execute(
-                ExecutionMapper(queue, new_context, self), profile_data=profile_data)
+                ExecutionMapper(queue, new_context, self), profile_data=profile_data,
+                                log_quantities=log_quantities)
 
 # }}}
 
diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index a85c8926..439731f6 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -477,7 +477,7 @@ class Code(object):
 
         return argmax2(available_insns), discardable_vars
 
-    def execute(self, exec_mapper, pre_assign_check=None, profile_data=None):
+    def execute(self, exec_mapper, pre_assign_check=None, profile_data=None, log_quantities=None):
         if profile_data is not None:
             from time import time
             start_time = time()
@@ -505,6 +505,11 @@ class Code(object):
                     del context[name]
 
                 mapper_method = getattr(exec_mapper, insn.mapper_method)
+                if log_quantities is not None:
+                    from pytools.log import time_and_count_function
+                    mapper_method = time_and_count_function(mapper_method,
+                                                    log_quantities["timer"],
+                                                    log_quantities["counter"])
                 assignments, new_futures = mapper_method(insn)
 
                 for target, value in assignments:
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 7777d14d..70883b5b 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -170,11 +170,19 @@ def mpi_communication_entrypoint():
 
     from pytools.log import LogManager, \
             add_general_quantities, \
-            add_run_info
-    log_filename = None
+            add_run_info, \
+            IntervalTimer, EventCounter
+    # log_filename = None
+    log_filename = 'grudge_log.dat'
     logmgr = LogManager(log_filename, "w", comm)
     add_run_info(logmgr)
     add_general_quantities(logmgr)
+    log_quantities = {"timer": IntervalTimer("insn_timer",
+                                "Time spent evaluating instructions"),
+                      "counter": EventCounter("insn_counter",
+                                "Number of instructions evaluated")}
+    for quantity in log_quantities.values():
+        logmgr.add_quantity(quantity)
 
     # print(sym.pretty(op.sym_operator()))
     bound_op = bind(vol_discr, op.sym_operator())
@@ -183,6 +191,7 @@ def mpi_communication_entrypoint():
 
     def rhs(t, w):
         val, rhs.profile_data = bound_op(queue, profile_data=rhs.profile_data,
+                                         log_quantities=log_quantities,
                                          t=t, w=w)
         return val
     rhs.profile_data = {}
-- 
GitLab


From 568f766d395672f5979dc599c4e0ff31a2bc1c02 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Tue, 24 Apr 2018 12:03:59 -0500
Subject: [PATCH 65/83] working

---
 grudge/symbolic/compiler.py    | 28 ++++++++++++++++++++++++----
 test/test_mpi_communication.py | 32 +++++++++++++++++++++-----------
 2 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index 439731f6..4a858f1a 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -486,6 +486,8 @@ class Code(object):
                 profile_data['future_eval_time'] = 0
                 profile_data['busy_wait_time'] = 0
                 profile_data['total_time'] = 0
+        if log_quantities is not None:
+            exec_sub_timer = log_quantities["exec_timer"].start_sub_timer()
         context = exec_mapper.context
 
         futures = []
@@ -495,6 +497,9 @@ class Code(object):
             try:
                 if profile_data is not None:
                     insn_start_time = time()
+                if log_quantities is not None:
+                    insn_sub_timer =\
+                                log_quantities["insn_eval_timer"].start_sub_timer()
 
                 insn, discardable_vars = self.get_next_step(
                     frozenset(list(context.keys())),
@@ -506,10 +511,11 @@ class Code(object):
 
                 mapper_method = getattr(exec_mapper, insn.mapper_method)
                 if log_quantities is not None:
-                    from pytools.log import time_and_count_function
-                    mapper_method = time_and_count_function(mapper_method,
-                                                    log_quantities["timer"],
-                                                    log_quantities["counter"])
+                    if isinstance(insn, RankDataSwapAssign):
+                        from pytools.log import time_and_count_function
+                        mapper_method = time_and_count_function(mapper_method,
+                                        log_quantities["rank_data_swap_timer"],
+                                        log_quantities["rank_data_swap_counter"])
                 assignments, new_futures = mapper_method(insn)
 
                 for target, value in assignments:
@@ -520,6 +526,8 @@ class Code(object):
                 futures.extend(new_futures)
                 if profile_data is not None:
                     profile_data['insn_eval_time'] += time() - insn_start_time
+                if log_quantities is not None:
+                    insn_sub_timer.stop().submit()
             except self.NoInstructionAvailable:
                 if not futures:
                     # No more instructions or futures. We are done.
@@ -527,6 +535,9 @@ class Code(object):
                 # Busy wait for a new future
                 if profile_data is not None:
                     busy_wait_start_time = time()
+                if log_quantities is not None:
+                    busy_sub_timer =\
+                            log_quantities["busy_wait_timer"].start_sub_timer()
 
                 did_eval_future = False
                 while not did_eval_future:
@@ -536,6 +547,11 @@ class Code(object):
                                 profile_data['busy_wait_time'] +=\
                                         time() - busy_wait_start_time
                                 future_start_time = time()
+                            if log_quantities is not None:
+                                busy_sub_timer.stop().submit()
+                                future_sub_timer =\
+                                            log_quantities["future_eval_timer"]\
+                                                                .start_sub_timer()
 
                             future = futures.pop(i)
                             assignments, new_futures = future()
@@ -551,6 +567,8 @@ class Code(object):
                             if profile_data is not None:
                                 profile_data['future_eval_time'] +=\
                                         time() - future_start_time
+                            if log_quantities is not None:
+                                future_sub_timer.stop().submit()
                             break
 
         if len(done_insns) < len(self.instructions):
@@ -566,6 +584,8 @@ class Code(object):
             profile_data['total_time'] += time() - start_time
             return (with_object_array_or_scalar(exec_mapper, self.result),
                     profile_data)
+        if log_quantities is not None:
+            exec_sub_timer.stop().submit()
         return with_object_array_or_scalar(exec_mapper, self.result)
 
 # }}}
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 70883b5b..13c6614a 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -172,15 +172,24 @@ def mpi_communication_entrypoint():
             add_general_quantities, \
             add_run_info, \
             IntervalTimer, EventCounter
-    # log_filename = None
-    log_filename = 'grudge_log.dat'
+    log_filename = None
+    # log_filename = 'grudge_log.dat'
     logmgr = LogManager(log_filename, "w", comm)
     add_run_info(logmgr)
     add_general_quantities(logmgr)
-    log_quantities = {"timer": IntervalTimer("insn_timer",
-                                "Time spent evaluating instructions"),
-                      "counter": EventCounter("insn_counter",
-                                "Number of instructions evaluated")}
+    log_quantities =\
+    {"rank_data_swap_timer": IntervalTimer("rank_data_swap_timer",
+                            "Time spent evaluating RankDataSwapAssign"),
+    "rank_data_swap_counter": EventCounter("rank_data_swap_counter",
+                            "Number of RankDataSwapAssign instructions evaluated"),
+    "exec_timer": IntervalTimer("exec_timer",
+                            "Total time spent executing instructions"),
+    "insn_eval_timer": IntervalTimer("insn_eval_timer",
+                            "Time spend evaluating instructions"),
+    "future_eval_timer": IntervalTimer("future_eval_timer",
+                            "Time spent evaluating futures"),
+    "busy_wait_timer": IntervalTimer("busy_wait_timer",
+                            "Time wasted doing busy wait")}
     for quantity in log_quantities.values():
         logmgr.add_quantity(quantity)
 
@@ -213,6 +222,7 @@ def mpi_communication_entrypoint():
     t_last_step = time()
 
     for event in dt_stepper.run(t_end=final_t):
+        # FIXME: I think these ticks need to be put somewhere else
         logmgr.tick_before()
         logmgr.tick_after()
         if isinstance(event, dt_stepper.StateComputed):
@@ -247,10 +257,10 @@ def mpi_communication_entrypoint():
 
 # {{{ MPI test pytest entrypoint
 
-@pytest.mark.mpi
-@pytest.mark.parametrize("num_ranks", [3])
+# @pytest.mark.mpi
+# @pytest.mark.parametrize("num_ranks", [3])
 # FIXME: gitlab runs forever on this.
-# @pytest.mark.skip()
+@pytest.mark.skip()
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
 
@@ -265,9 +275,9 @@ def test_mpi(num_ranks):
         env=newenv)
 
 
-@pytest.mark.mpi
+# @pytest.mark.mpi
 # FIXME: gitlab runs forever on this.
-# @pytest.mark.skip()
+@pytest.mark.skip()
 def test_simple_mpi():
     pytest.importorskip("mpi4py")
 
-- 
GitLab


From 3d692880e29ecfb8e9d037ca78ada0dae134a08c Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Wed, 25 Apr 2018 14:54:29 -0500
Subject: [PATCH 66/83] Fix whitespace

---
 grudge/execution.py         | 5 +++--
 grudge/symbolic/compiler.py | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index fae28410..26382620 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -512,8 +512,9 @@ class BoundOperator(object):
             new_context[name] = with_object_array_or_scalar(replace_queue, var)
 
         return self.eval_code.execute(
-                ExecutionMapper(queue, new_context, self), profile_data=profile_data,
-                                log_quantities=log_quantities)
+                ExecutionMapper(queue, new_context, self),
+                profile_data=profile_data,
+                log_quantities=log_quantities)
 
 # }}}
 
diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index 4a858f1a..391f7e98 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -477,7 +477,8 @@ class Code(object):
 
         return argmax2(available_insns), discardable_vars
 
-    def execute(self, exec_mapper, pre_assign_check=None, profile_data=None, log_quantities=None):
+    def execute(self, exec_mapper, pre_assign_check=None, profile_data=None,
+                log_quantities=None):
         if profile_data is not None:
             from time import time
             start_time = time()
-- 
GitLab


From a97dc1975f9ade9e0dd62cd139649ad8f0afa7a5 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Sat, 28 Apr 2018 09:00:21 -0500
Subject: [PATCH 67/83] working

---
 test/test_mpi_communication.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 13c6614a..d2a60620 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -221,10 +221,9 @@ def mpi_communication_entrypoint():
     from time import time
     t_last_step = time()
 
+    logmgr.tick_before()
     for event in dt_stepper.run(t_end=final_t):
         # FIXME: I think these ticks need to be put somewhere else
-        logmgr.tick_before()
-        logmgr.tick_after()
         if isinstance(event, dt_stepper.StateComputed):
             assert event.component_id == "w"
 
@@ -237,6 +236,10 @@ def mpi_communication_entrypoint():
             #                        [("u", event.state_component[0]),
             #                         ("v", event.state_component[1:])])
             t_last_step = time()
+        logmgr.tick_after()
+        logmgr.tick_before()
+    logmgr.tick_after()
+
 
     def print_profile_data(data):
         print("""execute() for rank %d:
@@ -257,10 +260,10 @@ def mpi_communication_entrypoint():
 
 # {{{ MPI test pytest entrypoint
 
-# @pytest.mark.mpi
-# @pytest.mark.parametrize("num_ranks", [3])
+@pytest.mark.mpi
+@pytest.mark.parametrize("num_ranks", [3])
 # FIXME: gitlab runs forever on this.
-@pytest.mark.skip()
+# @pytest.mark.skip()
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
 
-- 
GitLab


From 5403ffcf6791aef749d42f017d1230d54615ea56 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@illinois.edu>
Date: Mon, 30 Apr 2018 14:54:04 -0500
Subject: [PATCH 68/83] Fix whitespace

---
 test/test_mpi_communication.py | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index d2a60620..8b06b9eb 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -173,23 +173,24 @@ def mpi_communication_entrypoint():
             add_run_info, \
             IntervalTimer, EventCounter
     log_filename = None
+    # NOTE: LogManager hangs when using a file on a shared directory.
     # log_filename = 'grudge_log.dat'
     logmgr = LogManager(log_filename, "w", comm)
     add_run_info(logmgr)
     add_general_quantities(logmgr)
     log_quantities =\
-    {"rank_data_swap_timer": IntervalTimer("rank_data_swap_timer",
-                            "Time spent evaluating RankDataSwapAssign"),
-    "rank_data_swap_counter": EventCounter("rank_data_swap_counter",
-                            "Number of RankDataSwapAssign instructions evaluated"),
-    "exec_timer": IntervalTimer("exec_timer",
-                            "Total time spent executing instructions"),
-    "insn_eval_timer": IntervalTimer("insn_eval_timer",
-                            "Time spend evaluating instructions"),
-    "future_eval_timer": IntervalTimer("future_eval_timer",
-                            "Time spent evaluating futures"),
-    "busy_wait_timer": IntervalTimer("busy_wait_timer",
-                            "Time wasted doing busy wait")}
+        {"rank_data_swap_timer": IntervalTimer("rank_data_swap_timer",
+        "Time spent evaluating RankDataSwapAssign"),
+        "rank_data_swap_counter": EventCounter("rank_data_swap_counter",
+        "Number of RankDataSwapAssign instructions evaluated"),
+        "exec_timer": IntervalTimer("exec_timer",
+        "Total time spent executing instructions"),
+        "insn_eval_timer": IntervalTimer("insn_eval_timer",
+        "Time spend evaluating instructions"),
+        "future_eval_timer": IntervalTimer("future_eval_timer",
+        "Time spent evaluating futures"),
+        "busy_wait_timer": IntervalTimer("busy_wait_timer",
+        "Time wasted doing busy wait")}
     for quantity in log_quantities.values():
         logmgr.add_quantity(quantity)
 
@@ -223,7 +224,6 @@ def mpi_communication_entrypoint():
 
     logmgr.tick_before()
     for event in dt_stepper.run(t_end=final_t):
-        # FIXME: I think these ticks need to be put somewhere else
         if isinstance(event, dt_stepper.StateComputed):
             assert event.component_id == "w"
 
@@ -240,7 +240,6 @@ def mpi_communication_entrypoint():
         logmgr.tick_before()
     logmgr.tick_after()
 
-
     def print_profile_data(data):
         print("""execute() for rank %d:
             \tInstruction Evaluation: %f%%
@@ -260,10 +259,10 @@ def mpi_communication_entrypoint():
 
 # {{{ MPI test pytest entrypoint
 
-@pytest.mark.mpi
-@pytest.mark.parametrize("num_ranks", [3])
+# @pytest.mark.mpi
+# @pytest.mark.parametrize("num_ranks", [3])
 # FIXME: gitlab runs forever on this.
-# @pytest.mark.skip()
+@pytest.mark.skip()
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
 
-- 
GitLab


From 4206f1db6ef32f7ee2cc5ed457371a8ed876968a Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Thu, 10 May 2018 13:16:42 -0500
Subject: [PATCH 69/83] execute() now submits exec timer before returning

---
 grudge/symbolic/compiler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index 391f7e98..be8e4378 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -580,13 +580,13 @@ class Code(object):
             raise RuntimeError("not all instructions are reachable"
                     "--did you forget to pass a value for a placeholder?")
 
+        if log_quantities is not None:
+            exec_sub_timer.stop().submit()
         from pytools.obj_array import with_object_array_or_scalar
         if profile_data is not None:
             profile_data['total_time'] += time() - start_time
             return (with_object_array_or_scalar(exec_mapper, self.result),
                     profile_data)
-        if log_quantities is not None:
-            exec_sub_timer.stop().submit()
         return with_object_array_or_scalar(exec_mapper, self.result)
 
 # }}}
-- 
GitLab


From 9842b8691506a208d6827d714fe0a761f15fa2f3 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Thu, 10 May 2018 15:09:39 -0500
Subject: [PATCH 70/83] Fix log step counter

---
 test/test_mpi_communication.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 8b06b9eb..6860fe47 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -236,8 +236,8 @@ def mpi_communication_entrypoint():
             #                        [("u", event.state_component[0]),
             #                         ("v", event.state_component[1:])])
             t_last_step = time()
-        logmgr.tick_after()
-        logmgr.tick_before()
+            logmgr.tick_after()
+            logmgr.tick_before()
     logmgr.tick_after()
 
     def print_profile_data(data):
-- 
GitLab


From aaccb9661e140005cb274c4b07a901bccd7acf76 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Thu, 10 May 2018 15:29:17 -0500
Subject: [PATCH 71/83] Add benchmarking code to for MPI

---
 examples/benchmark_grudge/benchmark_mpi.py | 134 +++++++++++++++++++++
 examples/benchmark_grudge/run_benchmark.sh | 122 +++++++++++++++++++
 2 files changed, 256 insertions(+)
 create mode 100644 examples/benchmark_grudge/benchmark_mpi.py
 create mode 100755 examples/benchmark_grudge/run_benchmark.sh

diff --git a/examples/benchmark_grudge/benchmark_mpi.py b/examples/benchmark_grudge/benchmark_mpi.py
new file mode 100644
index 00000000..38612322
--- /dev/null
+++ b/examples/benchmark_grudge/benchmark_mpi.py
@@ -0,0 +1,134 @@
+import os
+import numpy as np
+import pyopencl as cl
+
+from grudge import sym, bind, DGDiscretizationWithBoundaries
+from grudge.shortcuts import set_up_rk4
+
+
+def simple_wave_entrypoint(dim=2, num_elems=256, order=4, num_steps=30,
+                           log_filename="grudge.dat"):
+    cl_ctx = cl.create_some_context()
+    queue = cl.CommandQueue(cl_ctx)
+
+    from mpi4py import MPI
+    comm = MPI.COMM_WORLD
+    num_parts = comm.Get_size()
+    n = int(num_elems ** (1./dim))
+
+    from meshmode.distributed import MPIMeshDistributor
+    mesh_dist = MPIMeshDistributor(comm)
+
+    if mesh_dist.is_mananger_rank():
+        from meshmode.mesh.generation import generate_regular_rect_mesh
+        mesh = generate_regular_rect_mesh(a=(-0.5,)*dim,
+                                          b=(0.5,)*dim,
+                                          n=(n,)*dim)
+
+        from pymetis import part_graph
+        _, p = part_graph(num_parts,
+                          xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
+                          adjncy=mesh.nodal_adjacency.neighbors.tolist())
+        part_per_element = np.array(p)
+
+        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
+    else:
+        local_mesh = mesh_dist.receive_mesh_part()
+
+    vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order,
+                                               mpi_communicator=comm)
+
+    source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim]
+    source_width = 0.05
+    source_omega = 3
+
+    sym_x = sym.nodes(local_mesh.dim)
+    sym_source_center_dist = sym_x - source_center
+    sym_t = sym.ScalarVariable("t")
+
+    from grudge.models.wave import StrongWaveOperator
+    from meshmode.mesh import BTAG_ALL, BTAG_NONE
+    op = StrongWaveOperator(-0.1, vol_discr.dim,
+            source_f=(
+                sym.sin(source_omega*sym_t)
+                * sym.exp(
+                    -np.dot(sym_source_center_dist, sym_source_center_dist)
+                    / source_width**2)),
+            dirichlet_tag=BTAG_NONE,
+            neumann_tag=BTAG_NONE,
+            radiation_tag=BTAG_ALL,
+            flux_type="upwind")
+
+    from pytools.obj_array import join_fields
+    fields = join_fields(vol_discr.zeros(queue),
+            [vol_discr.zeros(queue) for i in range(vol_discr.dim)])
+
+    from pytools.log import LogManager, \
+            add_general_quantities, \
+            add_run_info, \
+            IntervalTimer, EventCounter
+    # NOTE: LogManager hangs when using a file on a shared directory.
+    logmgr = LogManager(log_filename, "w", comm)
+    add_run_info(logmgr)
+    add_general_quantities(logmgr)
+    log_quantities =\
+        {"rank_data_swap_timer": IntervalTimer("rank_data_swap_timer",
+                        "Time spent evaluating RankDataSwapAssign"),
+        "rank_data_swap_counter": EventCounter("rank_data_swap_counter",
+                        "Number of RankDataSwapAssign instructions evaluated"),
+        "exec_timer": IntervalTimer("exec_timer",
+                        "Total time spent executing instructions"),
+        "insn_eval_timer": IntervalTimer("insn_eval_timer",
+                        "Time spend evaluating instructions"),
+        "future_eval_timer": IntervalTimer("future_eval_timer",
+                        "Time spent evaluating futures"),
+        "busy_wait_timer": IntervalTimer("busy_wait_timer",
+                        "Time wasted doing busy wait")}
+    for quantity in log_quantities.values():
+        logmgr.add_quantity(quantity)
+
+    bound_op = bind(vol_discr, op.sym_operator())
+
+    def rhs(t, w):
+        val, rhs.profile_data = bound_op(queue, profile_data=rhs.profile_data,
+                                                log_quantities=log_quantities,
+                                                t=t, w=w)
+        return val
+    rhs.profile_data = {}
+
+    dt = 0.04
+    dt_stepper = set_up_rk4("w", dt, fields, rhs)
+
+    logmgr.tick_before()
+    for event in dt_stepper.run(t_end=dt * num_steps):
+        if isinstance(event, dt_stepper.StateComputed):
+            logmgr.tick_after()
+            logmgr.tick_before()
+    logmgr.tick_after()
+
+    def print_profile_data(data):
+        print("""execute() for rank %d:
+            \tInstruction Evaluation: %f%%
+            \tFuture Evaluation: %f%%
+            \tBusy Wait: %f%%
+            \tTotal: %f seconds""" %
+            (comm.Get_rank(),
+             data['insn_eval_time'] / data['total_time'] * 100,
+             data['future_eval_time'] / data['total_time'] * 100,
+             data['busy_wait_time'] / data['total_time'] * 100,
+             data['total_time']))
+
+    print_profile_data(rhs.profile_data)
+    logmgr.close()
+
+
+if __name__ == "__main__":
+    assert "RUN_WITHIN_MPI" in os.environ, "Must run within mpi"
+    import sys
+    assert len(sys.argv) == 5, \
+        "Usage: %s %s num_elems order num_steps logfile" \
+        % (sys.executable, sys.argv[0])
+    simple_wave_entrypoint(num_elems=int(sys.argv[1]),
+                           order=int(sys.argv[2]),
+                           num_steps=int(sys.argv[3]),
+                           log_filename=sys.argv[4])
diff --git a/examples/benchmark_grudge/run_benchmark.sh b/examples/benchmark_grudge/run_benchmark.sh
new file mode 100755
index 00000000..6c535dfd
--- /dev/null
+++ b/examples/benchmark_grudge/run_benchmark.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+
+# Weak scaling: We run our code on one computer, then we buy a second computer
+# and we can run twice as much code in the same amount of time.
+
+# Strong scaling: We run our code on one computer, then we buy a second computer
+# and we can run the same code in half the time.
+
+# Examples:
+# ./run_benchmark.sh -t WEAK -n 100 -r 20 -s 1000 -l ~/weak_scaling.dat -o weak_scaling.txt
+# ./run_benchmark.sh -t STRONG -n 100 -r 20 -s 1000 -l ~/strong_scaling.dat -o strong_scaling.txt
+
+set -eu
+
+# NOTE: benchmark_mpi.py hangs when logfile is in a shared directory.
+USAGE="Usage: $0 -t <WEAK|STRONG> -n num_elems -r order -s num_steps -l logfile -o outfile"
+while getopts "t:n:r:s:l:o:" OPT; do
+  case $OPT in
+    t)
+      case $OPTARG in
+        WEAK)
+          SCALING_TYPE='WEAK'
+          ;;
+        STRONG)
+          SCALING_TYPE='STRONG'
+          ;;
+        *)
+          echo $USAGE
+          exit 1
+          ;;
+      esac
+      ;;
+    n)
+      NUM_ELEMS=$OPTARG
+      ;;
+    r)
+      ORDER=$OPTARG
+      ;;
+    s)
+      NUM_STEPS=$OPTARG
+      ;;
+    l)
+      LOGFILE=$OPTARG
+      ;;
+    o)
+      OUTFILE=$OPTARG
+      ;;
+    *)
+      echo $USAGE
+      exit 1
+      ;;
+  esac
+done
+
+
+# NOTE: We want to make sure we run grudge in the right environment.
+SHARED="/home/eshoag2/shared"
+source $SHARED/miniconda3/bin/activate inteq
+PYTHON=$(which python)
+BENCHMARK_MPI="$SHARED/grudge/examples/benchmark_grudge/benchmark_mpi.py"
+
+# Assume HOSTS_LIST is sorted in increasing order starting with one host.
+HOSTS_LIST="\
+porter \
+porter,stout \
+porter,stout,koelsch"
+
+ENVIRONMENT_VARS="\
+-x RUN_WITHIN_MPI=1 \
+-x PYOPENCL_CTX=0 \
+-x POCL_AFFINITY=1"
+
+PERF_EVENTS="\
+cpu-cycles,\
+instructions,\
+task-clock"
+
+TEMPDIR=$(mktemp -d)
+trap 'rm -rf $TEMPDIR' EXIT HUP INT QUIT TERM
+
+echo "$(date): Testing $SCALING_TYPE scaling" | tee -a $OUTFILE
+
+NUM_HOSTS=1
+BASE_NUM_ELEMS=$NUM_ELEMS
+for HOSTS in $HOSTS_LIST; do
+
+  if [ $SCALING_TYPE = 'WEAK' ]; then
+    NUM_ELEMS=$(echo $BASE_NUM_ELEMS $NUM_HOSTS | awk '{ print $1 * $2 }')
+  fi
+
+  BENCHMARK_CMD="$PYTHON $BENCHMARK_MPI $NUM_ELEMS $ORDER $NUM_STEPS $LOGFILE.trial$NUM_HOSTS"
+  MPI_CMD="mpiexec --output-filename $TEMPDIR/output -H $HOSTS $ENVIRONMENT_VARS $BENCHMARK_CMD"
+  echo "Executing: $MPI_CMD"
+
+  # NOTE: perf does not follow mpi accross different nodes.
+  # Instead, perf will follow all processes on the porter node.
+  echo "====================Using $NUM_HOSTS host(s)===================" >> $OUTFILE
+  START_TIME=$(date +%s)
+  perf stat --append -o $OUTFILE -e $PERF_EVENTS $MPI_CMD
+  DURATION=$(($(date +%s) - $START_TIME))
+  echo "Finished in $DURATION seconds"
+
+  echo "===================Output of Python===================" >> $OUTFILE
+  cat $TEMPDIR/* >> $OUTFILE
+  echo "======================================================" >> $OUTFILE
+  rm $TEMPDIR/*
+
+  if [ $NUM_HOSTS -eq 1 ]; then
+    BASE_DURATION=$DURATION
+  fi
+
+  # Efficiency is expected / actual
+  if [ $SCALING_TYPE = 'STRONG' ]; then
+    EFFICIENCY=$(echo $DURATION $BASE_DURATION $NUM_HOSTS | awk '{ print $2 / ($3 * $1) * 100"%" }')
+  elif [ $SCALING_TYPE = 'WEAK' ]; then
+    EFFICIENCY=$(echo $DURATION $BASE_DURATION | awk '{ print $2 / $1 * 100"%" }')
+  fi
+
+  echo "Efficiency for $SCALING_TYPE scaling is $EFFICIENCY for $NUM_HOSTS host(s)." | tee -a $OUTFILE
+
+  ((NUM_HOSTS++))
+done
-- 
GitLab


From cf9d21ae4837ab07a0696c5e7b78a11555410b2e Mon Sep 17 00:00:00 2001
From: Ellis Hoag <eshoag2@illinois.edu>
Date: Thu, 10 May 2018 19:38:15 -0500
Subject: [PATCH 72/83] Fix bugs and add comments

---
 examples/benchmark_grudge/run_benchmark.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/benchmark_grudge/run_benchmark.sh b/examples/benchmark_grudge/run_benchmark.sh
index 6c535dfd..72eaca2b 100755
--- a/examples/benchmark_grudge/run_benchmark.sh
+++ b/examples/benchmark_grudge/run_benchmark.sh
@@ -89,7 +89,8 @@ for HOSTS in $HOSTS_LIST; do
   fi
 
   BENCHMARK_CMD="$PYTHON $BENCHMARK_MPI $NUM_ELEMS $ORDER $NUM_STEPS $LOGFILE.trial$NUM_HOSTS"
-  MPI_CMD="mpiexec --output-filename $TEMPDIR/output -H $HOSTS $ENVIRONMENT_VARS $BENCHMARK_CMD"
+  # NOTE: mpiexec recently updated so some things might act weird.
+  MPI_CMD="mpiexec --output-filename $TEMPDIR -H $HOSTS $ENVIRONMENT_VARS $BENCHMARK_CMD"
   echo "Executing: $MPI_CMD"
 
   # NOTE: perf does not follow mpi accross different nodes.
@@ -101,9 +102,9 @@ for HOSTS in $HOSTS_LIST; do
   echo "Finished in $DURATION seconds"
 
   echo "===================Output of Python===================" >> $OUTFILE
-  cat $TEMPDIR/* >> $OUTFILE
+  find $TEMPDIR -type f -exec cat {} \; >> $OUTFILE
   echo "======================================================" >> $OUTFILE
-  rm $TEMPDIR/*
+  rm -rf $TEMPDIR/*
 
   if [ $NUM_HOSTS -eq 1 ]; then
     BASE_DURATION=$DURATION
-- 
GitLab


From d567ba28ea5fb47a9a7b9f69147d044f09b3fccd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Fri, 22 Jun 2018 20:00:10 -0400
Subject: [PATCH 73/83] Revert requirements.txt back to upstream meshmode

---
 requirements.txt | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index ee4c5287..deb09394 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,4 @@ git+https://gitlab.tiker.net/inducer/dagrt.git
 git+https://gitlab.tiker.net/inducer/leap.git
 git+https://github.com/inducer/meshpy.git
 git+https://github.com/inducer/modepy.git
-
-# FIXME: Revert to this when merged
-#git+https://github.com/inducer/meshmode.git
-git+https://gitlab.tiker.net/eshoag2/meshmode.git@partition
+git+https://github.com/inducer/meshmode.git
-- 
GitLab


From de7c600acb8aedeff2559d78c283a820447e03b2 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 26 Jun 2018 23:08:25 -0500
Subject: [PATCH 74/83] Add pytest_cache to gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index ec1e4cd2..94648fab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,4 @@ run-debug-*
 *.dat
 
 .cache
+.pytest_cache
-- 
GitLab


From 170add4eec392ec7a6a0d794b0c3a2e3e82666e6 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 26 Jun 2018 23:09:10 -0500
Subject: [PATCH 75/83] Add mgmt rank interface to
 DGDiscretizationWithBoundaries

---
 grudge/discretization.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/grudge/discretization.py b/grudge/discretization.py
index b8796e75..20fc0505 100644
--- a/grudge/discretization.py
+++ b/grudge/discretization.py
@@ -87,6 +87,16 @@ class DGDiscretizationWithBoundaries(DiscretizationBase):
 
         self.mpi_communicator = mpi_communicator
 
+    def get_management_rank_index(self):
+        return 0
+
+    def is_management_rank(self):
+        if self.mpi_communicator is None:
+            return True
+        else:
+            return self.mpi_communicator.Get_rank() \
+                    == self._get_management_rank_index()
+
     def _set_up_distributed_communication(self, mpi_communicator, queue):
         from_dd = sym.DOFDesc("vol", sym.QTAG_NONE)
 
-- 
GitLab


From 849b9397c0f5744a0796af53afd041d3a8bd2e07 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 26 Jun 2018 23:10:47 -0500
Subject: [PATCH 76/83] Do not use func-local classes for MPI futures

---
 examples/wave/wave-min-mpi.py | 143 +++++++++++++++++++++++++
 examples/wave/wave.py         | 191 ----------------------------------
 grudge/execution.py           |  63 ++++++-----
 3 files changed, 178 insertions(+), 219 deletions(-)
 create mode 100644 examples/wave/wave-min-mpi.py
 delete mode 100644 examples/wave/wave.py

diff --git a/examples/wave/wave-min-mpi.py b/examples/wave/wave-min-mpi.py
new file mode 100644
index 00000000..26d22226
--- /dev/null
+++ b/examples/wave/wave-min-mpi.py
@@ -0,0 +1,143 @@
+"""Minimal example of a grudge driver."""
+
+from __future__ import division, print_function
+
+__copyright__ = "Copyright (C) 2015 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+import numpy as np
+import pyopencl as cl
+from grudge.shortcuts import set_up_rk4
+from grudge import sym, bind, DGDiscretizationWithBoundaries
+from mpi4py import MPI
+
+
+def main(write_output=True, order=4):
+    cl_ctx = cl.create_some_context()
+    queue = cl.CommandQueue(cl_ctx)
+
+    comm = MPI.COMM_WORLD
+    num_parts = comm.Get_size()
+
+    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
+    mesh_dist = MPIMeshDistributor(comm)
+
+    if mesh_dist.is_mananger_rank():
+        dims = 2
+        from meshmode.mesh.generation import generate_regular_rect_mesh
+        mesh = generate_regular_rect_mesh(
+                a=(-0.5,)*dims,
+                b=(0.5,)*dims,
+                n=(16,)*dims)
+
+        print("%d elements" % mesh.nelements)
+
+        part_per_element = get_partition_by_pymetis(mesh, num_parts)
+
+        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
+
+        del mesh
+
+    else:
+        local_mesh = mesh_dist.receive_mesh_part()
+
+    discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order,
+            mpi_communicator=comm)
+
+    if local_mesh.dim == 2:
+        dt = 0.04
+    elif local_mesh.dim == 3:
+        dt = 0.02
+
+    source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim]
+    source_width = 0.05
+    source_omega = 3
+
+    sym_x = sym.nodes(local_mesh.dim)
+    sym_source_center_dist = sym_x - source_center
+    sym_t = sym.ScalarVariable("t")
+
+    from grudge.models.wave import StrongWaveOperator
+    from meshmode.mesh import BTAG_ALL, BTAG_NONE
+    op = StrongWaveOperator(-0.1, discr.dim,
+            source_f=(
+                sym.sin(source_omega*sym_t)
+                * sym.exp(
+                    -np.dot(sym_source_center_dist, sym_source_center_dist)
+                    / source_width**2)),
+            dirichlet_tag=BTAG_NONE,
+            neumann_tag=BTAG_NONE,
+            radiation_tag=BTAG_ALL,
+            flux_type="upwind")
+
+    queue = cl.CommandQueue(discr.cl_context)
+    from pytools.obj_array import join_fields
+    fields = join_fields(discr.zeros(queue),
+            [discr.zeros(queue) for i in range(discr.dim)])
+
+    # FIXME
+    #dt = op.estimate_rk4_timestep(discr, fields=fields)
+
+    op.check_bc_coverage(local_mesh)
+
+    # print(sym.pretty(op.sym_operator()))
+    bound_op = bind(discr, op.sym_operator())
+
+    def rhs(t, w):
+        return bound_op(queue, t=t, w=w)
+
+    dt_stepper = set_up_rk4("w", dt, fields, rhs)
+
+    final_t = 10
+    nsteps = int(final_t/dt)
+    print("dt=%g nsteps=%d" % (dt, nsteps))
+
+    from grudge.shortcuts import make_visualizer
+    vis = make_visualizer(discr, vis_order=order)
+
+    step = 0
+
+    norm = bind(discr, sym.norm(2, sym.var("u")))
+
+    from time import time
+    t_last_step = time()
+
+    for event in dt_stepper.run(t_end=final_t):
+        if isinstance(event, dt_stepper.StateComputed):
+            assert event.component_id == "w"
+
+            step += 1
+
+            print(step, event.t, norm(queue, u=event.state_component[0]),
+                    time()-t_last_step)
+            if step % 10 == 0:
+                vis.write_vtk_file("fld-%04d.vtu" % step,
+                        [
+                            ("u", event.state_component[0]),
+                            ("v", event.state_component[1:]),
+                            ])
+            t_last_step = time()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/wave/wave.py b/examples/wave/wave.py
deleted file mode 100644
index 3d206d71..00000000
--- a/examples/wave/wave.py
+++ /dev/null
@@ -1,191 +0,0 @@
-# Copyright (C) 2007 Andreas Kloeckner
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-
-from __future__ import division
-from __future__ import absolute_import
-from __future__ import print_function
-import numpy as np
-from grudge.mesh import BTAG_ALL, BTAG_NONE
-from six.moves import range
-
-
-def main(write_output=True,
-        dir_tag=BTAG_NONE, neu_tag=TAG_NONE, rad_tag=BTAG_ALL,
-        flux_type_arg="upwind", dtype=np.float64, debug=[]):
-    from math import sin, cos, pi, exp, sqrt  # noqa
-
-    from grudge.backends import guess_run_context
-    rcon = guess_run_context()
-
-    dim = 2
-
-    if dim == 1:
-        if rcon.is_head_rank:
-            from grudge.mesh.generator import make_uniform_1d_mesh
-            mesh = make_uniform_1d_mesh(-10, 10, 500)
-    elif dim == 2:
-        from grudge.mesh.generator import make_rect_mesh
-        if rcon.is_head_rank:
-            mesh = make_rect_mesh(a=(-0.5, -0.5), b=(0.5, 0.5), max_area=0.008)
-    elif dim == 3:
-        if rcon.is_head_rank:
-            from grudge.mesh.generator import make_ball_mesh
-            mesh = make_ball_mesh(max_volume=0.0005)
-    else:
-        raise RuntimeError("bad number of dimensions")
-
-    if rcon.is_head_rank:
-        print("%d elements" % len(mesh.elements))
-        mesh_data = rcon.distribute_mesh(mesh)
-    else:
-        mesh_data = rcon.receive_mesh()
-
-    from grudge.timestep.runge_kutta import LSRK4TimeStepper
-    stepper = LSRK4TimeStepper(dtype=dtype)
-
-    from grudge.models.wave import StrongWaveOperator
-    from grudge.mesh import BTAG_ALL, BTAG_NONE  # noqa
-
-    source_center = np.array([0.1, 0.22])
-    source_width = 0.05
-    source_omega = 3
-
-    import grudge.symbolic as sym
-    sym_x = sym.nodes(2)
-    sym_source_center_dist = sym_x - source_center
-
-    op = StrongWaveOperator(-1, dim,
-            source_f=
-            sym.CFunction("sin")(source_omega*sym.ScalarParameter("t"))
-            * sym.CFunction("exp")(
-                -np.dot(sym_source_center_dist, sym_source_center_dist)
-                / source_width**2),
-            dirichlet_tag=dir_tag,
-            neumann_tag=neu_tag,
-            radiation_tag=rad_tag,
-            flux_type=flux_type_arg
-            )
-
-    discr = rcon.make_discretization(mesh_data, order=4, debug=debug,
-            default_scalar_type=dtype,
-            tune_for=op.sym_operator())
-
-    from grudge.visualization import VtkVisualizer
-    if write_output:
-        vis = VtkVisualizer(discr, rcon, "fld")
-
-    from grudge.tools import join_fields
-    fields = join_fields(discr.volume_zeros(dtype=dtype),
-            [discr.volume_zeros(dtype=dtype) for i in range(discr.dimensions)])
-
-    # {{{ diagnostics setup
-
-    from pytools.log import LogManager, \
-            add_general_quantities, \
-            add_simulation_quantities, \
-            add_run_info
-
-    if write_output:
-        log_file_name = "wave.dat"
-    else:
-        log_file_name = None
-
-    logmgr = LogManager(log_file_name, "w", rcon.communicator)
-    add_run_info(logmgr)
-    add_general_quantities(logmgr)
-    add_simulation_quantities(logmgr)
-    discr.add_instrumentation(logmgr)
-
-    from pytools.log import IntervalTimer
-    vis_timer = IntervalTimer("t_vis", "Time spent visualizing")
-    logmgr.add_quantity(vis_timer)
-    stepper.add_instrumentation(logmgr)
-
-    from grudge.log import LpNorm
-    u_getter = lambda: fields[0]
-    logmgr.add_quantity(LpNorm(u_getter, discr, 1, name="l1_u"))
-    logmgr.add_quantity(LpNorm(u_getter, discr, name="l2_u"))
-
-    logmgr.add_watches(["step.max", "t_sim.max", "l2_u", "t_step.max"])
-
-    # }}}
-
-    # {{{ timestep loop
-
-    rhs = op.bind(discr)
-    try:
-        from grudge.timestep import times_and_steps
-        step_it = times_and_steps(
-                final_time=4, logmgr=logmgr,
-                max_dt_getter=lambda t: op.estimate_timestep(discr,
-                    stepper=stepper, t=t, fields=fields))
-
-        for step, t, dt in step_it:
-            if step % 10 == 0 and write_output:
-                visf = vis.make_file("fld-%04d" % step)
-
-                vis.add_data(visf,
-                        [
-                            ("u", discr.convert_volume(fields[0], kind="numpy")),
-                            ("v", discr.convert_volume(fields[1:], kind="numpy")),
-                        ],
-                        time=t,
-                        step=step)
-                visf.close()
-
-            fields = stepper(fields, t, dt, rhs)
-
-        assert discr.norm(fields) < 1
-        assert fields[0].dtype == dtype
-
-    finally:
-        if write_output:
-            vis.close()
-
-        logmgr.close()
-        discr.close()
-
-    # }}}
-
-if __name__ == "__main__":
-    main(True, BTAG_ALL, BTAG_NONE, TAG_NONE, "upwind", np.float64,
-            debug=["cuda_no_plan", "dump_optemplate_stages"])
-
-
-# {{{ entry points for py.test
-
-def test_wave():
-    from pytools.test import mark_test
-    mark_long = mark_test.long
-
-    yield ("dirichlet wave equation with SP data", mark_long(main),
-            False, BTAG_ALL, BTAG_NONE, TAG_NONE, "upwind", np.float64)
-    yield ("dirichlet wave equation with SP complex data", mark_long(main),
-            False, BTAG_ALL, BTAG_NONE, TAG_NONE, "upwind", np.complex64)
-    yield ("dirichlet wave equation with DP complex data", mark_long(main),
-            False, BTAG_ALL, BTAG_NONE, TAG_NONE, "upwind", np.complex128)
-    for flux_type in ["upwind", "central"]:
-        yield ("dirichlet wave equation with %s flux" % flux_type,
-                mark_long(main),
-                False, BTAG_ALL, BTAG_NONE, TAG_NONE, flux_type)
-    yield ("neumann wave equation", mark_long(main),
-            False, BTAG_NONE, BTAG_ALL, TAG_NONE)
-    yield ("radiation-bc wave equation", mark_long(main),
-            False, BTAG_NONE, TAG_NONE, BTAG_ALL)
-
-# }}}
-
-# ij
diff --git a/grudge/execution.py b/grudge/execution.py
index 875db9d9..f756d21b 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -333,34 +333,9 @@ class ExecutionMapper(mappers.Evaluator,
         remote_data_host = np.empty_like(local_data)
         recv_req = comm.Irecv(remote_data_host, insn.i_remote_rank, insn.recv_tag)
 
-        class RecvFuture:
-            def __init__(self, recv_req, insn_name, remote_data_host, queue):
-                self.receive_request = recv_req
-                self.insn_name = insn_name
-                self.remote_data_host = remote_data_host
-                self.queue = queue
-
-            def is_ready(self):
-                return self.receive_request.Test()
-
-            def __call__(self):
-                self.receive_request.Wait()
-                remote_data = cl.array.to_device(self.queue, self.remote_data_host)
-                return [(self.insn_name, remote_data)], []
-
-        class SendFuture:
-            def __init__(self, send_request):
-                self.send_request = send_request
-
-            def is_ready(self):
-                return self.send_request.Test()
-
-            def __call__(self):
-                self.send_request.wait()
-                return [], []
-
-        return [], [RecvFuture(recv_req, insn.name, remote_data_host, self.queue),
-                    SendFuture(send_req)]
+        return [], [
+                MPIRecvFuture(recv_req, insn.name, remote_data_host, self.queue),
+                MPISendFuture(send_req)]
 
     def map_insn_loopy_kernel(self, insn):
         kwargs = {}
@@ -463,6 +438,38 @@ class ExecutionMapper(mappers.Evaluator,
 # }}}
 
 
+# {{{ futures
+
+class MPIRecvFuture(object):
+    def __init__(self, recv_req, insn_name, remote_data_host, queue):
+        self.receive_request = recv_req
+        self.insn_name = insn_name
+        self.remote_data_host = remote_data_host
+        self.queue = queue
+
+    def is_ready(self):
+        return self.receive_request.Test()
+
+    def __call__(self):
+        self.receive_request.Wait()
+        remote_data = cl.array.to_device(self.queue, self.remote_data_host)
+        return [(self.insn_name, remote_data)], []
+
+
+class MPISendFuture(object):
+    def __init__(self, send_request):
+        self.send_request = send_request
+
+    def is_ready(self):
+        return self.send_request.Test()
+
+    def __call__(self):
+        self.send_request.wait()
+        return [], []
+
+# }}}
+
+
 # {{{ bound operator
 
 class BoundOperator(object):
-- 
GitLab


From 0022dbe4cf94a833a3141d19cd13dcc8fbedfae7 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 26 Jun 2018 23:13:02 -0500
Subject: [PATCH 77/83] Un-skip MPI tests

---
 test/test_mpi_communication.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 6860fe47..0ab13f05 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -36,7 +36,6 @@ from grudge import sym, bind, DGDiscretizationWithBoundaries
 from grudge.shortcuts import set_up_rk4
 
 
-@pytest.mark.skip()
 def simple_mpi_communication_entrypoint():
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
@@ -100,7 +99,6 @@ def simple_mpi_communication_entrypoint():
     assert error < 1e-14
 
 
-@pytest.mark.skip()
 def mpi_communication_entrypoint():
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
@@ -259,10 +257,8 @@ def mpi_communication_entrypoint():
 
 # {{{ MPI test pytest entrypoint
 
-# @pytest.mark.mpi
-# @pytest.mark.parametrize("num_ranks", [3])
-# FIXME: gitlab runs forever on this.
-@pytest.mark.skip()
+@pytest.mark.mpi
+@pytest.mark.parametrize("num_ranks", [3])
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
 
@@ -277,9 +273,8 @@ def test_mpi(num_ranks):
         env=newenv)
 
 
-# @pytest.mark.mpi
-# FIXME: gitlab runs forever on this.
-@pytest.mark.skip()
+@pytest.mark.mpi
+@pytest.mark.parametrize("num_ranks", [3])
 def test_simple_mpi():
     pytest.importorskip("mpi4py")
 
-- 
GitLab


From fab72f8626b2d9e9b4a7a9cd7bb6355dc6dc31c2 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 26 Jun 2018 23:13:27 -0500
Subject: [PATCH 78/83] Bump Py3 to 3.6 in CI, separate out MPI CI jobs

---
 .gitlab-ci.yml | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 98eb9c5d..1d6bb49c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,8 +1,8 @@
-Python 2.7 POCL MPI:
+Python 2.7 POCL:
   script:
   - export PY_EXE=python2.7
   - export PYOPENCL_TEST=portable
-  - export EXTRA_INSTALL="numpy mako mpi4py"
+  - export EXTRA_INSTALL="numpy mako"
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
   - ". ./build-and-test-py-project.sh"
   tags:
@@ -12,30 +12,47 @@ Python 2.7 POCL MPI:
   except:
   - tags
 
-Python 3.5 POCL MPI:
+Python 3.6 POCL:
   script:
-  - export PY_EXE=python3.5
+  - export PY_EXE=python3.6
   - export PYOPENCL_TEST=portable
-  - export EXTRA_INSTALL="numpy mako mpi4py"
+  - export EXTRA_INSTALL="numpy mako"
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
   - ". ./build-and-test-py-project.sh"
   tags:
-  - python3.5
+  - python3.6
   - pocl
   - mpi
   except:
   - tags
 
-Python 3.5 POCL:
+Python 2.7 POCL MPI:
   script:
-  - export PY_EXE=python3.5
+  - export PY_EXE=python2.7
   - export PYOPENCL_TEST=portable
-  - export EXTRA_INSTALL="numpy mako"
+  - export EXTRA_INSTALL="numpy mako mpi4py pymetis"
+  - export PYTEST_ADDOPTS="-k mpi"
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
+  - ". ./build-and-test-py-project.sh"
+  tags:
+  - python2.7
+  - pocl
+  - mpi
+  except:
+  - tags
+
+Python 3.6 POCL MPI:
+  script:
+  - export PY_EXE=python3.6
+  - export PYOPENCL_TEST=portable
+  - export EXTRA_INSTALL="numpy mako mpi4py pymetis"
+  - export PYTEST_ADDOPTS="-k mpi"
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
   - ". ./build-and-test-py-project.sh"
   tags:
-  - python3.5
+  - python3.6
   - pocl
+  - mpi
   except:
   - tags
 
@@ -45,7 +62,7 @@ Documentation:
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-docs.sh
   - ". ./build-docs.sh"
   tags:
-  - python3.5
+  - python3.6
   only:
   - master
 
@@ -54,6 +71,6 @@ Flake8:
   - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh
   - ". ./prepare-and-run-flake8.sh grudge test"
   tags:
-  - python3.5
+  - python3.6
   except:
   - tags
-- 
GitLab


From 4d6710a821a5edb378f0a551bd985e3b863b6511 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 26 Jun 2018 23:14:19 -0500
Subject: [PATCH 79/83] MPI tests: Use get_partition_by_pymetis

---
 test/test_mpi_communication.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 0ab13f05..7a1f3a41 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -39,7 +39,7 @@ from grudge.shortcuts import set_up_rk4
 def simple_mpi_communication_entrypoint():
     cl_ctx = cl.create_some_context()
     queue = cl.CommandQueue(cl_ctx)
-    from meshmode.distributed import MPIMeshDistributor
+    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
 
     from mpi4py import MPI
     comm = MPI.COMM_WORLD
@@ -53,11 +53,7 @@ def simple_mpi_communication_entrypoint():
                                           b=(1,)*2,
                                           n=(3,)*2)
 
-        from pymetis import part_graph
-        _, p = part_graph(num_parts,
-                          xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
-                          adjncy=mesh.nodal_adjacency.neighbors.tolist())
-        part_per_element = np.array(p)
+        part_per_element = get_partition_by_pymetis(mesh, num_parts)
 
         local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
     else:
@@ -108,7 +104,7 @@ def mpi_communication_entrypoint():
     i_local_rank = comm.Get_rank()
     num_parts = comm.Get_size()
 
-    from meshmode.distributed import MPIMeshDistributor
+    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
     mesh_dist = MPIMeshDistributor(comm)
 
     dim = 2
@@ -121,11 +117,7 @@ def mpi_communication_entrypoint():
                                           b=(0.5,)*dim,
                                           n=(16,)*dim)
 
-        from pymetis import part_graph
-        _, p = part_graph(num_parts,
-                          xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
-                          adjncy=mesh.nodal_adjacency.neighbors.tolist())
-        part_per_element = np.array(p)
+        part_per_element = get_partition_by_pymetis(mesh, num_parts)
 
         local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
     else:
@@ -261,6 +253,7 @@ def mpi_communication_entrypoint():
 @pytest.mark.parametrize("num_ranks", [3])
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
+    pytest.importorskip("pymetis")
 
     from subprocess import check_call
     import sys
@@ -277,6 +270,7 @@ def test_mpi(num_ranks):
 @pytest.mark.parametrize("num_ranks", [3])
 def test_simple_mpi():
     pytest.importorskip("mpi4py")
+    pytest.importorskip("pymetis")
 
     from subprocess import check_call
     import sys
-- 
GitLab


From c1e63a5d2d7653f71f228bafa4246ccb48a79788 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 26 Jun 2018 23:15:23 -0500
Subject: [PATCH 80/83] Add an MPI example

---
 examples/wave/wave-min-mpi.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/examples/wave/wave-min-mpi.py b/examples/wave/wave-min-mpi.py
index 26d22226..04d0b8a3 100644
--- a/examples/wave/wave-min-mpi.py
+++ b/examples/wave/wave-min-mpi.py
@@ -122,6 +122,8 @@ def main(write_output=True, order=4):
     from time import time
     t_last_step = time()
 
+    rank = comm.Get_rank()
+
     for event in dt_stepper.run(t_end=final_t):
         if isinstance(event, dt_stepper.StateComputed):
             assert event.component_id == "w"
@@ -131,7 +133,11 @@ def main(write_output=True, order=4):
             print(step, event.t, norm(queue, u=event.state_component[0]),
                     time()-t_last_step)
             if step % 10 == 0:
-                vis.write_vtk_file("fld-%04d.vtu" % step,
+                vis.write_vtk_file(
+                        "fld-%03d-%04d.vtu" % (
+                            rank,
+                            step,
+                            ),
                         [
                             ("u", event.state_component[0]),
                             ("v", event.state_component[1:]),
-- 
GitLab


From a52b08854727e0a8671a20508240c4a513d32cfe Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 26 Jun 2018 23:31:12 -0500
Subject: [PATCH 81/83] Refactor/simplify distributed tag assignment logic

---
 grudge/execution.py                 |  54 ++++++------
 grudge/symbolic/compiler.py         |  22 +++--
 grudge/symbolic/mappers/__init__.py | 132 ++++++++++++----------------
 grudge/symbolic/operators.py        |  78 ++++++++++++----
 grudge/symbolic/primitives.py       |   2 +
 5 files changed, 154 insertions(+), 134 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index f756d21b..9d665cb3 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -534,6 +534,7 @@ class BoundOperator(object):
 def process_sym_operator(discrwb, sym_operator, post_bind_mapper=None,
         dumper=lambda name, sym_operator: None):
 
+    orig_sym_operator = sym_operator
     import grudge.symbolic.mappers as mappers
 
     dumper("before-bind", sym_operator)
@@ -541,6 +542,30 @@ def process_sym_operator(discrwb, sym_operator, post_bind_mapper=None,
 
     mappers.ErrorChecker(discrwb.mesh)(sym_operator)
 
+    sym_operator = \
+            mappers.OppositeInteriorFaceSwapUniqueIDAssigner()(sym_operator)
+
+    # {{{ broadcast root rank's symn_operator
+
+    # also make sure all ranks had same orig_sym_operator
+
+    if discrwb.mpi_communicator is not None:
+        (mgmt_rank_orig_sym_operator, mgmt_rank_sym_operator) = \
+                discrwb.mpi_communicator.bcast(
+                    (orig_sym_operator, sym_operator),
+                    discrwb.get_management_rank_index())
+
+        from pytools.obj_array import is_equal as is_oa_equal
+        if not is_oa_equal(mgmt_rank_orig_sym_operator, orig_sym_operator):
+            raise ValueError("rank %d received a different symbolic "
+                    "operator to bind from rank %d"
+                    % (discrwb.mpi_communicator.Get_rank(),
+                        discrwb.get_management_rank_index()))
+
+        sym_operator = mgmt_rank_sym_operator
+
+    # }}}
+
     if post_bind_mapper is not None:
         dumper("before-postbind", sym_operator)
         sym_operator = post_bind_mapper(sym_operator)
@@ -578,37 +603,10 @@ def process_sym_operator(discrwb, sym_operator, post_bind_mapper=None,
     volume_mesh = discrwb.discr_from_dd("vol").mesh
     from meshmode.distributed import get_connected_partitions
     connected_parts = get_connected_partitions(volume_mesh)
+
     if connected_parts:
         sym_operator = mappers.DistributedMapper(connected_parts)(sym_operator)
 
-        # Communicate send and recv tags between ranks
-        comm = discrwb.mpi_communicator
-        i_local_rank = comm.Get_rank()
-
-        tag_mapper = mappers.MPITagCollector(i_local_rank)
-        sym_operator = tag_mapper(sym_operator)
-
-        if len(tag_mapper.send_tag_lookups) > 0:
-            # print("Rank %d distributing tags" % i_local_rank)
-            send_reqs = []
-            for i_remote_rank in connected_parts:
-                send_tags = tag_mapper.send_tag_lookups[i_remote_rank]
-                send_reqs.append(comm.isend(send_tags,
-                                            i_remote_rank,
-                                            MPI_TAG_SEND_TAGS))
-
-            # print("Rank %d receiving tags" % i_local_rank)
-            recv_tag_lookups = {}
-            for i_remote_rank in connected_parts:
-                recv_tags = comm.recv(source=i_remote_rank, tag=MPI_TAG_SEND_TAGS)
-                recv_tag_lookups[i_remote_rank] = recv_tags
-
-            for req in send_reqs:
-                req.wait()
-
-            sym_operator = mappers.MPITagDistributor(recv_tag_lookups,
-                                                     i_local_rank)(sym_operator)
-
     dumper("before-imass", sym_operator)
     sym_operator = mappers.InverseMassContractor()(sym_operator)
 
diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index 99de2997..976beed9 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -209,11 +209,6 @@ class RankDataSwapAssign(Instruction):
 
         The number of the remote rank that this instruction swaps data with.
 
-    .. attribute:: mpi_tag_offset
-
-        A tag offset for mpi that should be unique for each instance within
-        a particular rank.
-
     .. attribute:: dd_out
     .. attribute:: comment
     """
@@ -225,8 +220,8 @@ class RankDataSwapAssign(Instruction):
         self.field = field
         self.i_remote_rank = op.i_remote_part
         self.dd_out = op.dd_out
-        self.send_tag = self.MPI_TAG_GRUDGE_DATA_BASE + op.send_tag_offset
-        self.recv_tag = self.MPI_TAG_GRUDGE_DATA_BASE + op.recv_tag_offset
+        self.send_tag = self.MPI_TAG_GRUDGE_DATA_BASE + op.unique_id
+        self.recv_tag = self.MPI_TAG_GRUDGE_DATA_BASE + op.unique_id
         self.comment = "Swap data with rank %02d" % self.i_remote_rank
 
     @memoize_method
@@ -502,8 +497,8 @@ class Code(object):
                 if profile_data is not None:
                     insn_start_time = time()
                 if log_quantities is not None:
-                    insn_sub_timer =\
-                                log_quantities["insn_eval_timer"].start_sub_timer()
+                    insn_sub_timer = \
+                            log_quantities["insn_eval_timer"].start_sub_timer()
 
                 insn, discardable_vars = self.get_next_step(
                     frozenset(list(context.keys())),
@@ -517,9 +512,11 @@ class Code(object):
                 if log_quantities is not None:
                     if isinstance(insn, RankDataSwapAssign):
                         from pytools.log import time_and_count_function
-                        mapper_method = time_and_count_function(mapper_method,
-                                        log_quantities["rank_data_swap_timer"],
-                                        log_quantities["rank_data_swap_counter"])
+                        mapper_method = time_and_count_function(
+                                mapper_method,
+                                log_quantities["rank_data_swap_timer"],
+                                log_quantities["rank_data_swap_counter"])
+
                 assignments, new_futures = mapper_method(insn)
 
                 for target, value in assignments:
@@ -536,6 +533,7 @@ class Code(object):
                 if not futures:
                     # No more instructions or futures. We are done.
                     break
+
                 # Busy wait for a new future
                 if profile_data is not None:
                     busy_wait_start_time = time()
diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py
index 6b251252..5304d647 100644
--- a/grudge/symbolic/mappers/__init__.py
+++ b/grudge/symbolic/mappers/__init__.py
@@ -334,90 +334,70 @@ class OperatorBinder(CSECachingMapperMixin, IdentityMapper):
 # }}}
 
 
+# {{{ dof desc (dd) replacement
+
+class DOFDescReplacer(IdentityMapper):
+    def __init__(self, prev_dd, new_dd):
+        self.prev_dd = prev_dd
+        self.new_dd = new_dd
+
+    def map_operator_binding(self, expr):
+        if (isinstance(expr.op, op.OppositeInteriorFaceSwap)
+                    and expr.op.dd_in == self.prev_dd
+                    and expr.op.dd_out == self.prev_dd):
+            field = self.rec(expr.field)
+            return op.OppositePartitionFaceSwap(dd_in=self.new_dd,
+                                                dd_out=self.new_dd)(field)
+        elif (isinstance(expr.op, op.InterpolationOperator)
+                    and expr.op.dd_out == self.prev_dd):
+            return op.InterpolationOperator(dd_in=expr.op.dd_in,
+                                            dd_out=self.new_dd)(expr.field)
+        elif (isinstance(expr.op, op.RefDiffOperatorBase)
+                    and expr.op.dd_out == self.prev_dd
+                    and expr.op.dd_in == self.prev_dd):
+            return type(expr.op)(expr.op.rst_axis,
+                                      dd_in=self.new_dd,
+                                      dd_out=self.new_dd)(self.rec(expr.field))
+
+    def map_node_coordinate_component(self, expr):
+        if expr.dd == self.prev_dd:
+            return type(expr)(expr.axis, self.new_dd)
+
+# }}}
+
+
 # {{{ mappers for distributed computation
 
-def make_key_from_expr(expr, i_send_rank, i_recv_rank, clean_btag):
-    from copy import deepcopy
-    expr = deepcopy(expr)
-
-    class BTAGCleaner(IdentityMapper):
-        def __init__(self):
-            from meshmode.mesh import BTAG_PARTITION
-            self.prev_dd = sym.as_dofdesc(BTAG_PARTITION(i_recv_rank))
-            self.new_dd = sym.as_dofdesc(BTAG_PARTITION(i_send_rank))
-
-        def map_operator_binding(self, expr):
-            if (isinstance(expr.op, op.OppositeInteriorFaceSwap)
-                        and expr.op.dd_in == self.prev_dd
-                        and expr.op.dd_out == self.prev_dd):
-                field = self.rec(expr.field)
-                return op.OppositePartitionFaceSwap(dd_in=self.new_dd,
-                                                    dd_out=self.new_dd)(field)
-            elif (isinstance(expr.op, op.InterpolationOperator)
-                        and expr.op.dd_out == self.prev_dd):
-                return op.InterpolationOperator(dd_in=expr.op.dd_in,
-                                                dd_out=self.new_dd)(expr.field)
-            elif (isinstance(expr.op, op.RefDiffOperator)
-                        and expr.op.dd_out == self.prev_dd
-                        and expr.op.dd_in == self.prev_dd):
-                return op.RefDiffOperator(expr.op.rst_axis,
-                                          dd_in=self.new_dd,
-                                          dd_out=self.new_dd)(self.rec(expr.field))
-
-        def map_node_coordinate_component(self, expr):
-            if expr.dd == self.prev_dd:
-                return type(expr)(expr.axis, self.new_dd)
-    if clean_btag:
-        # FIXME: Maybe there is a better way to do this
-        # We need to change BTAG_PARTITION so that when expr is sent over to the
-        # other rank, it matches one of its own expressions
-        expr = BTAGCleaner()(expr)
-    return (expr, i_send_rank, i_recv_rank)
-
-
-class MPITagCollector(CSECachingMapperMixin, IdentityMapper):
+class OppositeInteriorFaceSwapUniqueIDAssigner(
+        CSECachingMapperMixin, IdentityMapper):
     map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
 
-    def __init__(self, i_local_rank):
-        self.i_local_rank = i_local_rank
-        self.send_tag_lookups = {}
+    def __init__(self):
+        super(OppositeInteriorFaceSwapUniqueIDAssigner, self).__init__()
+        self._next_id = 0
+        self.seen_ids = set()
 
-    def map_operator_binding(self, expr):
-        if isinstance(expr.op, op.OppositePartitionFaceSwap):
-            i_remote_rank = expr.op.i_remote_part
-            key = make_key_from_expr(self.rec(expr.field),
-                                     i_send_rank=self.i_local_rank,
-                                     i_recv_rank=i_remote_rank,
-                                     clean_btag=True)
-            if i_remote_rank not in self.send_tag_lookups:
-                self.send_tag_lookups[i_remote_rank] = {}
-            assert key not in self.send_tag_lookups[i_remote_rank],\
-                        "Duplicate keys found in tag lookup"
-            tag = expr.op.send_tag_offset = len(self.send_tag_lookups[i_remote_rank])
-            self.send_tag_lookups[i_remote_rank][key] = tag
-            return expr
-        else:
-            return IdentityMapper.map_operator_binding(self, expr)
+    def next_id(self):
+        while self._next_id in self.seen_ids:
+            self._next_id += 1
 
+        result = self._next_id
+        self._next_id += 1
+        self.seen_ids.add(result)
 
-class MPITagDistributor(CSECachingMapperMixin, IdentityMapper):
-    map_common_subexpression_uncached = IdentityMapper.map_common_subexpression
+        return result
 
-    def __init__(self, recv_tag_lookups, i_local_rank):
-        self.recv_tag_lookups = recv_tag_lookups
-        self.i_local_rank = i_local_rank
+    def map_opposite_interior_face_swap(self, expr):
+        if expr.unique_id is not None:
+            if expr.unique_id in self.seen_ids:
+                raise ValueError("OppositeInteriorFaceSwap unique ID '%d' "
+                        "is not unique" % expr.unique_id)
 
-    def map_operator_binding(self, expr):
-        if isinstance(expr.op, op.OppositePartitionFaceSwap):
-            i_remote_rank = expr.op.i_remote_part
-            key = make_key_from_expr(self.rec(expr.field),
-                                     i_send_rank=i_remote_rank,
-                                     i_recv_rank=self.i_local_rank,
-                                     clean_btag=False)
-            expr.op.recv_tag_offset = self.recv_tag_lookups[i_remote_rank][key]
+            self.seen_ids.add(expr.unique_id)
             return expr
+
         else:
-            return IdentityMapper.map_operator_binding(self, expr)
+            return type(expr)(expr.dd_in, expr.dd_out, self.next_id())
 
 
 class DistributedMapper(CSECachingMapperMixin, IdentityMapper):
@@ -464,8 +444,10 @@ class RankGeometryChanger(CSECachingMapperMixin, IdentityMapper):
                     and expr.op.dd_in == self.prev_dd
                     and expr.op.dd_out == self.prev_dd):
             field = self.rec(expr.field)
-            return op.OppositePartitionFaceSwap(dd_in=self.new_dd,
-                                                dd_out=self.new_dd)(field)
+            return op.OppositePartitionFaceSwap(
+                    dd_in=self.new_dd,
+                    dd_out=self.new_dd,
+                    unique_id=expr.op.unique_id)(field)
         elif (isinstance(expr.op, op.InterpolationOperator)
                     and expr.op.dd_out == self.prev_dd):
             return op.InterpolationOperator(dd_in=expr.op.dd_in,
diff --git a/grudge/symbolic/operators.py b/grudge/symbolic/operators.py
index 41b057d3..53fb1422 100644
--- a/grudge/symbolic/operators.py
+++ b/grudge/symbolic/operators.py
@@ -83,6 +83,8 @@ class Operator(pymbolic.primitives.Expression):
                 dd_in=dd_in or self.dd_in,
                 dd_out=dd_out or self.dd_out)
 
+    init_arg_names = ("dd_in", "dd_out")
+
     def __getinitargs__(self):
         return (self.dd_in, self.dd_out,)
 
@@ -97,8 +99,6 @@ class ElementwiseLinearOperator(Operator):
 
 
 class InterpolationOperator(Operator):
-    init_arg_names = ("dd_in", "dd_out")
-
     def __init__(self, dd_in, dd_out):
         official_dd_in = _sym().as_dofdesc(dd_in)
         official_dd_out = _sym().as_dofdesc(dd_out)
@@ -107,6 +107,7 @@ class InterpolationOperator(Operator):
             " does not do anything.".format(official_dd_in, official_dd_out))
 
         super(InterpolationOperator, self).__init__(dd_in, dd_out)
+
     mapper_method = intern("map_interpolation")
 
 
@@ -165,6 +166,8 @@ class DiffOperatorBase(Operator):
 
         self.xyz_axis = xyz_axis
 
+    init_arg_names = ("xyz_axis", "dd_in", "dd_out")
+
     def __getinitargs__(self):
         return (self.xyz_axis, self.dd_in, self.dd_out)
 
@@ -216,6 +219,8 @@ class RefDiffOperatorBase(ElementwiseLinearOperator):
 
         self.rst_axis = rst_axis
 
+    init_arg_names = ("rst_axis", "dd_in", "dd_out")
+
     def __getinitargs__(self):
         return (self.rst_axis, self.dd_in, self.dd_out)
 
@@ -410,8 +415,53 @@ class RefInverseMassOperator(RefMassOperatorBase):
 
 
 # {{{ boundary-related operators
+
+class OppositeInteriorFaceSwap(Operator):
+    """
+    .. attribute:: unique_id
+
+        An integer identifying this specific instances of
+        :class:`OppositePartitionFaceSwap` within an entire bound symbolic
+        operator. Is assigned automatically by :func:`grudge.bind`
+        if not already set by the user. This will become
+        :class:`OppositePartitionFaceSwap.unique_id` in distributed
+        runs.
+    """
+
+    def __init__(self, dd_in=None, dd_out=None, unique_id=None):
+        sym = _sym()
+
+        if dd_in is None:
+            dd_in = sym.DOFDesc(sym.FACE_RESTR_INTERIOR, None)
+        if dd_out is None:
+            dd_out = dd_in
+
+        super(OppositeInteriorFaceSwap, self).__init__(dd_in, dd_out)
+        if self.dd_in.domain_tag is not sym.FACE_RESTR_INTERIOR:
+            raise ValueError("dd_in must be an interior faces domain")
+        if self.dd_out != self.dd_in:
+            raise ValueError("dd_out and dd_in must be identical")
+
+        assert unique_id is None or isinstance(unique_id, int)
+        self.unique_id = unique_id
+
+    init_arg_names = ("dd_in", "dd_out", "unique_id")
+
+    def __getinitargs__(self):
+        return (self.dd_in, self.dd_out, self.unique_id)
+
+    mapper_method = intern("map_opposite_interior_face_swap")
+
+
 class OppositePartitionFaceSwap(Operator):
-    def __init__(self, dd_in=None, dd_out=None):
+    """
+    .. attribute:: unique_id
+
+        An integer corresponding to the :attr:`OppositeInteriorFaceSwap.unique_id`
+        which led to the creation of this object. This integer is used as an
+        MPI tag offset to keep different subexpressions apart in MPI traffic.
+    """
+    def __init__(self, dd_in=None, dd_out=None, unique_id=None):
         sym = _sym()
 
         if dd_in is None and dd_out is None:
@@ -429,25 +479,15 @@ class OppositePartitionFaceSwap(Operator):
 
         self.i_remote_part = self.dd_in.domain_tag.part_nr
 
-    mapper_method = intern("map_opposite_partition_face_swap")
-
+        assert unique_id is None or isinstance(unique_id, int)
+        self.unique_id = unique_id
 
-class OppositeInteriorFaceSwap(Operator):
-    def __init__(self, dd_in=None, dd_out=None):
-        sym = _sym()
+    init_arg_names = ("dd_in", "dd_out", "unique_id")
 
-        if dd_in is None:
-            dd_in = sym.DOFDesc(sym.FACE_RESTR_INTERIOR, None)
-        if dd_out is None:
-            dd_out = dd_in
-
-        super(OppositeInteriorFaceSwap, self).__init__(dd_in, dd_out)
-        if self.dd_in.domain_tag is not sym.FACE_RESTR_INTERIOR:
-            raise ValueError("dd_in must be an interior faces domain")
-        if self.dd_out != self.dd_in:
-            raise ValueError("dd_out and dd_in must be identical")
+    def __getinitargs__(self):
+        return (self.dd_in, self.dd_out, self.unique_id)
 
-    mapper_method = intern("map_opposite_interior_face_swap")
+    mapper_method = intern("map_opposite_partition_face_swap")
 
 
 class FaceMassOperatorBase(ElementwiseLinearOperator):
diff --git a/grudge/symbolic/primitives.py b/grudge/symbolic/primitives.py
index 5b6f63c2..35c45268 100644
--- a/grudge/symbolic/primitives.py
+++ b/grudge/symbolic/primitives.py
@@ -445,6 +445,8 @@ class NodeCoordinateComponent(DiscretizationProperty):
 
         assert dd.domain_tag is not None
 
+    init_arg_names = ("axis", "dd")
+
     def __getinitargs__(self):
         return (self.axis, self.dd)
 
-- 
GitLab


From fb0b60fecd6350b58530ae8dc6516b61e1d72422 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 26 Jun 2018 23:59:32 -0500
Subject: [PATCH 82/83] Fix MPI test invocations

---
 test/test_mpi_communication.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index 7a1f3a41..091111ed 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -250,7 +250,7 @@ def mpi_communication_entrypoint():
 # {{{ MPI test pytest entrypoint
 
 @pytest.mark.mpi
-@pytest.mark.parametrize("num_ranks", [3])
+@pytest.mark.parametrize("num_ranks", [2])
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
     pytest.importorskip("pymetis")
@@ -267,8 +267,8 @@ def test_mpi(num_ranks):
 
 
 @pytest.mark.mpi
-@pytest.mark.parametrize("num_ranks", [3])
-def test_simple_mpi():
+@pytest.mark.parametrize("num_ranks", [2])
+def test_simple_mpi(num_ranks):
     pytest.importorskip("mpi4py")
     pytest.importorskip("pymetis")
 
@@ -277,7 +277,6 @@ def test_simple_mpi():
     newenv = os.environ.copy()
     newenv["RUN_WITHIN_MPI"] = "1"
     newenv["TEST_SIMPLE_MPI_COMMUNICATION"] = "1"
-    num_ranks = 2
     check_call([
         "mpiexec", "-np", str(num_ranks), "-x", "RUN_WITHIN_MPI",
         sys.executable, __file__],
-- 
GitLab


From f07ecbbe6b879bda9e8c59f09671b1d32c7c11e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= <inform@tiker.net>
Date: Wed, 27 Jun 2018 02:11:01 -0400
Subject: [PATCH 83/83] Require up-to-date pytools

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7fb0ad45..1f9ecbd0 100644
--- a/setup.py
+++ b/setup.py
@@ -46,7 +46,7 @@ def main():
 
           install_requires=[
               "pytest>=2.3",
-              "pytools>=2015.1.4",
+              "pytools>=2018.5.2",
               "modepy>=2013.3",
               "meshmode>=2013.3",
               "pyopencl>=2013.1",
-- 
GitLab