From 14a0c0cb177d4a7c02850822832ab815745bfe1b Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Thu, 5 Apr 2018 15:36:11 -0500
Subject: [PATCH] Add profile tool to execute

---
 grudge/execution.py            |  4 +--
 grudge/symbolic/compiler.py    | 62 ++++++++++++++++++++++++----------
 test/test_mpi_communication.py | 30 +++++++++++-----
 3 files changed, 68 insertions(+), 28 deletions(-)

diff --git a/grudge/execution.py b/grudge/execution.py
index 4b270fef..5be80e9f 100644
--- a/grudge/execution.py
+++ b/grudge/execution.py
@@ -483,7 +483,7 @@ class BoundOperator(object):
                 + sep
                 + str(self.eval_code))
 
-    def __call__(self, queue, **context):
+    def __call__(self, queue, profile_data=None, **context):
         import pyopencl.array as cl_array
 
         def replace_queue(a):
@@ -512,7 +512,7 @@ class BoundOperator(object):
             new_context[name] = with_object_array_or_scalar(replace_queue, var)
 
         return self.eval_code.execute(
-                ExecutionMapper(queue, new_context, self))
+                ExecutionMapper(queue, new_context, self), profile_data=profile_data)
 
 # }}}
 
diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py
index e74b69f1..a312709c 100644
--- a/grudge/symbolic/compiler.py
+++ b/grudge/symbolic/compiler.py
@@ -477,33 +477,29 @@ class Code(object):
 
         return argmax2(available_insns), discardable_vars
 
-    def execute(self, exec_mapper, pre_assign_check=None):
+    def execute(self, exec_mapper, pre_assign_check=None, profile_data=None):
+        if profile_data is not None:
+            from time import time
+            start_time = time()
+            if profile_data == {}:
+                profile_data['insn_eval_time'] = 0
+                profile_data['future_eval_time'] = 0
+                profile_data['busy_wait_time'] = 0
+                profile_data['total_time'] = 0
         context = exec_mapper.context
 
         futures = []
         done_insns = set()
 
-        def try_evaluate_future():
-            for i in range(len(futures)):
-                if futures[i].is_ready():
-                    future = futures.pop(i)
-                    assignments, new_futures = future()
-
-                    for target, value in assignments:
-                        if pre_assign_check is not None:
-                            pre_assign_check(target, value)
-                        context[target] = value
-
-                    futures.extend(new_futures)
-                    return True
-            return False
-
         while True:
             try:
                 insn, discardable_vars = self.get_next_step(
                     frozenset(list(context.keys())),
                     frozenset(done_insns))
 
+                if profile_data is not None:
+                    insn_start_time = time()
+
                 done_insns.add(insn)
                 for name in discardable_vars:
                     del context[name]
@@ -517,13 +513,38 @@ class Code(object):
                     context[target] = value
 
                 futures.extend(new_futures)
+                if profile_data is not None:
+                    profile_data['insn_eval_time'] += time() - insn_start_time
             except self.NoInstructionAvailable:
                 if not futures:
                     # No more instructions or futures. We are done.
                     break
                 # Busy wait for a new future
-                while not try_evaluate_future():
-                    pass
+                if profile_data is not None:
+                    busy_wait_start_time = time()
+
+                did_eval_future = False
+                while not did_eval_future:
+                    for i in range(len(futures)):
+                        if futures[i].is_ready():
+                            if profile_data is not None:
+                                profile_data['busy_wait_time'] += time() - busy_wait_start_time
+                                future_start_time = time()
+
+                            future = futures.pop(i)
+                            assignments, new_futures = future()
+
+                            for target, value in assignments:
+                                if pre_assign_check is not None:
+                                    pre_assign_check(target, value)
+                                context[target] = value
+
+                            futures.extend(new_futures)
+                            did_eval_future = True
+
+                            if profile_data is not None:
+                                profile_data['future_eval_time'] += time() - future_start_time
+                            break
 
         if len(done_insns) < len(self.instructions):
             print("Unreachable instructions:")
@@ -533,7 +554,12 @@ class Code(object):
             raise RuntimeError("not all instructions are reachable"
                     "--did you forget to pass a value for a placeholder?")
 
+        if profile_data is not None:
+            profile_data['total_time'] += time() - start_time
+
         from pytools.obj_array import with_object_array_or_scalar
+        if profile_data is not None:
+            return with_object_array_or_scalar(exec_mapper, self.result), profile_data
         return with_object_array_or_scalar(exec_mapper, self.result)
 
 # }}}
diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py
index fb468c74..c8aa48c3 100644
--- a/test/test_mpi_communication.py
+++ b/test/test_mpi_communication.py
@@ -192,14 +192,18 @@ def mpi_communication_entrypoint():
     from time import time
     t_last_step = time()
 
+    profile_data = {}
+
     for event in dt_stepper.run(t_end=final_t):
         if isinstance(event, dt_stepper.StateComputed):
             assert event.component_id == "w"
 
             step += 1
-
-            print(step, event.t, norm(queue, u=event.state_component[0]),
-                  time()-t_last_step)
+            n, profile_data = norm(queue, profile_data=profile_data, u=event.state_component[0])
+            if i_local_rank == 0:
+                print(step, event.t, n,
+                      time()-t_last_step)
+                print(profile_data)
 
             # if step % 10 == 0:
             #     vis.write_vtk_file("rank%d-fld-%04d.vtu" % (i_local_rank, step),
@@ -208,13 +212,23 @@ def mpi_communication_entrypoint():
             t_last_step = time()
     logger.debug("Rank %d exiting", i_local_rank)
 
+    print("""execute() for rank %d:
+            \tInstruction Evaluation: %f%%
+            \tFuture Evaluation: %f%%
+            \tBusy Wait: %f%%
+            \tTotal: %f seconds""" % (i_local_rank,
+                                      profile_data['insn_eval_time'] / profile_data['total_time'] * 100,
+                                      profile_data['future_eval_time'] / profile_data['total_time'] * 100,
+                                      profile_data['busy_wait_time'] / profile_data['total_time'] * 100,
+                                      profile_data['total_time']))
+
 
 # {{{ MPI test pytest entrypoint
 
-# @pytest.mark.mpi
-# @pytest.mark.parametrize("num_ranks", [3])
+@pytest.mark.mpi
+@pytest.mark.parametrize("num_ranks", [3])
 # FIXME: gitlab runs forever on this.
-@pytest.mark.skip()
+# @pytest.mark.skip()
 def test_mpi(num_ranks):
     pytest.importorskip("mpi4py")
 
@@ -229,9 +243,9 @@ def test_mpi(num_ranks):
         env=newenv)
 
 
-# @pytest.mark.mpi
+@pytest.mark.mpi
 # FIXME: gitlab runs forever on this.
-@pytest.mark.skip()
+# @pytest.mark.skip()
 def test_simple_mpi():
     pytest.importorskip("mpi4py")
 
-- 
GitLab