From 6fb2849ef8018e50241ac8b607b5f463ba418354 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 24 Aug 2015 21:23:59 -0500
Subject: [PATCH] Stats functions: return bare dictionary, export helper
 functions

---
 doc/reference.rst   |  2 ++
 doc/tutorial.rst    |  2 +-
 loopy/__init__.py   |  5 +++--
 loopy/statistics.py | 46 ++++++++++++++++++++++++++++++++++-----------
 4 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/doc/reference.rst b/doc/reference.rst
index e79f17554..9aa9727cd 100644
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -537,6 +537,8 @@ Obtaining Kernel Statistics
 
 .. autofunction:: get_gmem_access_poly
 
+.. autofunction:: sum_mem_access_to_bytes
+
 .. autofunction:: get_barrier_poly
 
 .. vim: tw=75:spell
diff --git a/doc/tutorial.rst b/doc/tutorial.rst
index ca22c2a64..9c8026813 100644
--- a/doc/tutorial.rst
+++ b/doc/tutorial.rst
@@ -1264,7 +1264,7 @@ continue using the kernel from the previous example:
 
     >>> from loopy.statistics import get_gmem_access_poly
     >>> load_store_map = get_gmem_access_poly(knl)
-    >>> print(load_store_map)
+    >>> print(lp.stringify_stats_mapping(load_store_map))
     (dtype('float32'), 'uniform', 'load') : [n, m, l] -> { 3 * n * m * l : n >= 1 and m >= 1 and l >= 1 }
     (dtype('float32'), 'uniform', 'store') : [n, m, l] -> { n * m * l : n >= 1 and m >= 1 and l >= 1 }
     (dtype('float64'), 'uniform', 'load') : [n, m, l] -> { 2 * n * m : n >= 1 and m >= 1 and l >= 1 }
diff --git a/loopy/__init__.py b/loopy/__init__.py
index 8956856d4..77da1e40e 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -64,7 +64,8 @@ from loopy.preprocess import (preprocess_kernel, realize_reduction,
         infer_unknown_types)
 from loopy.schedule import generate_loop_schedules, get_one_scheduled_kernel
 from loopy.statistics import (get_op_poly, get_gmem_access_poly,
-        get_DRAM_access_poly, get_barrier_poly)
+        get_DRAM_access_poly, get_barrier_poly, stringify_stats_mapping,
+        sum_mem_access_to_bytes)
 from loopy.codegen import generate_code, generate_body
 from loopy.compiled import CompiledKernel
 from loopy.options import Options
@@ -105,7 +106,7 @@ __all__ = [
         "generate_code", "generate_body",
 
         "get_op_poly", "get_gmem_access_poly", "get_DRAM_access_poly",
-        "get_barrier_poly",
+        "get_barrier_poly", "stringify_stats_mapping", "sum_mem_access_to_bytes",
 
         "CompiledKernel",
 
diff --git a/loopy/statistics.py b/loopy/statistics.py
index d25ea3eac..240bef8eb 100755
--- a/loopy/statistics.py
+++ b/loopy/statistics.py
@@ -76,16 +76,17 @@ class ToCountMap:
         except KeyError:
             return isl.PwQPolynomial('{ 0 }')
 
-    def __str__(self):
-        result = ""
-        for key in sorted(self.dict.keys(), key=lambda k: str(k)):
-            result += ("%s : %s\n" % (key, self.dict[key]))
-        return result
-
     def __repr__(self):
         return repr(self.dict)
 
 
+def stringify_stats_mapping(m):
+    result = ""
+    for key in sorted(m.keys(), key=lambda k: str(k)):
+        result += ("%s : %s\n" % (key, m[key]))
+    return result
+
+
 class ExpressionOpCounter(CombineMapper):
 
     def __init__(self, knl):
@@ -447,7 +448,7 @@ def get_op_poly(knl):
         domain = (inames_domain.project_out_except(insn_inames, [dim_type.set]))
         ops = op_counter(insn.assignee) + op_counter(insn.expression)
         op_poly = op_poly + ops*count(knl, domain)
-    return op_poly
+    return op_poly.dict
 
 
 def get_gmem_access_poly(knl):  # for now just counting subscripts
@@ -462,10 +463,12 @@ def get_gmem_access_poly(knl):  # for now just counting subscripts
              - The :class:`numpy.dtype` specifies the type of the data being
                accessed.
 
-             - The first string in the map key specifies the DRAM access type as
+             - The first string in the map key specifies the global memory
+               access type as
                *consecutive*, *nonconsecutive*, or *uniform*.
 
-             - The second string in the map key specifies the DRAM access type as a
+             - The second string in the map key specifies the global memory
+               access type as a
                *load*, or a *store*.
 
              - The :class:`islpy.PwQPolynomial` holds the number of DRAM accesses
@@ -514,7 +517,7 @@ def get_gmem_access_poly(knl):  # for now just counting subscripts
             for key, val in six.iteritems(subs_assignee.dict)))
 
         subs_poly = subs_poly + (subs_expr + subs_assignee)*count(knl, domain)
-    return subs_poly
+    return subs_poly.dict
 
 
 def get_DRAM_access_poly(knl):
@@ -524,6 +527,27 @@ def get_DRAM_access_poly(knl):
     return get_gmem_access_poly(knl)
 
 
+def sum_mem_access_to_bytes(m):
+    """Sum the mapping returned by :func:`get_gmem_access_poly` to a mapping
+
+    **{(** :class:`string` **,** :class:`string` **)**
+    **:** :class:`islpy.PwQPolynomial` **}**
+
+    i.e., sum the bytes transferred over all dtypes for each (kind, direction) key.
+    """
+
+    result = {}
+    for (dtype, kind, direction), v in m.items():
+        new_key = (kind, direction)
+        bytes_transferred = int(dtype.itemsize) * v
+        if new_key in result:
+            result[new_key] += bytes_transferred
+        else:
+            result[new_key] = bytes_transferred
+
+    return result
+
+
 def get_barrier_poly(knl):
 
     """Count the number of barriers each thread encounters in a loopy kernel.
@@ -571,4 +595,4 @@ def get_barrier_poly(knl):
             else:
                 barrier_poly += isl.PwQPolynomial('{ 1 }')
 
-    return barrier_poly
+    return barrier_poly.dict
-- 
GitLab