From 6fb2849ef8018e50241ac8b607b5f463ba418354 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Mon, 24 Aug 2015 21:23:59 -0500 Subject: [PATCH] Stats functions: return bare dictionary, export helper functions --- doc/reference.rst | 2 ++ doc/tutorial.rst | 2 +- loopy/__init__.py | 5 +++-- loopy/statistics.py | 46 ++++++++++++++++++++++++++++++++++----------- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/doc/reference.rst b/doc/reference.rst index e79f17554..9aa9727cd 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -537,6 +537,8 @@ Obtaining Kernel Statistics .. autofunction:: get_gmem_access_poly +.. autofunction:: sum_mem_access_to_bytes + .. autofunction:: get_barrier_poly .. vim: tw=75:spell diff --git a/doc/tutorial.rst b/doc/tutorial.rst index ca22c2a64..9c8026813 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1264,7 +1264,7 @@ continue using the kernel from the previous example: >>> from loopy.statistics import get_gmem_access_poly >>> load_store_map = get_gmem_access_poly(knl) - >>> print(load_store_map) + >>> print(lp.stringify_stats_mapping(load_store_map)) (dtype('float32'), 'uniform', 'load') : [n, m, l] -> { 3 * n * m * l : n >= 1 and m >= 1 and l >= 1 } (dtype('float32'), 'uniform', 'store') : [n, m, l] -> { n * m * l : n >= 1 and m >= 1 and l >= 1 } (dtype('float64'), 'uniform', 'load') : [n, m, l] -> { 2 * n * m : n >= 1 and m >= 1 and l >= 1 } diff --git a/loopy/__init__.py b/loopy/__init__.py index 8956856d4..77da1e40e 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -64,7 +64,8 @@ from loopy.preprocess import (preprocess_kernel, realize_reduction, infer_unknown_types) from loopy.schedule import generate_loop_schedules, get_one_scheduled_kernel from loopy.statistics import (get_op_poly, get_gmem_access_poly, - get_DRAM_access_poly, get_barrier_poly) + get_DRAM_access_poly, get_barrier_poly, stringify_stats_mapping, + sum_mem_access_to_bytes) from loopy.codegen import generate_code, generate_body from loopy.compiled import CompiledKernel from loopy.options import Options @@ -105,7 +106,7 @@ __all__ = [ "generate_code", "generate_body", "get_op_poly", "get_gmem_access_poly", "get_DRAM_access_poly", - "get_barrier_poly", + "get_barrier_poly", "stringify_stats_mapping", "sum_mem_access_to_bytes", "CompiledKernel", diff --git a/loopy/statistics.py b/loopy/statistics.py index d25ea3eac..240bef8eb 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -76,16 +76,17 @@ class ToCountMap: except KeyError: return isl.PwQPolynomial('{ 0 }') - def __str__(self): - result = "" - for key in sorted(self.dict.keys(), key=lambda k: str(k)): - result += ("%s : %s\n" % (key, self.dict[key])) - return result - def __repr__(self): return repr(self.dict) +def stringify_stats_mapping(m): + result = "" + for key in sorted(m.keys(), key=lambda k: str(k)): + result += ("%s : %s\n" % (key, m[key])) + return result + + class ExpressionOpCounter(CombineMapper): def __init__(self, knl): @@ -447,7 +448,7 @@ def get_op_poly(knl): domain = (inames_domain.project_out_except(insn_inames, [dim_type.set])) ops = op_counter(insn.assignee) + op_counter(insn.expression) op_poly = op_poly + ops*count(knl, domain) - return op_poly + return op_poly.dict def get_gmem_access_poly(knl): # for now just counting subscripts @@ -462,10 +463,12 @@ def get_gmem_access_poly(knl): # for now just counting subscripts - The :class:`numpy.dtype` specifies the type of the data being accessed. - - The first string in the map key specifies the DRAM access type as + - The first string in the map key specifies the global memory + access type as *consecutive*, *nonconsecutive*, or *uniform*. - - The second string in the map key specifies the DRAM access type as a + - The second string in the map key specifies the global memory + access type as a *load*, or a *store*. - The :class:`islpy.PwQPolynomial` holds the number of DRAM accesses @@ -514,7 +517,7 @@ def get_gmem_access_poly(knl): # for now just counting subscripts for key, val in six.iteritems(subs_assignee.dict))) subs_poly = subs_poly + (subs_expr + subs_assignee)*count(knl, domain) - return subs_poly + return subs_poly.dict def get_DRAM_access_poly(knl): @@ -524,6 +527,27 @@ def get_DRAM_access_poly(knl): return get_gmem_access_poly(knl) +def sum_mem_access_to_bytes(m): + """Sum the mapping returned by :func:`get_gmem_access_poly` to a mapping + + **{(** :class:`string` **,** :class:`string` **)** + **:** :class:`islpy.PwQPolynomial` **}** + + i.e., aggregate the transfer numbers for all types into a single byte count. + """ + + result = {} + for (dtype, kind, direction), v in m.items(): + new_key = (kind, direction) + bytes_transferred = int(dtype.itemsize) * v + if new_key in result: + result[new_key] += bytes_transferred + else: + result[new_key] = bytes_transferred + + return result + + def get_barrier_poly(knl): """Count the number of barriers each thread encounters in a loopy kernel. @@ -571,4 +595,4 @@ def get_barrier_poly(knl): else: barrier_poly += isl.PwQPolynomial('{ 1 }') - return barrier_poly + return barrier_poly.dict -- GitLab