diff --git a/doc/reference.rst b/doc/reference.rst index c7435bbf7f84570b6672ba1bf42eeab9d495ea56..eb10788cb07d8fb87cee5ce6edc90e11bb9db5c3 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -514,4 +514,13 @@ Controlling caching .. autoclass:: CacheMode +Obtaining Kernel Statistics +--------------------------- + +.. autofunction:: get_op_poly + +.. autofunction:: get_DRAM_access_poly + +.. autofunction:: get_barrier_poly + .. vim: tw=75:spell diff --git a/loopy/__init__.py b/loopy/__init__.py index c63aa5f90d6537496ea9fe4ecb11c441070c91b4..f6f611f6538911678ecdd12578ca9ab17d0657f2 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -63,6 +63,7 @@ from loopy.padding import (split_arg_axis, find_padding_multiple, from loopy.preprocess import (preprocess_kernel, realize_reduction, infer_unknown_types) from loopy.schedule import generate_loop_schedules, get_one_scheduled_kernel +from loopy.statistics import get_op_poly, get_DRAM_access_poly, get_barrier_poly from loopy.codegen import generate_code, generate_body from loopy.compiled import CompiledKernel from loopy.options import Options @@ -102,6 +103,8 @@ __all__ = [ "generate_loop_schedules", "get_one_scheduled_kernel", "generate_code", "generate_body", + "get_op_poly", "get_DRAM_access_poly", "get_barrier_poly", + "CompiledKernel", "auto_test_vs_ref", diff --git a/loopy/compiled.py b/loopy/compiled.py index da659eaba5e3c7c8c99993946da1e7af5bb399bc..a29f357b08e4383c1e6ea1b9b79db8330e8498a0 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -781,10 +781,11 @@ class _CLKernelInfo(Record): class CompiledKernel: def __init__(self, context, kernel): """ - :arg kernel: may be a loopy.LoopKernel, a generator returning kernels - (a warning will be issued if more than one is returned). If the - kernel has not yet been loop-scheduled, that is done, too, with no + :arg kernel: may be a loopy.LoopKernel, a generator returning kernels \ + (a warning will be issued if more than one is returned). 
If the \ + kernel has not yet been loop-scheduled, that is done, too, with no \ specific arguments. + """ self.context = context diff --git a/loopy/statistics.py b/loopy/statistics.py index 85ac4c77bdad2573b6a7a033aa0e17221e010d59..5c58d66c8786840c24afa4d9575762b17b96a220 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -403,8 +403,32 @@ def count(kernel, bset): return result -# to evaluate poly: poly.eval_with_dict(dictionary) def get_op_poly(knl): + + """Count the number of operations in a loopy kernel. + + :parameter knl: A :class:`loopy.LoopKernel` whose operations are to be counted. + + :return: A mapping of **{** :class:`numpy.dtype` \ + **:** :class:`islpy.PwQPolynomial` **}**. + + - The :class:`islpy.PwQPolynomial` holds the number of operations for \ + the :class:`numpy.dtype` specified in the key (in terms of \ + the :class:`loopy.LoopKernel` *inames*). + + Example usage:: + + poly = get_op_poly(knl) + n = 512 + m = 256 + l = 128 + float32_op_ct = poly.dict[np.dtype(np.float32)].eval_with_dict( + {'n': n, 'm': m, 'l': l}) + float64_op_ct = poly.dict[np.dtype(np.float64)].eval_with_dict( + {'n': n, 'm': m, 'l': l}) + + """ + from loopy.preprocess import preprocess_kernel, infer_unknown_types knl = infer_unknown_types(knl, expect_completion=True) knl = preprocess_kernel(knl) @@ -423,6 +447,44 @@ def get_op_poly(knl): def get_DRAM_access_poly(knl): # for now just counting subscripts + + """Count the number of DRAM accesses in a loopy kernel. + + :parameter knl: A :class:`loopy.LoopKernel` \ + whose DRAM accesses are to be counted. + + :return: A mapping of **{(** \ + :class:`numpy.dtype` **,** :class:`str` **,** :class:`str` \ + **)** **:** :class:`islpy.PwQPolynomial` **}**. + + - The :class:`numpy.dtype` specifies \ + the type of the data being accessed. + + - The first string in the map key specifies the DRAM access type as \ + *consecutive*, *nonconsecutive*, or *uniform*. 
+ + - The second string in the map key specifies the DRAM access type as \ + a *load* or a *store*. + + - The :class:`islpy.PwQPolynomial` holds the number of DRAM accesses \ + with the characteristics specified in the key (in terms of the \ + :class:`loopy.LoopKernel` *inames*). + + Example usage:: + + subscript_map = get_DRAM_access_poly(knl) + f32_uncoalesced_load = subscript_map.dict[ + (np.dtype(np.float32), 'nonconsecutive', 'load') + ].eval_with_dict({'n': n, 'm': m, 'l': l}) + f32_coalesced_load = subscript_map.dict[ + (np.dtype(np.float32), 'consecutive', 'load') + ].eval_with_dict({'n': n, 'm': m, 'l': l}) + f32_coalesced_store = subscript_map.dict[ + (np.dtype(np.float32), 'consecutive', 'store') + ].eval_with_dict({'n': n, 'm': m, 'l': l}) + + """ + from loopy.preprocess import preprocess_kernel, infer_unknown_types knl = infer_unknown_types(knl, expect_completion=True) knl = preprocess_kernel(knl) @@ -448,6 +510,27 @@ def get_DRAM_access_poly(knl): # for now just counting subscripts def get_barrier_poly(knl): + + """Count the number of barriers in a loopy kernel. + + :parameter knl: A :class:`loopy.LoopKernel` \ + whose barriers are to be counted. + + :return: An :class:`islpy.PwQPolynomial` holding the number of barrier calls \ + made (in terms of the :class:`loopy.LoopKernel` *inames*). + + Example usage:: + + barrier_poly = get_barrier_poly(knl) + + n = 512 + m = 256 + l = 128 + + barrier_count = barrier_poly.eval_with_dict({'n': n, 'm': m, 'l': l}) + + """ + from loopy.preprocess import preprocess_kernel, infer_unknown_types from loopy.schedule import EnterLoop, LeaveLoop, Barrier from operator import mul