From 8406772c33b86457a5a72ddbde71de890d404fbe Mon Sep 17 00:00:00 2001
From: jdsteve2 <jdsteve2@illinois.edu>
Date: Tue, 20 Feb 2018 03:40:54 -0600
Subject: [PATCH] more subgroup_size and count_granularity doc

---
 loopy/statistics.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/loopy/statistics.py b/loopy/statistics.py
index a63ee41ad..0607a769e 100755
--- a/loopy/statistics.py
+++ b/loopy/statistics.py
@@ -572,7 +572,13 @@ class MemAccess(Record):
     .. attribute:: count_granularity
 
        A :class:`str` that specifies whether this operation should be counted
-       once per *work-item*, *sub-group*, or *group*.
+       once per *work-item*, *sub-group*, or *group*. A work-item is a single
+       instance of computation executing on a single processor (think
+       'thread'), a collection of which may be grouped together into a
+       work-group. Each work-group executes on a single compute unit with all
+       work-items within the group sharing local memory. A sub-group is an
+       implementation-dependent grouping of work-items within a work-group,
+       analogous to an NVIDIA CUDA warp.
 
     """
 
@@ -1240,6 +1246,13 @@ def get_op_map(knl, numpy_types=True, count_redundant_work=False,
         (Likely desirable for performance modeling, but undesirable for code
         optimization.)
 
+    :arg subgroup_size: (currently unused) An :class:`int` that specifies the
+        sub-group size. An OpenCL sub-group is an implementation-dependent
+        grouping of work-items within a work-group, analogous to an NVIDIA CUDA
+        warp. subgroup_size is used, e.g., when counting a :class:`MemAccess`
+        whose count_granularity specifies that it should only be counted once
+        per sub-group.
+
     :return: A :class:`ToCountMap` of **{** :class:`Op` **:**
         :class:`islpy.PwQPolynomial` **}**.
 
@@ -1554,6 +1567,13 @@ def get_synchronization_map(knl, subgroup_size=None):
 
     :arg knl: A :class:`loopy.LoopKernel` whose barriers are to be counted.
 
+    :arg subgroup_size: (currently unused) An :class:`int` that specifies the
+        sub-group size. An OpenCL sub-group is an implementation-dependent
+        grouping of work-items within a work-group, analogous to an NVIDIA CUDA
+        warp. subgroup_size is used, e.g., when counting a :class:`MemAccess`
+        whose count_granularity specifies that it should only be counted once
+        per sub-group.
+
     :return: A dictionary mapping each type of synchronization event to a
         :class:`islpy.PwQPolynomial` holding the number of events per work-item.
 
-- 
GitLab