From b03a9222826f0dd9b58aaa3b91fe7f4bb55d240a Mon Sep 17 00:00:00 2001 From: jdsteve2 <jdsteve2@illinois.edu> Date: Tue, 20 Feb 2018 03:12:01 -0600 Subject: [PATCH] defined work-item, sub-group, and group --- loopy/statistics.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/loopy/statistics.py b/loopy/statistics.py index 2b5e3876e..d9bbd4b24 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -501,7 +501,12 @@ class Op(Record): .. attribute:: count_granularity A :class:`str` that specifies whether this operation should be counted - once per *work-item*, *sub-group*, or *group*. + once per *work-item*, *sub-group*, or *group*. A work-item is a single + instance of computation executing on a single processor (think 'thread'), + a collection of which may be grouped together into a work-group. Each + work-group executes on a single compute unit with all work-items within + the group sharing local memory. A sub-group is an implementation-dependent + grouping of work-items within a work-group, analogous to an NVIDIA CUDA warp. """ @@ -1320,10 +1325,15 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False, (Likely desirable for performance modeling, but undesirable for code optimization.) - :arg subgroup_size: A :class:`int` that specifies the sub-group size. This - is used, e.g., when counting a :class:`MemAccess` whose count_granularity - specifies that it should only be counted once per sub-group. The default - sub-group_size is 32. + :arg subgroup_size: A :class:`int` that specifies the sub-group size. An OpenCL + sub-group is an implementation-dependent grouping of work-items within a + work-group, analogous to an NVIDIA CUDA warp. subgroup_size is used, e.g., + when counting a :class:`MemAccess` whose count_granularity specifies that it + should only be counted once per sub-group. If set to None an attempt to find + the sub-group size using the device will be made. 
A :class:`str` 'guess' + may also be passed as the subgroup_size, in which case get_mem_access_map + will attempt to find the sub-group size using the device and, if + unsuccessful, will make a wild guess. :return: A :class:`ToCountMap` of **{** :class:`MemAccess` **:** :class:`islpy.PwQPolynomial` **}**. -- GitLab