From 8406772c33b86457a5a72ddbde71de890d404fbe Mon Sep 17 00:00:00 2001 From: jdsteve2 <jdsteve2@illinois.edu> Date: Tue, 20 Feb 2018 03:40:54 -0600 Subject: [PATCH] more subgroup_size and count_granularity doc --- loopy/statistics.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/loopy/statistics.py b/loopy/statistics.py index a63ee41ad..0607a769e 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -572,7 +572,13 @@ class MemAccess(Record): .. attribute:: count_granularity A :class:`str` that specifies whether this operation should be counted - once per *work-item*, *sub-group*, or *group*. + once per *work-item*, *sub-group*, or *group*. A work-item is a single + instance of computation executing on a single processor (think + 'thread'), a collection of which may be grouped together into a + work-group. Each work-group executes on a single compute unit with all + work-items within the group sharing local memory. A sub-group is an + implementation-dependent grouping of work-items within a work-group, + analogous to an NVIDIA CUDA warp. """ @@ -1240,6 +1246,13 @@ def get_op_map(knl, numpy_types=True, count_redundant_work=False, (Likely desirable for performance modeling, but undesirable for code optimization.) + :arg subgroup_size: (currently unused) An :class:`int` that specifies the + sub-group size. An OpenCL sub-group is an implementation-dependent + grouping of work-items within a work-group, analogous to an NVIDIA CUDA + warp. subgroup_size is used, e.g., when counting a :class:`MemAccess` + whose count_granularity specifies that it should only be counted once + per sub-group. + :return: A :class:`ToCountMap` of **{** :class:`Op` **:** :class:`islpy.PwQPolynomial` **}**. @@ -1554,6 +1567,13 @@ def get_synchronization_map(knl, subgroup_size=None): :arg knl: A :class:`loopy.LoopKernel` whose barriers are to be counted. + :arg subgroup_size: (currently unused) An :class:`int` that specifies the + sub-group size. 
An OpenCL sub-group is an implementation-dependent + grouping of work-items within a work-group, analogous to an NVIDIA CUDA + warp. subgroup_size is used, e.g., when counting a :class:`MemAccess` + whose count_granularity specifies that it should only be counted once + per sub-group. + :return: A dictionary mapping each type of synchronization event to a :class:`islpy.PwQPolynomial` holding the number of events per work-item. -- GitLab