From 9da8f5d3baee9644cef511002cc826abb3aace4e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Fri, 14 Sep 2018 02:40:58 -0500 Subject: [PATCH 1/3] ignoring denominator of FloorDivs in CoefficientCollector --- loopy/symbolic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8927cd6fb..298d9167e 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1183,6 +1183,12 @@ class CoefficientCollector(CoefficientCollectorBase): raise ExpressionNotAffineError("cannot gather coefficients--" "indirect addressing in use") + def map_floor_div(self, expr): + from warnings import warn + warn("CoefficientCollector encountered FloorDiv, ignoring denominator in " + "expression %s" % (expr)) + return self.rec(expr.numerator) + # }}} -- GitLab From e8391dbb8f57388d32a92353a23c310d012784c2 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sun, 16 Sep 2018 17:03:57 -0500 Subject: [PATCH 2/3] returned loopy.symbolic.CoefficientCollector to initial state, instead using modified coeff collector (that ignores denom in floor div) locally in stats gatherer; also created corresponding test --- loopy/statistics.py | 17 ++++++++++++++-- loopy/symbolic.py | 6 ------ test/test_statistics.py | 44 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/loopy/statistics.py b/loopy/statistics.py index 3fecfb778..898cccaa1 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -32,6 +32,7 @@ from functools import reduce from loopy.kernel.data import ( MultiAssignmentBase, TemporaryVariable, AddressSpace) from loopy.diagnostic import warn_with_kernel, LoopyError +from loopy.symbolic import CoefficientCollector from pytools import Record, memoize_method @@ -830,6 +831,19 @@ class ExpressionOpCounter(CounterBase): # }}} +# {{{ modified coefficient collector that ignores denominator of floor div + +class IndexStrideCoefficientCollector(CoefficientCollector): + + def map_floor_div(self, expr): + from warnings import warn + warn("IndexStrideCoefficientCollector encountered FloorDiv, ignoring " + "denominator in expression %s" % (expr)) + return self.rec(expr.numerator) + +# }}} + + def _get_lid_and_gid_strides(knl, array, index): # find all local and global index tags and corresponding inames from loopy.symbolic import get_dependencies @@ -857,7 +871,6 @@ def _get_lid_and_gid_strides(knl, array, index): # where l0, l1, l2, g0, g1, and g2 come from flattened index # [... + g2*gid2 + g1*gid1 + g0*gid0 + ... + l2*lid2 + l1*lid1 + l0*lid0] - from loopy.symbolic import CoefficientCollector from loopy.kernel.array import FixedStrideArrayDimTag from pymbolic.primitives import Variable from loopy.symbolic import simplify_using_aff @@ -871,7 +884,7 @@ def _get_lid_and_gid_strides(knl, array, index): for idx, axis_tag in zip(index, array.dim_tags): # collect index coefficients try: - coeffs = CoefficientCollector()( + coeffs = IndexStrideCoefficientCollector()( simplify_using_aff(knl, idx)) except ExpressionNotAffineError: total_iname_stride = None diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 298d9167e..8927cd6fb 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1183,12 +1183,6 @@ class CoefficientCollector(CoefficientCollectorBase): raise ExpressionNotAffineError("cannot gather coefficients--" "indirect addressing in use") - def map_floor_div(self, expr): - from warnings import warn - warn("CoefficientCollector encountered FloorDiv, ignoring denominator in " - "expression %s" % (expr)) - return self.rec(expr.numerator) - # }}} diff --git a/test/test_statistics.py b/test/test_statistics.py index 3f2366521..5aec9ca5e 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -1057,6 +1057,50 @@ def test_all_counters_parallel_matmul(): assert local_mem_s == m*2/bsize*n_subgroups +def test_floor_div_coefficient_collector(): + bsize = 16 + + # kernel that shuffles local mem + knl = lp.make_kernel( + "{[i_outer,j_outer,i_inner,j_inner,r]: " + "0<=i_outer loc[i_inner,j_inner] = 3.14 {id=loc_init}", + "loc[i_inner,(j_inner+r+4) %% %d] = loc[i_inner,(j_inner+r) %% %d]" + " {id=add,dep=loc_init}" % (bsize, bsize), + "out0[i_outer*16+i_inner,j_outer*16+j_inner] = loc[i_inner,j_inner]" + " {id=store,dep=add}", + "end", + "end", + ], + name="local", + lang_version=(2018, 2)) + + knl = lp.add_and_infer_dtypes(knl, dict(out0=np.float32)) + knl = lp.tag_inames(knl, "i_outer:g.1,i_inner:l.1,j_outer:g.0,j_inner:l.0") + + n = 512 + rept = 64 + params = {"n": n, "rept": rept} + group_size = bsize*bsize + n_workgroups = div_ceil(n, bsize)*div_ceil(n, bsize) + subgroups_per_group = div_ceil(group_size, SGS) + n_subgroups = n_workgroups*subgroups_per_group + + # count local f32 accesses + f32_local = lp.get_mem_access_map( + knl, count_redundant_work=True, subgroup_size=SGS + ).filter_by(dtype=[np.float32], mtype=["local"]).eval_and_sum(params) + + # (count-per-sub-group)*n_subgroups + assert f32_local == 2*(rept+1)*n_subgroups + + def test_gather_access_footprint(): knl = lp.make_kernel( "{[i,k,j]: 0<=i,j,k Date: Sun, 9 Dec 2018 18:45:06 -0600 Subject: [PATCH 3/3] prefixing IndexStrideCoefficientCollector with underscore --- loopy/statistics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/statistics.py b/loopy/statistics.py index 715ac9fab..10d29daad 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -846,11 +846,11 @@ class ExpressionOpCounter(CounterBase): # {{{ modified coefficient collector that ignores denominator of floor div -class IndexStrideCoefficientCollector(CoefficientCollector): +class _IndexStrideCoefficientCollector(CoefficientCollector): def map_floor_div(self, expr): from warnings import warn - warn("IndexStrideCoefficientCollector encountered FloorDiv, ignoring " + warn("_IndexStrideCoefficientCollector encountered FloorDiv, ignoring " "denominator in expression %s" % (expr)) return self.rec(expr.numerator) @@ -897,7 +897,7 @@ def _get_lid_and_gid_strides(knl, array, index): for idx, axis_tag in zip(index, array.dim_tags): # collect index coefficients try: - coeffs = IndexStrideCoefficientCollector()( + coeffs = _IndexStrideCoefficientCollector()( simplify_using_aff(knl, idx)) except ExpressionNotAffineError: total_iname_stride = None -- GitLab