From 9dfeff33502b5585105615f860e6105fe3c70a78 Mon Sep 17 00:00:00 2001 From: James Stevens Date: Wed, 7 Dec 2016 22:08:21 -0600 Subject: [PATCH 1/8] fixed white space --- loopy/statistics.py | 12 ++++----- test/test_statistics.py | 58 ++++++++++++++++++++--------------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/loopy/statistics.py b/loopy/statistics.py index f363523b8..157eb70d5 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -581,7 +581,7 @@ class ExpressionOpCounter(CombineMapper): map_bitwise_and = map_bitwise_or def map_if(self, expr): - warn_with_kernel(self.knl, "summing_if_branches_ops", + warn_with_kernel(self.knl, "summing_if_branches_ops", "ExpressionOpCounter counting ops as sum of " "if-statement branches.") return self.rec(expr.condition) + self.rec(expr.then) \ @@ -652,7 +652,7 @@ class LocalSubscriptCounter(CombineMapper): if array.is_local: sub_map[MemAccess(mtype='local', dtype=self.type_inf(expr))] = 1 return sub_map + self.rec(expr.index) - + def map_sum(self, expr): if expr.children: return sum(self.rec(child) for child in expr.children) @@ -665,14 +665,14 @@ class LocalSubscriptCounter(CombineMapper): return self.rec(expr.left)+self.rec(expr.right) def map_if(self, expr): - warn_with_kernel(self.knl, "summing_if_branches_lsubs", + warn_with_kernel(self.knl, "summing_if_branches_lsubs", "LocalSubscriptCounter counting LMEM accesses as sum " "of if-statement branches.") return self.rec(expr.condition) + self.rec(expr.then) \ + self.rec(expr.else_) def map_if_positive(self, expr): - warn_with_kernel(self.knl, "summing_ifpos_branches_lsubs", + warn_with_kernel(self.knl, "summing_ifpos_branches_lsubs", "LocalSubscriptCounter counting LMEM accesses as sum " "of if_pos-statement branches.") return self.rec(expr.criterion) + self.rec(expr.then) \ @@ -818,14 +818,14 @@ class GlobalSubscriptCounter(CombineMapper): map_product = map_sum def map_if(self, expr): - warn_with_kernel(self.knl, "summing_if_branches_gsubs", + warn_with_kernel(self.knl, "summing_if_branches_gsubs", "GlobalSubscriptCounter counting GMEM accesses as " "sum of if-statement branches.") return self.rec(expr.condition) + self.rec(expr.then) \ + self.rec(expr.else_) def map_if_positive(self, expr): - warn_with_kernel(self.knl, "summing_ifpos_branches_gsubs", + warn_with_kernel(self.knl, "summing_ifpos_branches_gsubs", "GlobalSubscriptCounter counting GMEM accesses as " "sum of if_pos-statement branches.") return self.rec(expr.criterion) + self.rec(expr.then) \ diff --git a/test/test_statistics.py b/test/test_statistics.py index fb502045c..ed592842d 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -61,7 +61,7 @@ def test_op_counter_basic(): assert f32add == f32mul == f32div == n*m*l assert f64mul == n*m assert i32add == n*m*2 - + def test_op_counter_reduction(): @@ -398,24 +398,24 @@ def test_mem_access_counter_bitwise(): m = 256 l = 128 params = {'n': n, 'm': m, 'l': l} - i32 = mem_map[lp.MemAccess('global', np.int32, + i32 = mem_map[lp.MemAccess('global', np.int32, stride=0, direction='load', variable='a') ].eval_with_dict(params) - i32 += mem_map[lp.MemAccess('global', np.int32, + i32 += mem_map[lp.MemAccess('global', np.int32, stride=0, direction='load', variable='b') ].eval_with_dict(params) - i32 += mem_map[lp.MemAccess('global', np.int32, + i32 += mem_map[lp.MemAccess('global', np.int32, stride=0, direction='load', variable='g') ].eval_with_dict(params) - i32 += mem_map[lp.MemAccess('global', np.dtype(np.int32), + i32 += mem_map[lp.MemAccess('global', np.dtype(np.int32), stride=0, direction='load', variable='h') ].eval_with_dict(params) assert i32 == 4*n*m+2*n*m*l - i32 = mem_map[lp.MemAccess('global', np.int32, + i32 = mem_map[lp.MemAccess('global', np.int32, stride=0, direction='store', variable='c') ].eval_with_dict(params) - i32 += mem_map[lp.MemAccess('global', np.int32, + i32 += mem_map[lp.MemAccess('global', np.int32, stride=0, direction='store', variable='e') ].eval_with_dict(params) assert i32 == n*m+n*m*l @@ -444,20 +444,20 @@ def test_mem_access_counter_mixed(): m = 256 l = 128 params = {'n': n, 'm': m, 'l': l} - f64uniform = mem_map[lp.MemAccess('global', np.float64, + f64uniform = mem_map[lp.MemAccess('global', np.float64, stride=0, direction='load', variable='g') ].eval_with_dict(params) - f64uniform += mem_map[lp.MemAccess('global', np.float64, + f64uniform += mem_map[lp.MemAccess('global', np.float64, stride=0, direction='load', variable='h') ].eval_with_dict(params) - f32uniform = mem_map[lp.MemAccess('global', np.float32, + f32uniform = mem_map[lp.MemAccess('global', np.float32, stride=0, direction='load', variable='x') ].eval_with_dict(params) - f32nonconsec = mem_map[lp.MemAccess('global', np.dtype(np.float32), + f32nonconsec = mem_map[lp.MemAccess('global', np.dtype(np.float32), stride=Variable('m'), direction='load', variable='a') ].eval_with_dict(params) - f32nonconsec += mem_map[lp.MemAccess('global', np.dtype(np.float32), + f32nonconsec += mem_map[lp.MemAccess('global', np.dtype(np.float32), stride=Variable('m'), direction='load', variable='b') ].eval_with_dict(params) @@ -465,10 +465,10 @@ def test_mem_access_counter_mixed(): assert f32uniform == n*m*l/threads assert f32nonconsec == 3*n*m*l - f64uniform = mem_map[lp.MemAccess('global', np.float64, + f64uniform = mem_map[lp.MemAccess('global', np.float64, stride=0, direction='store', variable='e') ].eval_with_dict(params) - f32nonconsec = mem_map[lp.MemAccess('global', np.float32, + f32nonconsec = mem_map[lp.MemAccess('global', np.float32, stride=Variable('m'), direction='store', variable='c') ].eval_with_dict(params) @@ -497,30 +497,30 @@ def test_mem_access_counter_nonconsec(): m = 256 l = 128 params = {'n': n, 'm': m, 'l': l} - f64nonconsec = mem_map[lp.MemAccess('global', np.float64, + f64nonconsec = mem_map[lp.MemAccess('global', np.float64, stride=Variable('m'), direction='load', variable='g') ].eval_with_dict(params) - f64nonconsec += mem_map[lp.MemAccess('global', np.float64, + f64nonconsec += mem_map[lp.MemAccess('global', np.float64, stride=Variable('m'), direction='load', variable='h') ].eval_with_dict(params) - f32nonconsec = mem_map[lp.MemAccess('global', np.dtype(np.float32), + f32nonconsec = mem_map[lp.MemAccess('global', np.dtype(np.float32), stride=Variable('m')*Variable('l'), direction='load', variable='a') ].eval_with_dict(params) - f32nonconsec += mem_map[lp.MemAccess('global', np.dtype(np.float32), + f32nonconsec += mem_map[lp.MemAccess('global', np.dtype(np.float32), stride=Variable('m')*Variable('l'), direction='load', variable='b') ].eval_with_dict(params) assert f64nonconsec == 2*n*m assert f32nonconsec == 3*n*m*l - f64nonconsec = mem_map[lp.MemAccess('global', np.float64, + f64nonconsec = mem_map[lp.MemAccess('global', np.float64, stride=Variable('m'), direction='store', variable='e') ].eval_with_dict(params) - f32nonconsec = mem_map[lp.MemAccess('global', np.float32, + f32nonconsec = mem_map[lp.MemAccess('global', np.float32, stride=Variable('m')*Variable('l'), direction='store', variable='c') ].eval_with_dict(params) @@ -552,25 +552,25 @@ def test_mem_access_counter_consec(): #for k in mem_map: # print(k.mtype, k.dtype, type(k.dtype), k.stride, k.direction, k.variable, " :\n", mem_map[k]) - f64consec = mem_map[lp.MemAccess('global', np.float64, + f64consec = mem_map[lp.MemAccess('global', np.float64, stride=1, direction='load', variable='g') ].eval_with_dict(params) - f64consec += mem_map[lp.MemAccess('global', np.float64, + f64consec += mem_map[lp.MemAccess('global', np.float64, stride=1, direction='load', variable='h') ].eval_with_dict(params) - f32consec = mem_map[lp.MemAccess('global', np.float32, + f32consec = mem_map[lp.MemAccess('global', np.float32, stride=1, direction='load', variable='a') ].eval_with_dict(params) - f32consec += mem_map[lp.MemAccess('global', np.dtype(np.float32), + f32consec += mem_map[lp.MemAccess('global', np.dtype(np.float32), stride=1, direction='load', variable='b') ].eval_with_dict(params) assert f64consec == 2*n*m assert f32consec == 3*n*m*l - f64consec = mem_map[lp.MemAccess('global', np.float64, + f64consec = mem_map[lp.MemAccess('global', np.float64, stride=1, direction='store', variable='e') ].eval_with_dict(params) - f32consec = mem_map[lp.MemAccess('global', np.float32, + f32consec = mem_map[lp.MemAccess('global', np.float32, stride=1, direction='store', variable='c') ].eval_with_dict(params) assert f64consec == n*m @@ -670,16 +670,16 @@ def test_all_counters_parallel_matmul(): op_map = lp.get_mem_access_map(knl) - f32coal = op_map[lp.MemAccess('global', np.float32, + f32coal = op_map[lp.MemAccess('global', np.float32, stride=1, direction='load', variable='b') ].eval_with_dict(params) - f32coal += op_map[lp.MemAccess('global', np.float32, + f32coal += op_map[lp.MemAccess('global', np.float32, stride=1, direction='load', variable='a') ].eval_with_dict(params) assert f32coal == n*m+m*l - f32coal = op_map[lp.MemAccess('global', np.float32, + f32coal = op_map[lp.MemAccess('global', np.float32, stride=1, direction='store', variable='c') ].eval_with_dict(params) -- GitLab From e5d2c3ad00460bef313f779a2453ce693e2dbfd0 Mon Sep 17 00:00:00 2001 From: James Stevens Date: Wed, 7 Dec 2016 22:43:36 -0600 Subject: [PATCH 2/8] fixing flagged style problems --- loopy/statistics.py | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/loopy/statistics.py b/loopy/statistics.py index 157eb70d5..fde8643bf 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -25,8 +25,6 @@ THE SOFTWARE. import six import loopy as lp -import numpy as np -import warnings from islpy import dim_type import islpy as isl from pytools import memoize_in @@ -319,7 +317,6 @@ class ToCountMap(object): return result - def sum(self): """Add all counts in ToCountMap. @@ -335,7 +332,6 @@ class ToCountMap(object): total += v return total - def eval_and_sum(self, params): """Add all counts in :class:`ToCountMap` and evaluate with provided parameter dict. @@ -443,7 +439,8 @@ class MemAccess(object): """ - def __init__(self, mtype=None, dtype=None, stride=None, direction=None, variable=None): + def __init__(self, mtype=None, dtype=None, stride=None, direction=None, + variable=None): self.mtype = mtype self.stride = stride self.direction = direction @@ -501,8 +498,8 @@ class MemAccess(object): variable = 'None' else: variable = self.variable - return "MemAccess("+mtype+", "+dtype+", "+stride+", "+direction+", " \ - +variable+")" + return "MemAccess(" + mtype + ", " + dtype + ", " + stride + ", " \ + + direction + ", " + variable + ")" # {{{ ExpressionOpCounter @@ -574,8 +571,8 @@ class ExpressionOpCounter(CombineMapper): def map_bitwise_or(self, expr): return ToCountMap({Op(dtype=self.type_inf(expr), name='bw'): - len(expr.children)-1} - ) + sum(self.rec(child) for child in expr.children) + len(expr.children)-1}) \ + + sum(self.rec(child) for child in expr.children) map_bitwise_xor = map_bitwise_or map_bitwise_and = map_bitwise_or @@ -596,8 +593,8 @@ class ExpressionOpCounter(CombineMapper): def map_min(self, expr): return ToCountMap({Op(dtype=self.type_inf(expr), name='maxmin'): - len(expr.children)-1} - ) + sum(self.rec(child) for child in expr.children) + len(expr.children)-1}) \ + + sum(self.rec(child) for child in expr.children) map_max = map_min @@ -739,7 +736,7 @@ class GlobalSubscriptCounter(CombineMapper): index = (index,) from loopy.symbolic import get_dependencies - from loopy.kernel.data import LocalIndexTag, GroupIndexTag + from loopy.kernel.data import LocalIndexTag my_inames = get_dependencies(index) & self.knl.all_inames() # find min tag axis @@ -758,7 +755,7 @@ class GlobalSubscriptCounter(CombineMapper): return ToCountMap({MemAccess(mtype='global', dtype=self.type_inf(expr), stride=0, variable=name): 1} - ) + self.rec(expr.index) + ) + self.rec(expr.index) if min_tag_axis != 0: warn_with_kernel(self.knl, "unknown_gmem_stride", @@ -768,7 +765,7 @@ class GlobalSubscriptCounter(CombineMapper): return ToCountMap({MemAccess(mtype='global', dtype=self.type_inf(expr), stride=sys.maxsize, variable=name): 1} - ) + self.rec(expr.index) + ) + self.rec(expr.index) # get local_id associated with minimum tag axis min_lid = None @@ -807,7 +804,7 @@ class GlobalSubscriptCounter(CombineMapper): return ToCountMap({MemAccess(mtype='global', dtype=self.type_inf(expr), stride=total_stride, variable=name): 1} - ) + self.rec(expr.index) + ) + self.rec(expr.index) def map_sum(self, expr): if expr.children: @@ -1203,8 +1200,7 @@ def get_mem_access_map(knl, numpy_types=True): if uniform: from loopy.kernel.data import LocalIndexTag insn_inames = [iname for iname in insn_inames if not - isinstance( - knl.iname_to_tag.get(iname), LocalIndexTag)] + isinstance(knl.iname_to_tag.get(iname), LocalIndexTag)] inames_domain = knl.get_inames_domain(insn_inames) domain = (inames_domain.project_out_except( insn_inames, [dim_type.set])) @@ -1227,7 +1223,7 @@ def get_mem_access_map(knl, numpy_types=True): subs_expr[MemAccess(mtype=key.mtype, dtype=key.dtype, stride=key.stride, direction='load', variable=key.variable) - ] = subs_expr.pop(key) + ] = subs_expr.pop(key) subs_assignee_g = subs_counter_g(insn.assignee) for key in subs_assignee_g.count_map: @@ -1235,7 +1231,7 @@ def get_mem_access_map(knl, numpy_types=True): stride=key.stride, direction='store', variable=key.variable) - ] = subs_assignee_g.pop(key) + ] = subs_assignee_g.pop(key) # for now, don't count writes to local mem insn_inames = knl.insn_inames(insn) @@ -1243,7 +1239,9 @@ def get_mem_access_map(knl, numpy_types=True): # use count excluding local index tags for uniform accesses for key in subs_expr.count_map: map = ToCountMap({key: subs_expr[key]}) - if key.mtype == 'global' and isinstance(key.stride, int) and key.stride == 0: + if (key.mtype == 'global' and + isinstance(key.stride, int) and + key.stride == 0): subs_map = subs_map \ + map*get_insn_count(knl, insn_inames, True) else: @@ -1264,8 +1262,8 @@ def get_mem_access_map(knl, numpy_types=True): dtype=mem_access.dtype.numpy_dtype, stride=mem_access.stride, direction=mem_access.direction, - variable=mem_access.variable) - , count) + variable=mem_access.variable), + count) for mem_access, count in six.iteritems(subs_map.count_map)) return subs_map -- GitLab From 9a19d4f8059e9676b37bc99aad1c2fa192a69b75 Mon Sep 17 00:00:00 2001 From: James Stevens Date: Wed, 7 Dec 2016 22:52:06 -0600 Subject: [PATCH 3/8] fixing flagged style problems --- test/test_statistics.py | 128 ++++++++++++++++++++-------------------- 1 file changed, 65 insertions(+), 63 deletions(-) diff --git a/test/test_statistics.py b/test/test_statistics.py index ed592842d..13f0474e8 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -33,6 +33,7 @@ import numpy as np from pymbolic.primitives import Variable + def test_op_counter_basic(): knl = lp.make_kernel( @@ -235,25 +236,25 @@ def test_mem_access_counter_basic(): params = {'n': n, 'm': m, 'l': l} f32l = mem_map[lp.MemAccess('global', np.float32, stride=0, direction='load', variable='a') - ].eval_with_dict(params) + ].eval_with_dict(params) f32l += mem_map[lp.MemAccess('global', np.float32, stride=0, direction='load', variable='b') - ].eval_with_dict(params) + ].eval_with_dict(params) f64l = mem_map[lp.MemAccess('global', np.float64, stride=0, direction='load', variable='g') - ].eval_with_dict(params) + ].eval_with_dict(params) f64l += mem_map[lp.MemAccess('global', np.float64, stride=0, direction='load', variable='h') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f32l == 3*n*m*l assert f64l == 2*n*m f32s = mem_map[lp.MemAccess('global', np.dtype(np.float32), stride=0, direction='store', variable='c') - ].eval_with_dict(params) + ].eval_with_dict(params) f64s = mem_map[lp.MemAccess('global', np.dtype(np.float64), stride=0, direction='store', variable='e') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f32s == n*m*l assert f64s == n*m @@ -275,21 +276,21 @@ def test_mem_access_counter_reduction(): params = {'n': n, 'm': m, 'l': l} f32l = mem_map[lp.MemAccess('global', np.float32, stride=0, direction='load', variable='a') - ].eval_with_dict(params) + ].eval_with_dict(params) f32l += mem_map[lp.MemAccess('global', np.float32, stride=0, direction='load', variable='b') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f32l == 2*n*m*l f32s = mem_map[lp.MemAccess('global', np.dtype(np.float32), stride=0, direction='store', variable='c') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f32s == n*l ld_bytes = mem_map.filter_by(mtype=['global'], direction=['load'] - ).to_bytes().eval_and_sum(params) + ).to_bytes().eval_and_sum(params) st_bytes = mem_map.filter_by(mtype=['global'], direction=['store'] - ).to_bytes().eval_and_sum(params) + ).to_bytes().eval_and_sum(params) assert ld_bytes == 4*f32l assert st_bytes == 4*f32s @@ -316,13 +317,13 @@ def test_mem_access_counter_logic(): f32_g_l = reduced_map[lp.MemAccess('global', to_loopy_type(np.float32), direction='load') - ].eval_with_dict(params) + ].eval_with_dict(params) f64_g_l = reduced_map[lp.MemAccess('global', to_loopy_type(np.float64), direction='load') - ].eval_with_dict(params) + ].eval_with_dict(params) f64_g_s = reduced_map[lp.MemAccess('global', to_loopy_type(np.float64), direction='store') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f32_g_l == 2*n*m assert f64_g_l == n*m assert f64_g_s == n*m @@ -349,33 +350,34 @@ def test_mem_access_counter_specialops(): params = {'n': n, 'm': m, 'l': l} f32 = mem_map[lp.MemAccess('global', np.float32, stride=0, direction='load', variable='a') - ].eval_with_dict(params) + ].eval_with_dict(params) f32 += mem_map[lp.MemAccess('global', np.float32, stride=0, direction='load', variable='b') - ].eval_with_dict(params) + ].eval_with_dict(params) f64 = mem_map[lp.MemAccess('global', np.dtype(np.float64), stride=0, direction='load', variable='g') - ].eval_with_dict(params) + ].eval_with_dict(params) f64 += mem_map[lp.MemAccess('global', np.dtype(np.float64), stride=0, direction='load', variable='h') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f32 == 2*n*m*l assert f64 == 2*n*m f32 = mem_map[lp.MemAccess('global', np.float32, stride=0, direction='store', variable='c') - ].eval_with_dict(params) + ].eval_with_dict(params) f64 = mem_map[lp.MemAccess('global', np.float64, stride=0, direction='store', variable='e') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f32 == n*m*l assert f64 == n*m - filtered_map = mem_map.filter_by(direction=['load'], variable=['a','g']) + filtered_map = mem_map.filter_by(direction=['load'], variable=['a', 'g']) #tot = lp.eval_and_sum_polys(filtered_map, params) tot = filtered_map.eval_and_sum(params) assert tot == n*m*l + n*m + def test_mem_access_counter_bitwise(): knl = lp.make_kernel( @@ -400,24 +402,24 @@ def test_mem_access_counter_bitwise(): params = {'n': n, 'm': m, 'l': l} i32 = mem_map[lp.MemAccess('global', np.int32, stride=0, direction='load', variable='a') - ].eval_with_dict(params) + ].eval_with_dict(params) i32 += mem_map[lp.MemAccess('global', np.int32, stride=0, direction='load', variable='b') - ].eval_with_dict(params) + ].eval_with_dict(params) i32 += mem_map[lp.MemAccess('global', np.int32, stride=0, direction='load', variable='g') - ].eval_with_dict(params) + ].eval_with_dict(params) i32 += mem_map[lp.MemAccess('global', np.dtype(np.int32), stride=0, direction='load', variable='h') - ].eval_with_dict(params) + ].eval_with_dict(params) assert i32 == 4*n*m+2*n*m*l i32 = mem_map[lp.MemAccess('global', np.int32, stride=0, direction='store', variable='c') - ].eval_with_dict(params) + ].eval_with_dict(params) i32 += mem_map[lp.MemAccess('global', np.int32, stride=0, direction='store', variable='e') - ].eval_with_dict(params) + ].eval_with_dict(params) assert i32 == n*m+n*m*l @@ -446,32 +448,32 @@ def test_mem_access_counter_mixed(): params = {'n': n, 'm': m, 'l': l} f64uniform = mem_map[lp.MemAccess('global', np.float64, stride=0, direction='load', variable='g') - ].eval_with_dict(params) + ].eval_with_dict(params) f64uniform += mem_map[lp.MemAccess('global', np.float64, stride=0, direction='load', variable='h') - ].eval_with_dict(params) + ].eval_with_dict(params) f32uniform = mem_map[lp.MemAccess('global', np.float32, stride=0, direction='load', variable='x') - ].eval_with_dict(params) + ].eval_with_dict(params) f32nonconsec = mem_map[lp.MemAccess('global', np.dtype(np.float32), stride=Variable('m'), direction='load', variable='a') - ].eval_with_dict(params) + ].eval_with_dict(params) f32nonconsec += mem_map[lp.MemAccess('global', np.dtype(np.float32), stride=Variable('m'), direction='load', variable='b') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f64uniform == 2*n*m assert f32uniform == n*m*l/threads assert f32nonconsec == 3*n*m*l f64uniform = mem_map[lp.MemAccess('global', np.float64, stride=0, direction='store', variable='e') - ].eval_with_dict(params) + ].eval_with_dict(params) f32nonconsec = mem_map[lp.MemAccess('global', np.float32, stride=Variable('m'), direction='store', variable='c') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f64uniform == n*m assert f32nonconsec == n*m*l @@ -500,30 +502,30 @@ def test_mem_access_counter_nonconsec(): f64nonconsec = mem_map[lp.MemAccess('global', np.float64, stride=Variable('m'), direction='load', variable='g') - ].eval_with_dict(params) + ].eval_with_dict(params) f64nonconsec += mem_map[lp.MemAccess('global', np.float64, stride=Variable('m'), direction='load', variable='h') - ].eval_with_dict(params) + ].eval_with_dict(params) f32nonconsec = mem_map[lp.MemAccess('global', np.dtype(np.float32), stride=Variable('m')*Variable('l'), direction='load', variable='a') - ].eval_with_dict(params) + ].eval_with_dict(params) f32nonconsec += mem_map[lp.MemAccess('global', np.dtype(np.float32), stride=Variable('m')*Variable('l'), direction='load', variable='b') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f64nonconsec == 2*n*m assert f32nonconsec == 3*n*m*l f64nonconsec = mem_map[lp.MemAccess('global', np.float64, stride=Variable('m'), direction='store', variable='e') - ].eval_with_dict(params) + ].eval_with_dict(params) f32nonconsec = mem_map[lp.MemAccess('global', np.float32, stride=Variable('m')*Variable('l'), direction='store', variable='c') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f64nonconsec == n*m assert f32nonconsec == n*m*l @@ -549,30 +551,27 @@ def test_mem_access_counter_consec(): l = 128 params = {'n': n, 'm': m, 'l': l} - #for k in mem_map: - # print(k.mtype, k.dtype, type(k.dtype), k.stride, k.direction, k.variable, " :\n", mem_map[k]) - f64consec = mem_map[lp.MemAccess('global', np.float64, stride=1, direction='load', variable='g') - ].eval_with_dict(params) + ].eval_with_dict(params) f64consec += mem_map[lp.MemAccess('global', np.float64, stride=1, direction='load', variable='h') - ].eval_with_dict(params) + ].eval_with_dict(params) f32consec = mem_map[lp.MemAccess('global', np.float32, stride=1, direction='load', variable='a') - ].eval_with_dict(params) + ].eval_with_dict(params) f32consec += mem_map[lp.MemAccess('global', np.dtype(np.float32), stride=1, direction='load', variable='b') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f64consec == 2*n*m assert f32consec == 3*n*m*l f64consec = mem_map[lp.MemAccess('global', np.float64, stride=1, direction='store', variable='e') - ].eval_with_dict(params) + ].eval_with_dict(params) f32consec = mem_map[lp.MemAccess('global', np.float32, stride=1, direction='store', variable='c') - ].eval_with_dict(params) + ].eval_with_dict(params) assert f64consec == n*m assert f32consec == n*m*l @@ -671,26 +670,27 @@ def test_all_counters_parallel_matmul(): op_map = lp.get_mem_access_map(knl) f32coal = op_map[lp.MemAccess('global', np.float32, - stride=1, direction='load', variable='b') - ].eval_with_dict(params) + stride=1, direction='load', variable='b') + ].eval_with_dict(params) f32coal += op_map[lp.MemAccess('global', np.float32, - stride=1, direction='load', variable='a') - ].eval_with_dict(params) + stride=1, direction='load', variable='a') + ].eval_with_dict(params) assert f32coal == n*m+m*l f32coal = op_map[lp.MemAccess('global', np.float32, - stride=1, direction='store', variable='c') - ].eval_with_dict(params) + stride=1, direction='store', variable='c') + ].eval_with_dict(params) assert f32coal == n*l local_mem_map = lp.get_mem_access_map(knl).filter_by(mtype=['local']) local_mem_l = local_mem_map[lp.MemAccess('local', np.dtype(np.float32), - direction='load') - ].eval_with_dict(params) + direction='load') + ].eval_with_dict(params) assert local_mem_l == n*m*l*2 + def test_gather_access_footprint(): knl = lp.make_kernel( "{[i,k,j]: 0<=i,j,k Date: Sat, 10 Dec 2016 18:51:14 -0600 Subject: [PATCH 4/8] added get member function --- loopy/statistics.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/loopy/statistics.py b/loopy/statistics.py index fde8643bf..990744248 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -112,6 +112,9 @@ class ToCountMap(object): def __len__(self): return len(self.count_map) + def get(self, key, default=None): + return self.count_map.get(key, default) + def items(self): return self.count_map.items() -- GitLab From d6e4d1a7c837569dd58e99609b6dff933b8d107e Mon Sep 17 00:00:00 2001 From: James Stevens Date: Sun, 11 Dec 2016 02:16:47 -0600 Subject: [PATCH 5/8] added val_type and eval member function to ToCountMap --- loopy/statistics.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/loopy/statistics.py b/loopy/statistics.py index 990744248..e644886f3 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -70,6 +70,7 @@ class ToCountMap(object): if init_dict is None: init_dict = {} self.count_map = init_dict + self.val_type = isl.PwQPolynomial def __add__(self, other): result = self.count_map.copy() @@ -101,7 +102,11 @@ class ToCountMap(object): try: return self.count_map[index] except KeyError: - return isl.PwQPolynomial('{ 0 }') + #TODO what is the best way to handle this? + if self.val_type is isl.PwQPolynomial: + return isl.PwQPolynomial('{ 0 }') + else: + return 0 def __setitem__(self, index, value): self.count_map[index] = value @@ -318,23 +323,36 @@ class ToCountMap(object): bytes_processed = int(key.dtype.itemsize) * val result[key] = bytes_processed + #TODO again, is this okay? + result.val_type = int + return result def sum(self): """Add all counts in ToCountMap. - :return: A :class:`islpy.PwQPolynomial` containing the sum of counts. + :return: A :class:`islpy.PwQPolynomial` or :class:`int` containing the sum of + counts. """ - total = isl.PwQPolynomial('{ 0 }') + + if self.val_type is isl.PwQPolynomial: + total = isl.PwQPolynomial('{ 0 }') + else: + total = 0 + for k, v in self.items(): - if not isinstance(v, isl.PwQPolynomial): - raise ValueError("ToCountMap: sum() encountered type {0} but " - "may only be used on PwQPolynomials." - .format(type(v))) total += v return total + #TODO test and document + def eval(self, params): + result = self.copy() + for key, val in self.items(): + result[key] = val.eval_with_dict(params) + result.val_type = int + return result + def eval_and_sum(self, params): """Add all counts in :class:`ToCountMap` and evaluate with provided parameter dict. -- GitLab From 8d118e18d3769c369361654e4e94f44734df65a3 Mon Sep 17 00:00:00 2001 From: James Stevens Date: Sun, 11 Dec 2016 03:33:39 -0600 Subject: [PATCH 6/8] set val_type in places where it was missing --- loopy/statistics.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/loopy/statistics.py b/loopy/statistics.py index e644886f3..cb15eb554 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -66,17 +66,17 @@ class ToCountMap(object): """ - def __init__(self, init_dict=None): + def __init__(self, init_dict=None, val_type=isl.PwQPolynomial): if init_dict is None: init_dict = {} self.count_map = init_dict - self.val_type = isl.PwQPolynomial + self.val_type = val_type def __add__(self, other): result = self.count_map.copy() for k, v in six.iteritems(other.count_map): result[k] = self.count_map.get(k, 0) + v - return ToCountMap(result) + return ToCountMap(result, self.val_type) def __radd__(self, other): if other != 0: @@ -130,7 +130,7 @@ class ToCountMap(object): return self.count_map.pop(item) def copy(self): - return ToCountMap(dict(self.count_map)) + return ToCountMap(dict(self.count_map), self.val_type) def filter_by(self, **kwargs): """Remove items without specified key fields. @@ -157,7 +157,7 @@ class ToCountMap(object): """ - result_map = ToCountMap() + result_map = ToCountMap(val_type=self.val_type) from loopy.types import to_loopy_type if 'dtype' in kwargs.keys(): @@ -205,7 +205,7 @@ class ToCountMap(object): """ - result_map = ToCountMap() + result_map = ToCountMap(val_type=self.val_type) # for each item in self.count_map, call func on the key for self_key, self_val in self.items(): @@ -260,7 +260,7 @@ class ToCountMap(object): """ - result_map = ToCountMap() + result_map = ToCountMap(val_type=self.val_type) # make sure all item keys have same type if self.count_map: -- GitLab From ee9811db42098c3c3433e1a203f0e7cfa772e3cc Mon Sep 17 00:00:00 2001 From: James Stevens Date: Tue, 2 May 2017 19:10:35 -0500 Subject: [PATCH 7/8] added assumptions to precompute check --- loopy/isl_helpers.py | 4 ++++ loopy/transform/precompute.py | 19 +++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 0ebe90fbc..36fbb49f4 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -594,6 +594,10 @@ def get_simple_strides(bset, key_by="name"): """ result = {} + comp_div_set_pieces = convexify(bset.compute_divs()).get_basic_sets() + assert len(comp_div_set_pieces) == 1 + bset, = comp_div_set_pieces + lspace = bset.get_local_space() for idiv in range(lspace.dim(dim_type.div)): div = lspace.get_div(idiv) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index a19e06ecd..5b208d0a4 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -681,12 +681,18 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, dt, dim_idx = var_dict[primed_non1_saxis_names[i]] mod_domain = mod_domain.set_dim_name(dt, dim_idx, saxis) + def add_assumptions(d): + assumption_non_param = isl.BasicSet.from_params(kernel.assumptions) + assumptions, domain = isl.align_two(assumption_non_param, d) + return d & assumptions + # {{{ check that we got the desired domain - check_domain = check_domain.project_out_except( - primed_non1_saxis_names, [isl.dim_type.set]) + check_domain = add_assumptions( + check_domain.project_out_except( + primed_non1_saxis_names, [isl.dim_type.set])) - mod_check_domain = mod_domain + mod_check_domain = add_assumptions(mod_domain) # re-add the prime from the new variable var_dict = mod_check_domain.get_var_dict(isl.dim_type.set) @@ -716,10 +722,11 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # project out the new names from the modified domain orig_domain_inames = list(domch.domain.get_var_dict(isl.dim_type.set)) - mod_check_domain = mod_domain.project_out_except( - orig_domain_inames, [isl.dim_type.set]) + mod_check_domain = add_assumptions( + mod_domain.project_out_except( + orig_domain_inames, [isl.dim_type.set])) - check_domain = domch.domain + check_domain = add_assumptions(domch.domain) mod_check_domain, check_domain = isl.align_two( mod_check_domain, check_domain) -- GitLab From 6bb1e04e9d5328d6e35dbbfc8c62c8be54b74286 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 11 May 2017 13:28:08 -0500 Subject: [PATCH 8/8] Fix precompute assumption adding --- loopy/transform/precompute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 5b208d0a4..6077332c4 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -684,7 +684,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, def add_assumptions(d): assumption_non_param = isl.BasicSet.from_params(kernel.assumptions) assumptions, domain = isl.align_two(assumption_non_param, d) - return d & assumptions + return assumptions & domain # {{{ check that we got the desired domain -- GitLab