diff --git a/MEMO b/MEMO index 6fa8a95c5c13f13dc8d0a566c4b5aa4f89d1d65b..19e54706ddaf37dd845abd595dcee519df7c3280 100644 --- a/MEMO +++ b/MEMO @@ -39,8 +39,6 @@ Things to consider To-do ^^^^^ -- Automatically generate testing code vs. sequential. - - Fix all tests - Deal with equality constraints. @@ -83,6 +81,8 @@ Future ideas Dealt with ^^^^^^^^^^ +- Automatically generate testing code vs. sequential. + - If isl can prove that all operands are positive, may use '/' instead of 'floor_div'. diff --git a/loopy/__init__.py b/loopy/__init__.py index 451a175835d31d4fd241ce6f3713a63edeecaa98..ba05ddff496701fbd367609a5d08d55510d7c859 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -381,7 +381,7 @@ def join_dimensions(kernel, inames, new_iname=None, tag=AutoFitLocalIndexTag()): # {{{ dimension tag -def tag_dimensions(kernel, iname_to_tag): +def tag_dimensions(kernel, iname_to_tag, force=False): from loopy.kernel import parse_tag iname_to_tag = dict((iname, parse_tag(tag)) @@ -406,7 +406,7 @@ def tag_dimensions(kernel, iname_to_tag): raise ValueError("cannot tag '%s' as parallel--" "iname requires sequential execution" % iname) - if old_tag is not None and (old_tag != new_tag): + if (not force) and old_tag is not None and (old_tag != new_tag): raise RuntimeError("'%s' is already tagged '%s'--cannot retag" % (iname, old_tag)) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 06807c2e101ab946cd9852c756e0dfeb847f07b6..a08c2fb3e75bb09dfc309d70e2d3aa41d01f14f0 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -99,8 +99,10 @@ def realize_reduction(kernel): for iname in ilp_inames: bounds = kernel.get_iname_bounds(iname) + from loopy.symbolic import pw_aff_to_expr ilp_iname_lengths.append( - static_max_of_pw_aff(bounds.size, constants_only=True)) + int(pw_aff_to_expr( + static_max_of_pw_aff(bounds.size, constants_only=True)))) # }}} diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 83fd8bde3df74a8a81ecedc130d2c80450db09b4..a5bc8ea1200f14f41d9df9cb187577126b322de9 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -258,7 +258,7 @@ class ArrayAccessFinder(CombineMapper): # {{{ C code mapper class LoopyCCodeMapper(CCodeMapper): - def __init__(self, kernel, cse_name_list=[], var_subst_map={}): + def __init__(self, kernel, cse_name_list=[], var_subst_map={}, with_annotation=True): def constant_mapper(c): if isinstance(c, float): # FIXME: type-variable @@ -270,6 +270,7 @@ class LoopyCCodeMapper(CCodeMapper): cse_name_list=cse_name_list) self.kernel = kernel + self.with_annotation = with_annotation self.var_subst_map = var_subst_map.copy() def copy(self, var_subst_map=None, cse_name_list=None): @@ -292,8 +293,12 @@ class LoopyCCodeMapper(CCodeMapper): def map_variable(self, expr, prec): if expr.name in self.var_subst_map: - return " /* %s */ %s" % ( - expr.name, self.rec(self.var_subst_map[expr.name], prec)) + if self.with_annotation: + return " /* %s */ %s" % ( + expr.name, + self.rec(self.var_subst_map[expr.name], prec)) + else: + return str(self.rec(self.var_subst_map[expr.name], prec)) else: return CCodeMapper.map_variable(self, expr, prec) diff --git a/test/test_linalg.py b/test/test_linalg.py index 6a968648cbf9fa807bd298ab515a03cff953ecb9..6fd15542d277fb79c309f77f8f1629e2a240f6d9 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -59,7 +59,7 @@ def check_error(refsol, sol): if rel_err > 1e-5 or np.isinf(rel_err) or np.isnan(rel_err): if 1: import matplotlib.pyplot as pt - pt.imshow(refsol-sol) + pt.imshow(refsol-sol, interpolation="nearest") pt.colorbar() pt.show() elif 0: @@ -623,7 +623,7 @@ def test_image_matrix_mul_ilp(ctx_factory): queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) - n = 2*get_suitable_size(ctx) + n = 32 knl = lp.make_kernel(ctx.devices[0], "{[i,j,k]: 0<=i,j,k<%d}" % n, @@ -639,15 +639,18 @@ def test_image_matrix_mul_ilp(ctx_factory): ilp = 4 knl = lp.split_dimension(knl, "i", 2, outer_tag="g.0", inner_tag="l.1") - j_inner_split = 16 + j_inner_split = 2 knl = lp.split_dimension(knl, "j", ilp*j_inner_split, outer_tag="g.1") knl = lp.split_dimension(knl, "j_inner", j_inner_split, outer_tag="ilp", inner_tag="l.0") knl = lp.split_dimension(knl, "k", 2) # conflict-free - knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"]) + #knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"]) knl = lp.add_prefetch(knl, 'b', ["j_inner_outer", "j_inner_inner", "k_inner"], ["b_j_io", "b_j_ii", "b_k_i"]) - knl = lp.join_dimensions(knl, ["b_j_io", "b_j_ii"]) + if 1: + knl = lp.join_dimensions(knl, ["b_j_io", "b_j_ii"]) + else: + knl = lp.tag_dimensions(knl, {"b_j_io": "unr"}, force=True) #print lp.preprocess_kernel(knl) #1/0