From fa73cbbb98ccab67d5d088b7030c32a49ef39ad9 Mon Sep 17 00:00:00 2001 From: Nick Date: Wed, 12 Sep 2018 19:32:44 -0400 Subject: [PATCH 01/27] add predicate-based limiting of access-range to avoid OOB exceptions inside of conditionals --- loopy/check.py | 10 +++++++++ loopy/symbolic.py | 57 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 60 insertions(+), 7 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index c31304d87..15dff6ccc 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -331,6 +331,16 @@ class _AccessCheckMapper(WalkMapper): shape_domain = shape_domain.intersect(slab) + insn = self.kernel.id_to_insn[self.insn_id] + if insn.predicates: + from loopy.symbolic import constraints_from_expr + for pred in insn.predicates: + if get_dependencies(pred) == get_dependencies(subscript): + constraints = constraints_from_expr( + self.domain.get_space(), pred) + for constraint in constraints: + access_range = access_range.add_constraint(constraint) + if not access_range.is_subset(shape_domain): raise LoopyError("'%s' in instruction '%s' " "accesses out-of-bounds array element" diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8927cd6fb..25af1797e 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1339,6 +1339,53 @@ class PwAffEvaluationMapper(EvaluationMapperBase, IdentityMapperMixin): return num.mod_val(denom) +class ConditionalMapper(PwAffEvaluationMapper): + def map_logical_not(self, expr): + constraints = self.rec(expr.child) + out = [] + for constraint in constraints: + negated = constraint.get_aff().neg() + if constraint.is_equality(): + out.append(isl.Constraint.equality_from_aff(negated)) + else: + # since we're flipping a >= need to account for the ='s + val = int(str(constraint.get_constant_val())) + if val > 0: + val = 1 + elif val < 0: + val = -1 + out.append(isl.Constraint.inequality_from_aff(negated + val)) + return out + + def map_logical_and(self, expr): + constraints = [y for ch in expr.children for y in self.rec(ch)] + return constraints + + map_logical_or = map_logical_and + + def map_comparison(self, expr): + left = self.rec(expr.left) + right = self.rec(expr.right) + _, aff = (left - right).get_pieces()[-1] + if expr.operator == "==": + return [isl.Constraint.equality_from_aff(aff)] + elif expr.operator == "!=": + # piecewise + return [isl.Constraint.inequality_from_aff(aff + 1), + isl.Constraint.inequality_from_aff(aff - 1)] + elif expr.operator == "<": + return [isl.Constraint.inequality_from_aff((aff + 1).neg())] + elif expr.operator == "<=": + return [isl.Constraint.inequality_from_aff((aff).neg())] + elif expr.operator == ">": + return [isl.Constraint.inequality_from_aff((aff - 1))] + elif expr.operator == ">=": + return [isl.Constraint.inequality_from_aff((aff))] + else: + raise ValueError("invalid comparison operator") + return left - right + + def aff_from_expr(space, expr, vars_to_zero=None): if vars_to_zero is None: vars_to_zero = frozenset() @@ -1416,14 +1463,10 @@ def simplify_using_aff(kernel, expr): # }}} -# {{{ expression/set <-> constraint conversion - -def eq_constraint_from_expr(space, expr): - return isl.Constraint.equality_from_aff(aff_from_expr(space, expr)) - +# {{{ expression/set <-> constraints conversion -def ineq_constraint_from_expr(space, expr): - return isl.Constraint.inequality_from_aff(aff_from_expr(space, expr)) +def constraints_from_expr(space, expr): + return ConditionalMapper(space, vars_to_zero=[None])(expr) def constraint_to_cond_expr(cns): -- GitLab From 403c4b6fd3b3e5cf08d168e878f406c016b34d1e Mon Sep 17 00:00:00 2001 From: Nick Date: Wed, 12 Sep 2018 19:32:50 -0400 Subject: [PATCH 02/27] test --- test/test_loopy.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/test/test_loopy.py b/test/test_loopy.py index accf9c1df..76f5bdbb8 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2908,6 +2908,51 @@ def test_dep_cycle_printing_and_error(): print(lp.generate_code(knl)[0]) +@pytest.mark.parametrize("op", ['>', '>=', '<', '<=', '==', '!=']) +def test_conditonal_access_range(ctx_factory, op): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + def get_condition(): + if op == '>': + return 'not (i > 7)' + elif op == '>=': + return 'not (i >= 8)' + elif op == '<': + return 'i < 8' + elif op == '<=': + return 'i <=7' + elif op == '==': + return ' or '.join(['i == {}'.format(i) for i in range(8)]) + elif op == '!=': + return ' and '.join(['i != {}'.format(i) for i in range(8, 10)]) + + condition = get_condition() + knl = lp.make_kernel( + "{[i]: 0 <= i < 10}", + """ + if {condition} + tmp[i] = tmp[i] + 1 + end + """.format(condition=condition), + [lp.GlobalArg('tmp', shape=(8,), dtype=np.int64)]) + + assert np.array_equal(knl(queue, tmp=np.arange(8))[1][0], np.arange(1, 9)) + + # and failure + knl = lp.make_kernel( + "{[i,j]: 0 <= i,j < 10}", + """ + if j < 8 + tmp[i] = tmp[i] + end + """, [lp.GlobalArg('tmp', shape=(8,), dtype=np.int32)]) + + from loopy.diagnostic import LoopyError + with pytest.raises(LoopyError): + knl(queue) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From b3231429d5200afcb048ca021dcb038e41927ed2 Mon Sep 17 00:00:00 2001 From: Nick Date: Thu, 13 Sep 2018 09:37:33 -0400 Subject: [PATCH 03/27] implement non-affine check & test --- loopy/check.py | 26 +++++++++++++++++++++----- test/test_loopy.py | 19 +++++++++++++++++-- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 15dff6ccc..facf127bf 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -332,16 +332,32 @@ class _AccessCheckMapper(WalkMapper): shape_domain = shape_domain.intersect(slab) insn = self.kernel.id_to_insn[self.insn_id] + possible_warns = [] if insn.predicates: from loopy.symbolic import constraints_from_expr for pred in insn.predicates: - if get_dependencies(pred) == get_dependencies(subscript): - constraints = constraints_from_expr( - self.domain.get_space(), pred) - for constraint in constraints: - access_range = access_range.add_constraint(constraint) + if get_dependencies(pred) == insn.within_inames: + try: + constraints = constraints_from_expr( + self.domain.get_space(), pred) + for constraint in constraints: + access_range = access_range.add_constraint( + constraint) + except isl.Error: + # non-affine predicate - store for warning if we fail + # this check + possible_warns += [pred] + pass if not access_range.is_subset(shape_domain): + if possible_warns: + from loopy.diagnostic import warn_with_kernel + warn_with_kernel( + self.kernel, "non_affine_predicates", + "Predicates: ({}) are are expressed in a " + "non-affine manner, and were not considered " + "for out-of-bounds array checking.".format( + ', '.join(str(x) for x in possible_warns))) raise LoopyError("'%s' in instruction '%s' " "accesses out-of-bounds array element" % (expr, self.insn_id)) diff --git a/test/test_loopy.py b/test/test_loopy.py index 76f5bdbb8..52a849df7 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2939,7 +2939,9 @@ def test_conditonal_access_range(ctx_factory, op): assert np.array_equal(knl(queue, tmp=np.arange(8))[1][0], np.arange(1, 9)) - # and failure + +def test_conditonal_access_range_failure(ctx_factory): + # predicate doesn't actually limit access_range knl = lp.make_kernel( "{[i,j]: 0 <= i,j < 10}", """ @@ -2950,7 +2952,20 @@ def test_conditonal_access_range(ctx_factory, op): from loopy.diagnostic import LoopyError with pytest.raises(LoopyError): - knl(queue) + lp.generate_code_v2(knl).device_code() + + # predicate non affine + knl = lp.make_kernel( + "{[i,j]: 0 <= i,j < 10}", + """ + if (i+3)*i < 15 + tmp[i] = tmp[i] + end + """, [lp.GlobalArg('tmp', shape=(2,), dtype=np.int32)]) + + from loopy.diagnostic import LoopyError + with pytest.raises(LoopyError): + lp.generate_code_v2(knl).device_code() if __name__ == "__main__": -- GitLab From ea74afba147241eb3501505f6e33f60db67af1d1 Mon Sep 17 00:00:00 2001 From: Nick Date: Thu, 13 Sep 2018 09:47:21 -0400 Subject: [PATCH 04/27] separate the incorrect space error from the non-affine error to avoid incorrect warning --- loopy/check.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index facf127bf..38267f58a 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -340,14 +340,18 @@ class _AccessCheckMapper(WalkMapper): try: constraints = constraints_from_expr( self.domain.get_space(), pred) - for constraint in constraints: - access_range = access_range.add_constraint( - constraint) except isl.Error: # non-affine predicate - store for warning if we fail # this check possible_warns += [pred] - pass + + for constraint in constraints: + try: + access_range = access_range.add_constraint( + constraint) + except isl.Error: + # space doesn't match -- not sure what to do + pass if not access_range.is_subset(shape_domain): if possible_warns: -- GitLab From 1086f6c24d5652cf0b5913cd89e3d4bac48ede05 Mon Sep 17 00:00:00 2001 From: Nick Date: Thu, 13 Sep 2018 10:08:12 -0400 Subject: [PATCH 05/27] fix --- loopy/check.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 38267f58a..6d3d3cc14 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -340,19 +340,20 @@ class _AccessCheckMapper(WalkMapper): try: constraints = constraints_from_expr( self.domain.get_space(), pred) + + for constraint in constraints: + try: + access_range = access_range.add_constraint( + constraint) + except isl.Error: + # space doesn't match -- not sure what to do + pass + except isl.Error: # non-affine predicate - store for warning if we fail # this check possible_warns += [pred] - for constraint in constraints: - try: - access_range = access_range.add_constraint( - constraint) - except isl.Error: - # space doesn't match -- not sure what to do - pass - if not access_range.is_subset(shape_domain): if possible_warns: from loopy.diagnostic import warn_with_kernel -- GitLab From 3def4cdb8ff51eddbcfadb166c2407c737429b17 Mon Sep 17 00:00:00 2001 From: Nick Date: Thu, 13 Sep 2018 12:06:45 -0400 Subject: [PATCH 06/27] apply predicates to access range _before_ calling get_access_range so we ensure we have the same space --- loopy/check.py | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 6d3d3cc14..3d981e34a 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -312,8 +312,28 @@ class _AccessCheckMapper(WalkMapper): expr.aggregate.name, expr, len(subscript), len(shape))) + # apply predicates + access_range = self.domain + insn = self.kernel.id_to_insn[self.insn_id] + possible_warns = [] + if insn.predicates: + from loopy.symbolic import constraints_from_expr + for pred in insn.predicates: + if insn.within_inames <= get_dependencies(pred): + try: + constraints = constraints_from_expr( + self.domain.space, pred) + for constraint in constraints: + access_range = access_range.add_constraint( + constraint) + + except isl.Error: + # non-affine predicate - store for warning if we fail + # this check + possible_warns += [pred] + try: - access_range = get_access_range(self.domain, subscript, + access_range = get_access_range(access_range, subscript, self.kernel.assumptions) except UnableToDetermineAccessRange: # Likely: index was non-affine, nothing we can do. @@ -331,29 +351,6 @@ class _AccessCheckMapper(WalkMapper): shape_domain = shape_domain.intersect(slab) - insn = self.kernel.id_to_insn[self.insn_id] - possible_warns = [] - if insn.predicates: - from loopy.symbolic import constraints_from_expr - for pred in insn.predicates: - if get_dependencies(pred) == insn.within_inames: - try: - constraints = constraints_from_expr( - self.domain.get_space(), pred) - - for constraint in constraints: - try: - access_range = access_range.add_constraint( - constraint) - except isl.Error: - # space doesn't match -- not sure what to do - pass - - except isl.Error: - # non-affine predicate - store for warning if we fail - # this check - possible_warns += [pred] - if not access_range.is_subset(shape_domain): if possible_warns: from loopy.diagnostic import warn_with_kernel -- GitLab From 0c1b360bbcb62c04480d622d76d6408a8ce73e1c Mon Sep 17 00:00:00 2001 From: Nick Date: Thu, 13 Sep 2018 12:07:11 -0400 Subject: [PATCH 07/27] sp. --- test/test_loopy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 52a849df7..95f29f5c6 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2909,7 +2909,7 @@ def test_dep_cycle_printing_and_error(): @pytest.mark.parametrize("op", ['>', '>=', '<', '<=', '==', '!=']) -def test_conditonal_access_range(ctx_factory, op): +def test_conditional_access_range(ctx_factory, op): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -2940,7 +2940,7 @@ def test_conditonal_access_range(ctx_factory, op): assert np.array_equal(knl(queue, tmp=np.arange(8))[1][0], np.arange(1, 9)) -def test_conditonal_access_range_failure(ctx_factory): +def test_conditional_access_range_failure(ctx_factory): # predicate doesn't actually limit access_range knl = lp.make_kernel( "{[i,j]: 0 <= i,j < 10}", -- GitLab From 1c2e95d6bdb57dcc77ac74f65674e331166e7298 Mon Sep 17 00:00:00 2001 From: Nick Date: Thu, 13 Sep 2018 12:07:59 -0400 Subject: [PATCH 08/27] more complicated example w/ parameters, previously broken w/ space conflict --- test/test_loopy.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/test_loopy.py b/test/test_loopy.py index 95f29f5c6..6a44b4e6c 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2940,6 +2940,26 @@ def test_conditional_access_range(ctx_factory, op): assert np.array_equal(knl(queue, tmp=np.arange(8))[1][0], np.arange(1, 9)) +def test_conditional_access_range_with_parameters(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + knl = lp.make_kernel( + ["{[i]: 0 <= i < 10}", + "{[j]: 0 <= j < problem_size}"], + """ + if i < 8 and j < problem_size + tmp[j, i] = tmp[j, i] + 1 + end + """, + [lp.GlobalArg('tmp', shape=('problem_size', 8,), dtype=np.int64), + lp.ValueArg('problem_size', dtype=np.int64)]) + + assert np.array_equal(knl(queue, tmp=np.arange(80).reshape((10, 8)), + problem_size=10)[1][0], np.arange(1, 81).reshape( + (10, 8))) + + def test_conditional_access_range_failure(ctx_factory): # predicate doesn't actually limit access_range knl = lp.make_kernel( -- GitLab From ea99d4c4bfb340999c5a19683674650163a8588d Mon Sep 17 00:00:00 2001 From: Nick Date: Thu, 13 Sep 2018 12:08:46 -0400 Subject: [PATCH 09/27] relax condition to any predicate on iname --- loopy/check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/check.py b/loopy/check.py index 3d981e34a..9f7bc3616 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -319,7 +319,7 @@ class _AccessCheckMapper(WalkMapper): if insn.predicates: from loopy.symbolic import constraints_from_expr for pred in insn.predicates: - if insn.within_inames <= get_dependencies(pred): + if insn.within_inames & get_dependencies(pred): try: constraints = constraints_from_expr( self.domain.space, pred) -- GitLab From c974db197e31cec81ffb6fff109f08e3c0a2cfa2 Mon Sep 17 00:00:00 2001 From: Nick Date: Thu, 13 Sep 2018 12:27:55 -0400 Subject: [PATCH 10/27] catch unknown variables raised by conditional mapper --- loopy/check.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/loopy/check.py b/loopy/check.py index 9f7bc3616..001bc727f 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -276,6 +276,7 @@ class _AccessCheckMapper(WalkMapper): WalkMapper.map_subscript(self, expr) from pymbolic.primitives import Variable + from pymbolic.mapper.evaluator import UnknownVariableError assert isinstance(expr.aggregate, Variable) shape = None @@ -331,6 +332,9 @@ class _AccessCheckMapper(WalkMapper): # non-affine predicate - store for warning if we fail # this check possible_warns += [pred] + except UnknownVariableError: + # data dependent bounds + pass try: access_range = get_access_range(access_range, subscript, -- GitLab From f6a852e042b28e1ed5931d0f6351a42c76b362ee Mon Sep 17 00:00:00 2001 From: Nick Date: Thu, 13 Sep 2018 13:05:40 -0400 Subject: [PATCH 11/27] Allow each part of logical_and / logical_or predicate to succeed/fail separately Add test where half of logical and predicate is data-dependent (and will fail) but other half will succeed to test --- loopy/symbolic.py | 10 ++++++++-- test/test_loopy.py | 23 ++++++++++++++++++++++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 25af1797e..8572f3bda 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1358,7 +1358,14 @@ class ConditionalMapper(PwAffEvaluationMapper): return out def map_logical_and(self, expr): - constraints = [y for ch in expr.children for y in self.rec(ch)] + from pymbolic.mapper.evaluator import UnknownVariableError + constraints = [] + for child in expr.children: + try: + constraints += [c for c in self.rec(child)] + except UnknownVariableError: + # the child contained data-dependent conditionals -> can't apply + pass return constraints map_logical_or = map_logical_and @@ -1383,7 +1390,6 @@ class ConditionalMapper(PwAffEvaluationMapper): return [isl.Constraint.inequality_from_aff((aff))] else: raise ValueError("invalid comparison operator") - return left - right def aff_from_expr(space, expr, vars_to_zero=None): diff --git a/test/test_loopy.py b/test/test_loopy.py index 6a44b4e6c..6645ece1a 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2944,9 +2944,11 @@ def test_conditional_access_range_with_parameters(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + # test that conditional on parameter works, otherwise the tmp[j, i] will show + # as OOB knl = lp.make_kernel( ["{[i]: 0 <= i < 10}", - "{[j]: 0 <= j < problem_size}"], + "{[j]: 0 <= j < problem_size + 2}"], """ if i < 8 and j < problem_size tmp[j, i] = tmp[j, i] + 1 @@ -2959,6 +2961,25 @@ def test_conditional_access_range_with_parameters(ctx_factory): problem_size=10)[1][0], np.arange(1, 81).reshape( (10, 8))) + # test a conditional that's only _half_ data-dependent to ensure the other + # half works + knl = lp.make_kernel( + ["{[i]: 0 <= i < 10}", + "{[j]: 0 <= j < problem_size}"], + """ + if i < 8 and (j + offset) < problem_size + tmp[j, i] = tmp[j, i] + 1 + end + """, + [lp.GlobalArg('tmp', shape=('problem_size', 8,), dtype=np.int64), + lp.ValueArg('problem_size', dtype=np.int64), + lp.ValueArg('offset', dtype=np.int64)]) + + assert np.array_equal(knl(queue, tmp=np.arange(80).reshape((10, 8)), + problem_size=10, + offset=0)[1][0], np.arange(1, 81).reshape( + (10, 8))) + def test_conditional_access_range_failure(ctx_factory): # predicate doesn't actually limit access_range -- GitLab From 2d0ece0f02e97cfa82a73b1e0db88c4d89f12e21 Mon Sep 17 00:00:00 2001 From: Nick Date: Fri, 14 Sep 2018 09:08:54 -0400 Subject: [PATCH 12/27] suppress isl stderr --- loopy/symbolic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8572f3bda..7d617564b 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1472,7 +1472,8 @@ def simplify_using_aff(kernel, expr): # {{{ expression/set <-> constraints conversion def constraints_from_expr(space, expr): - return ConditionalMapper(space, vars_to_zero=[None])(expr) + with isl.SuppressedWarnings(space.get_ctx): + return ConditionalMapper(space, vars_to_zero=[None])(expr) def constraint_to_cond_expr(cns): -- GitLab From 5d7f84fdd5f61ab11682d1deb7bb26a669fe753f Mon Sep 17 00:00:00 2001 From: Nick Date: Fri, 14 Sep 2018 09:16:40 -0400 Subject: [PATCH 13/27] whoops --- loopy/symbolic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 7d617564b..7bc006f9f 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1472,7 +1472,7 @@ def simplify_using_aff(kernel, expr): # {{{ expression/set <-> constraints conversion def constraints_from_expr(space, expr): - with isl.SuppressedWarnings(space.get_ctx): + with isl.SuppressedWarnings(space.get_ctx()): return ConditionalMapper(space, vars_to_zero=[None])(expr) -- GitLab From 7a6db25b4f2ae29c739afdbf45279edc8240ba63 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 20 Nov 2018 20:53:49 -0500 Subject: [PATCH 14/27] suppress warnings --- loopy/check.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 001bc727f..e735fb413 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -321,20 +321,21 @@ class _AccessCheckMapper(WalkMapper): from loopy.symbolic import constraints_from_expr for pred in insn.predicates: if insn.within_inames & get_dependencies(pred): - try: - constraints = constraints_from_expr( - self.domain.space, pred) - for constraint in constraints: - access_range = access_range.add_constraint( - constraint) - - except isl.Error: - # non-affine predicate - store for warning if we fail - # this check - possible_warns += [pred] - except UnknownVariableError: - # data dependent bounds - pass + with isl.SuppressedWarnings(self.domain.get_ctx()): + try: + constraints = constraints_from_expr( + self.domain.space, pred) + for constraint in constraints: + access_range = access_range.add_constraint( + constraint) + + except isl.Error: + # non-affine predicate - store for warning if we fail + # this check + possible_warns += [pred] + except UnknownVariableError: + # data dependent bounds + pass try: access_range = get_access_range(access_range, subscript, -- GitLab From d00fe4ec953b72d8b3fd35b21d42dae0c364309c Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 20 Nov 2018 20:58:26 -0500 Subject: [PATCH 15/27] warn->info --- loopy/check.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index e735fb413..38539641a 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -358,10 +358,9 @@ class _AccessCheckMapper(WalkMapper): if not access_range.is_subset(shape_domain): if possible_warns: - from loopy.diagnostic import warn_with_kernel - warn_with_kernel( - self.kernel, "non_affine_predicates", - "Predicates: ({}) are are expressed in a " + import logging + logger = logging.getLogger(__name__) + logger.info("Predicates: ({}) are are expressed in a " "non-affine manner, and were not considered " "for out-of-bounds array checking.".format( ', '.join(str(x) for x in possible_warns))) -- GitLab From b3050b882434ee02fa3922dd572a2fdc09f06ac3 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 20 Nov 2018 21:07:37 -0500 Subject: [PATCH 16/27] make the conditional mapper it's own class (rather than inheriting from the PwAffMapper) --- loopy/symbolic.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 067e220fa..fbb5701ed 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1339,7 +1339,11 @@ class PwAffEvaluationMapper(EvaluationMapperBase, IdentityMapperMixin): return num.mod_val(denom) -class ConditionalMapper(PwAffEvaluationMapper): +class ConditionalMapper(EvaluationMapperBase, IdentityMapperMixin): + def __init__(self, space, vars_to_zero): + self.pw_map = PwAffEvaluationMapper(space, vars_to_zero) + super(ConditionalMapper, self).__init__(self.pw_map.context.copy()) + def map_logical_not(self, expr): constraints = self.rec(expr.child) out = [] -- GitLab From ff7ffe52d91de2239c5d13de9d4f4768dc223cfa Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sun, 25 Nov 2018 15:59:38 -0600 Subject: [PATCH 17/27] working streaming add_prefetch/precompute option --- loopy/transform/data.py | 4 +- loopy/transform/precompute.py | 272 ++++++++++++++++++++++++++++++---- test/test_apps.py | 35 ++++- 3 files changed, 273 insertions(+), 38 deletions(-) diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 5b1ee6cca..4c34526fd 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -150,6 +150,7 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, temporary_address_space=None, temporary_scope=None, footprint_subscripts=None, fetch_bounding_box=False, + stream_iname=None, # if not None, use streaming prefetch in precompute fetch_outer_inames=None): """Prefetch all accesses to the variable *var_name*, with all accesses being swept through *sweep_inames*. @@ -336,7 +337,8 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, temporary_name=temporary_name, temporary_address_space=temporary_address_space, temporary_scope=temporary_scope, - precompute_outer_inames=fetch_outer_inames) + precompute_outer_inames=fetch_outer_inames, + stream_iname=stream_iname) # {{{ remove inames that were temporarily added by slice sweeps diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 52d568975..0d7f0a2bd 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -270,6 +270,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, fetch_bounding_box=False, temporary_address_space=None, compute_insn_id=None, + stream_iname=None, # if not None, use streaming prefetch in precompute **kwargs): """Precompute the expression described in the substitution rule determined by *subst_use* and store it in a temporary array. A precomputation needs two @@ -838,34 +839,226 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # }}} - from loopy.kernel.data import Assignment - if compute_insn_id is None: - compute_insn_id = kernel.make_unique_instruction_id(based_on=c_subst_name) - - compute_insn = Assignment( - id=compute_insn_id, - assignee=assignee, - expression=compute_expression, - # within_inames determined below - ) - compute_dep_id = compute_insn_id - added_compute_insns = [compute_insn] - - if temporary_address_space == AddressSpace.GLOBAL: - barrier_insn_id = kernel.make_unique_instruction_id( - based_on=c_subst_name+"_barrier") + if stream_iname is not None: + from pymbolic import parse + stream_var = parse(stream_iname) + + def increment(expr, iname): + from pymbolic import parse, substitute + if isinstance(iname, str): + iname = parse(iname) + return substitute(expr, {iname: iname + 1}) + + def decrement(expr, iname): + from pymbolic import parse, substitute + if isinstance(iname, str): + iname = parse(iname) + return substitute(expr, {iname: iname - 1}) + + def project(set, iname): + var_dict = set.get_var_dict() + dt, dim_idx = var_dict[iname] + return set.project_out(dt, dim_idx, 1) + + def to_param(set, iname): + var_dict = set.get_var_dict() + dt, dim_idx = var_dict[iname] + return set.move_dims(isl.dim_type.param, dim_idx, dt, dim_idx, 1) + + def rename(set, old, new): + var_dict = set.get_var_dict() + dt, dim_idx = var_dict[old] + return set.set_dim_name(dt, dim_idx, new) + + def add_iname(set, inames): + from pymbolic.primitives import Variable + if isinstance(inames, Variable): + inames = [inames.name] + elif isinstance(inames, str): + inames = [inames] + for iname in inames: + set = set.add_dims(isl.dim_type.out, 1).set_dim_name(isl.dim_type.out, set.n_dim(), iname) + return set + + pstorage_axis_names = [name+"'" for name in storage_axis_names] + + global_storage_axis_dict = {} + for iname in storage_axis_names: + # this breaks for custom sweep inames? + global_storage_axis_dict[iname] = iname.replace("dim", "gdim") + global_storage_axis_names = list(global_storage_axis_dict.values()) + + domain = kernel.domains[0] # ??? what should I do about this indexing + + storage_axis_subst_dict_1 = storage_axis_subst_dict + + storage_axis_subst_dict_0 = {} + for iname in storage_axis_subst_dict.keys(): + storage_axis_subst_dict_0[iname] = \ + decrement(storage_axis_subst_dict[iname], stream_var) + + domain = add_iname(domain, list(global_storage_axis_dict.values())) + + from loopy.symbolic import aff_from_expr + + def eq_constraint_from_expr(space, expr): + return isl.Constraint.equality_from_aff(aff_from_expr(space, expr)) + + + def ineq_constraint_from_expr(space, expr): + return isl.Constraint.inequality_from_aff(aff_from_expr(space, expr)) + + constraints_0 \ + = [eq_constraint_from_expr(domain.space, parse(global_storage_axis_dict[si]) \ + - storage_axis_subst_dict_0[si]) for si in storage_axis_names] + constraints_1 = \ + [eq_constraint_from_expr(domain.space, parse(global_storage_axis_dict[si]) \ + - storage_axis_subst_dict_1[si]) for si in storage_axis_names] + + domain_0 = domain.add_constraints(constraints_0) + domain_1 = domain.add_constraints(constraints_1) + + for si, psi in zip(storage_axis_names, pstorage_axis_names): + domain_1 = add_iname(domain_1, psi) + domain_0 = rename(domain_0, si, psi) + domain_0 = add_iname(domain_0, si) + domain_0, domain_1 = isl.align_two(domain_0, domain_1) + + overlap = domain_0 & domain_1 + + # ??? better way to ensure stream_iname is not on first iteration? + # e.g., this (and other code) assumes stream_iname increments by positive 1 + dt, dim_idx = domain.get_var_dict()[stream_iname] + from loopy.symbolic import pw_aff_to_expr + stream_min = pw_aff_to_expr(domain.dim_min(dim_idx)) + overlap = overlap.add_constraint( + ineq_constraint_from_expr(overlap.space, stream_var - stream_min - 1)) + + from loopy.symbolic import basic_set_to_cond_expr + in_overlap = basic_set_to_cond_expr( + overlap.project_out_except( + storage_axis_names+[stream_iname], [isl.dim_type.out])) + + fetch_var = var(temporary_name) + + from pymbolic.primitives import If + stream_assignee = fetch_var[tuple(var(iname) for iname in non1_storage_axis_names)] + + # ??? better way to do all this? + stream_replace_rules = overlap.project_out_except( + storage_axis_names+pstorage_axis_names, + [isl.dim_type.out]) + from loopy.symbolic import aff_to_expr + cns_exprs = [aff_to_expr(cns.get_aff()) + for cns in stream_replace_rules.get_constraints()] + + # primed storage inames + non1_pstorage_axis_names = [name+"'" for name in non1_storage_axis_names] + + stream_subst_dict = {} + from pymbolic.algorithm import solve_affine_equations_for + stream_subst_dict = solve_affine_equations_for(non1_pstorage_axis_names, + [(0, cns) for cns in cns_exprs]) + + from pymbolic import parse + fetch_var = parse(temporary_name) + stream_temp_expression = fetch_var[tuple([stream_subst_dict[Variable(var)] + for var in non1_pstorage_axis_names])] + + stream_fetch_expression = compute_expression + + var_name_gen = kernel.get_var_name_generator() + # ??? probably want to do this with var_name_gen? + stream_temp_assignee = temporary_name+"_temp" + + copy_temp_insn_id = temporary_name+"_copy_temp_rule" + stream_temp_insn_id = temporary_name+"_stream_temp_rule" + fetch_temp_insn_id = temporary_name+"_fetch_temp_rule" + + from loopy.kernel.data import Assignment + stream_temp_insn = Assignment( + id=stream_temp_insn_id, + assignee=stream_temp_assignee, + expression=stream_temp_expression, + predicates=[in_overlap], + depends_on_is_final=True + # within_inames determined below + ) + + fetch_temp_insn = Assignment( + id=fetch_temp_insn_id, + assignee=stream_temp_assignee, + expression=stream_fetch_expression, + predicates=[in_overlap.not_()], + depends_on_is_final=True + # within_inames determined below + ) + + stream_barrier_insn_id = kernel.make_unique_instruction_id( + based_on=stream_temp_insn_id+"_barrier") from loopy.kernel.instruction import BarrierInstruction - barrier_insn = BarrierInstruction( - id=barrier_insn_id, - depends_on=frozenset([compute_insn_id]), - synchronization_kind="global", - mem_kind="global") - compute_dep_id = barrier_insn_id - - added_compute_insns.append(barrier_insn) + stream_barrier_insn = BarrierInstruction( + id=stream_barrier_insn_id, + depends_on=frozenset([stream_temp_insn_id, fetch_temp_insn_id]), + depends_on_is_final=True, + synchronization_kind="local", + mem_kind="local") + + copy_temp_insn = Assignment( + id=copy_temp_insn_id, + assignee=stream_assignee, + expression=stream_temp_assignee, + depends_on_is_final=True, + depends_on=frozenset([stream_barrier_insn_id]) + # within_inames determined below + ) + + copy_barrier_insn_id = kernel.make_unique_instruction_id( + based_on=copy_temp_insn_id+"_barrier") + from loopy.kernel.instruction import BarrierInstruction + copy_barrier_insn = BarrierInstruction( + id=copy_barrier_insn_id, + depends_on=frozenset([copy_temp_insn_id]), + depends_on_is_final=True, + synchronization_kind="local", + mem_kind="local") + + added_compute_insns = [fetch_temp_insn, stream_temp_insn, + stream_barrier_insn, copy_temp_insn, copy_barrier_insn] + + # ??? this is for compatibility with old precompute code + compute_insn_id = copy_barrier_insn_id + compute_dep_id = copy_barrier_insn_id + compute_insn = copy_barrier_insn + else: + from loopy.kernel.data import Assignment + if compute_insn_id is None: + compute_insn_id = kernel.make_unique_instruction_id(based_on=c_subst_name) + + compute_insn = Assignment( + id=compute_insn_id, + assignee=assignee, + expression=compute_expression, + # within_inames determined below + ) + compute_dep_id = compute_insn_id + added_compute_insns = [compute_insn] + + if temporary_address_space == AddressSpace.GLOBAL: + barrier_insn_id = kernel.make_unique_instruction_id( + based_on=c_subst_name+"_barrier") + from loopy.kernel.instruction import BarrierInstruction + barrier_insn = BarrierInstruction( + id=barrier_insn_id, + depends_on=frozenset([compute_insn_id]), + synchronization_kind="global", + mem_kind="global") + compute_dep_id = barrier_insn_id + + added_compute_insns.append(barrier_insn) # }}} - + # {{{ substitute rule into expressions in kernel (if within footprint) from loopy.symbolic import SubstitutionRuleExpander @@ -888,12 +1081,14 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # {{{ add dependencies to compute insn - kernel = kernel.copy( - instructions=[ - insn.copy(depends_on=frozenset(invr.compute_insn_depends_on)) - if insn.id == compute_insn_id - else insn - for insn in kernel.instructions]) + # ??? removed this from streaming case for now - it adds a stupid barrier + if stream_iname is None: + kernel = kernel.copy( + instructions=[ + insn.copy(depends_on=frozenset(invr.compute_insn_depends_on)) + if insn.id == compute_insn_id + else insn + for insn in kernel.instructions]) # }}} @@ -947,7 +1142,9 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, kernel = kernel.copy( instructions=[ insn.copy(within_inames=precompute_outer_inames) - if insn.id == compute_insn_id + # ??? replaced the below - should work for normal precompute? + # if insn.id == compute_insn_id + if insn.id in [ a.id for a in added_compute_insns] else insn for insn in kernel.instructions]) @@ -1032,6 +1229,17 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, new_temporary_variables[temporary_name] = temp_var + if stream_iname is not None: + temp_name = temporary_name+'_temp' + temp_fetch_variable = lp.TemporaryVariable( + name=temp_name, + dtype=dtype, + base_indices=()*len(new_temp_shape), + shape=tuple(), + address_space=lp.AddressSpace.PRIVATE) + + new_temporary_variables[temp_name] = temp_fetch_variable + kernel = kernel.copy( temporary_variables=new_temporary_variables) diff --git a/test/test_apps.py b/test/test_apps.py index e7f4004fa..c5b27c0a3 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -336,11 +336,10 @@ def test_stencil(ctx_factory): " + a_offset(i,j+1)" " + a_offset(i-1,j)" " + a_offset(i+1,j)" - ], - [ - lp.GlobalArg("a", np.float32, shape=(n+2, n+2,)), - lp.GlobalArg("z", np.float32, shape=(n+2, n+2,)) - ]) + ] + ) + + knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32)) ref_knl = knl @@ -359,9 +358,35 @@ def test_stencil(ctx_factory): knl = lp.prioritize_loops(knl, ["a_dim_0_outer", "a_dim_1_outer"]) return knl + # streaming, block covers output footprint + # default_tag="l.auto" gets ride of iname a_dim_1 + def variant_3(knl): + knl.silenced_warnings = ['single_writer_after_creation'] + knl = lp.split_iname(knl, "i", 4, outer_tag=None, inner_tag=None) + knl = lp.split_iname(knl, "j", 64, outer_tag="g.0", inner_tag="l.0") + knl = lp.add_prefetch(knl, "a", ["i_inner", "j_inner"], + fetch_bounding_box=True, default_tag="l.auto", + stream_iname="i_outer") + knl = lp.tag_inames(knl, dict(a_dim_0="l.1", i_inner = "l.1")) + return knl + + # streaming, block covers input footprint (i.e., includes halos) + def variant_4(knl): + knl.silenced_warnings = ['single_writer_after_creation'] + knl = lp.split_iname(knl, "i", 4, inner_tag=None) + knl = lp.split_iname(knl, "j", 64, inner_tag="l.0") + knl = lp.add_prefetch(knl, "a", ["i_inner", "j_inner"], + fetch_bounding_box=True, default_tag=None, + stream_iname="i_outer") + knl = lp.tag_inames(knl, dict(a_dim_0="l.1", i_inner = "l.1", a_dim_1="l.0")) + knl = lp.tag_inames(knl, dict(i_outer=None, j_outer="g.0")) + return knl + for variant in [ #variant_1, variant_2, + variant_3, + variant_4, ]: lp.auto_test_vs_ref(ref_knl, ctx, variant(knl), print_ref_code=False, -- GitLab From 76f68816ad92b0a57bb534e3d6f0cd5d4cd69e41 Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sun, 25 Nov 2018 16:06:34 -0600 Subject: [PATCH 18/27] appease flake8 --- loopy/transform/precompute.py | 9 +++++---- test/test_apps.py | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 0d7f0a2bd..2ad0eafc1 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -949,7 +949,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, storage_axis_names+pstorage_axis_names, [isl.dim_type.out]) from loopy.symbolic import aff_to_expr - cns_exprs = [aff_to_expr(cns.get_aff()) + cns_exprs = [aff_to_expr(cns.get_aff()) for cns in stream_replace_rules.get_constraints()] # primed storage inames @@ -1033,7 +1033,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, else: from loopy.kernel.data import Assignment if compute_insn_id is None: - compute_insn_id = kernel.make_unique_instruction_id(based_on=c_subst_name) + compute_insn_id = kernel.make_unique_instruction_id( + based_on=c_subst_name) compute_insn = Assignment( id=compute_insn_id, @@ -1058,7 +1059,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, added_compute_insns.append(barrier_insn) # }}} - + # {{{ substitute rule into expressions in kernel (if within footprint) from loopy.symbolic import SubstitutionRuleExpander @@ -1144,7 +1145,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, insn.copy(within_inames=precompute_outer_inames) # ??? replaced the below - should work for normal precompute? # if insn.id == compute_insn_id - if insn.id in [ a.id for a in added_compute_insns] + if insn.id in [a.id for a in added_compute_insns] else insn for insn in kernel.instructions]) diff --git a/test/test_apps.py b/test/test_apps.py index c5b27c0a3..819002e45 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -367,7 +367,7 @@ def test_stencil(ctx_factory): knl = lp.add_prefetch(knl, "a", ["i_inner", "j_inner"], fetch_bounding_box=True, default_tag="l.auto", stream_iname="i_outer") - knl = lp.tag_inames(knl, dict(a_dim_0="l.1", i_inner = "l.1")) + knl = lp.tag_inames(knl, dict(a_dim_0="l.1", i_inner="l.1")) return knl # streaming, block covers input footprint (i.e., includes halos) @@ -378,7 +378,7 @@ def test_stencil(ctx_factory): knl = lp.add_prefetch(knl, "a", ["i_inner", "j_inner"], fetch_bounding_box=True, default_tag=None, stream_iname="i_outer") - knl = lp.tag_inames(knl, dict(a_dim_0="l.1", i_inner = "l.1", a_dim_1="l.0")) + knl = lp.tag_inames(knl, dict(a_dim_0="l.1", i_inner="l.1", a_dim_1="l.0")) knl = lp.tag_inames(knl, dict(i_outer=None, j_outer="g.0")) return knl -- GitLab From a6fd6e9c7c1baea71b47fbe938fd5804b5707ce9 Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sun, 25 Nov 2018 16:18:06 -0600 Subject: [PATCH 19/27] remove var as iterator - python2.7 doesn't like? --- loopy/transform/precompute.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 2ad0eafc1..b367d8637 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -962,8 +962,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, from pymbolic import parse fetch_var = parse(temporary_name) - stream_temp_expression = fetch_var[tuple([stream_subst_dict[Variable(var)] - for var in non1_pstorage_axis_names])] + stream_temp_expression = fetch_var[tuple([stream_subst_dict[Variable(psname)] + for psname in non1_pstorage_axis_names])] stream_fetch_expression = compute_expression -- GitLab From 9d9cd00077c80aa86d1f133a815267350dc3ad9a Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sun, 25 Nov 2018 16:23:52 -0600 Subject: [PATCH 20/27] more flake8 fixes --- loopy/transform/precompute.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index b367d8637..487c100ed 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -904,16 +904,17 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, def eq_constraint_from_expr(space, expr): return isl.Constraint.equality_from_aff(aff_from_expr(space, expr)) - def ineq_constraint_from_expr(space, expr): return isl.Constraint.inequality_from_aff(aff_from_expr(space, expr)) constraints_0 \ - = [eq_constraint_from_expr(domain.space, parse(global_storage_axis_dict[si]) \ - - storage_axis_subst_dict_0[si]) for si in storage_axis_names] - constraints_1 = \ - [eq_constraint_from_expr(domain.space, parse(global_storage_axis_dict[si]) \ - - storage_axis_subst_dict_1[si]) for si in storage_axis_names] + = [eq_constraint_from_expr(domain.space, + parse(global_storage_axis_dict[si]) + - storage_axis_subst_dict_0[si]) for si in storage_axis_names] + constraints_1 \ + = [eq_constraint_from_expr(domain.space, + parse(global_storage_axis_dict[si]) + - storage_axis_subst_dict_1[si]) for si in storage_axis_names] domain_0 = domain.add_constraints(constraints_0) domain_1 = domain.add_constraints(constraints_1) @@ -941,8 +942,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, fetch_var = var(temporary_name) - from pymbolic.primitives import If - stream_assignee = fetch_var[tuple(var(iname) for iname in non1_storage_axis_names)] + stream_assignee = fetch_var[tuple(var(iname) + for iname in non1_storage_axis_names)] # ??? better way to do all this? stream_replace_rules = overlap.project_out_except( -- GitLab From 30bfb514590923f1353d440b32ae1852ad753c48 Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Mon, 26 Nov 2018 14:46:10 -0600 Subject: [PATCH 21/27] use non1 storage axes rather than all --- loopy/transform/data.py | 2 +- loopy/transform/precompute.py | 35 +++++++++++++++++------------------ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 4c34526fd..2e5a7218c 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -150,7 +150,7 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, temporary_address_space=None, temporary_scope=None, footprint_subscripts=None, fetch_bounding_box=False, - stream_iname=None, # if not None, use streaming prefetch in precompute + stream_iname=None, # if not None, use streaming prefetch in precompute fetch_outer_inames=None): """Prefetch all accesses to the variable *var_name*, with all accesses being swept through *sweep_inames*. diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 487c100ed..34d4f14bf 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -270,7 +270,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, fetch_bounding_box=False, temporary_address_space=None, compute_insn_id=None, - stream_iname=None, # if not None, use streaming prefetch in precompute + stream_iname=None, # if not None, use streaming prefetch in precompute **kwargs): """Precompute the expression described in the substitution rule determined by *subst_use* and store it in a temporary array. A precomputation needs two @@ -877,18 +877,20 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, elif isinstance(inames, str): inames = [inames] for iname in inames: - set = set.add_dims(isl.dim_type.out, 1).set_dim_name(isl.dim_type.out, set.n_dim(), iname) + set = set.add_dims(isl.dim_type.out, + 1).set_dim_name(isl.dim_type.out, set.n_dim(), iname) return set - - pstorage_axis_names = [name+"'" for name in storage_axis_names] + + # primed storage inames + non1_pstorage_axis_names = [name+"'" for name in non1_storage_axis_names] global_storage_axis_dict = {} - for iname in storage_axis_names: + for iname in non1_storage_axis_names: # this breaks for custom sweep inames? global_storage_axis_dict[iname] = iname.replace("dim", "gdim") global_storage_axis_names = list(global_storage_axis_dict.values()) - domain = kernel.domains[0] # ??? what should I do about this indexing + domain = kernel.domains[0] # ??? what should I do about this indexing storage_axis_subst_dict_1 = storage_axis_subst_dict @@ -897,7 +899,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, storage_axis_subst_dict_0[iname] = \ decrement(storage_axis_subst_dict[iname], stream_var) - domain = add_iname(domain, list(global_storage_axis_dict.values())) + domain = add_iname(domain, global_storage_axis_names) from loopy.symbolic import aff_from_expr @@ -909,17 +911,17 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, constraints_0 \ = [eq_constraint_from_expr(domain.space, - parse(global_storage_axis_dict[si]) - - storage_axis_subst_dict_0[si]) for si in storage_axis_names] + parse(global_storage_axis_dict[si]) + - storage_axis_subst_dict_0[si]) for si in non1_storage_axis_names] constraints_1 \ = [eq_constraint_from_expr(domain.space, - parse(global_storage_axis_dict[si]) - - storage_axis_subst_dict_1[si]) for si in storage_axis_names] + parse(global_storage_axis_dict[si]) + - storage_axis_subst_dict_1[si]) for si in non1_storage_axis_names] domain_0 = domain.add_constraints(constraints_0) domain_1 = domain.add_constraints(constraints_1) - for si, psi in zip(storage_axis_names, pstorage_axis_names): + for si, psi in zip(non1_storage_axis_names, non1_pstorage_axis_names): domain_1 = add_iname(domain_1, psi) domain_0 = rename(domain_0, si, psi) domain_0 = add_iname(domain_0, si) @@ -938,24 +940,21 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, from loopy.symbolic import basic_set_to_cond_expr in_overlap = basic_set_to_cond_expr( overlap.project_out_except( - storage_axis_names+[stream_iname], [isl.dim_type.out])) + non1_storage_axis_names+[stream_iname], [isl.dim_type.out])) fetch_var = var(temporary_name) - stream_assignee = fetch_var[tuple(var(iname) + stream_assignee = fetch_var[tuple(var(iname) for iname in non1_storage_axis_names)] # ??? better way to do all this? stream_replace_rules = overlap.project_out_except( - storage_axis_names+pstorage_axis_names, + non1_storage_axis_names+non1_pstorage_axis_names, [isl.dim_type.out]) from loopy.symbolic import aff_to_expr cns_exprs = [aff_to_expr(cns.get_aff()) for cns in stream_replace_rules.get_constraints()] - # primed storage inames - non1_pstorage_axis_names = [name+"'" for name in non1_storage_axis_names] - stream_subst_dict = {} from pymbolic.algorithm import solve_affine_equations_for stream_subst_dict = solve_affine_equations_for(non1_pstorage_axis_names, -- GitLab From 05208a4fb006865f38997fdb811995385634dfe1 Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Thu, 29 Nov 2018 14:05:37 -0600 Subject: [PATCH 22/27] add streaming variant to convolution test --- test/test_apps.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/test/test_apps.py b/test/test_apps.py index 819002e45..f7161a53f 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -106,10 +106,24 @@ def test_convolution(ctx_factory): default_tag="l.auto") return knl + def variant_3(knl): + knl = lp.split_iname(knl, "im_x", 16, inner_tag="l.0") + knl = lp.split_iname(knl, "im_y", 16, inner_tag="l.1") + knl = lp.tag_inames(knl, dict(iimg="g.0")) + knl = lp.add_prefetch(knl, "f[ifeat,:,:,:]", default_tag="l.auto") + knl = lp.add_prefetch(knl, "img", "im_x_inner, im_y_inner, f_x, f_y, icolor", + stream_iname="im_x_outer", + default_tag=None, + fetch_outer_inames="im_x_outer,im_y_outer,iimg,ifeat") + knl = lp.tag_inames(knl, dict(img_dim_1="l.0", img_dim_2="l.1")) + knl.silenced_warnings = ['single_writer_after_creation'] + return knl + for variant in [ #variant_0, #variant_1, - variant_2 + variant_2, + variant_3 ]: lp.auto_test_vs_ref(ref_knl, ctx, variant(knl), parameters=dict( -- GitLab From 01ee247b861d1b431fe6f47634f536ac5ac6b373 Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Thu, 29 Nov 2018 14:37:03 -0600 Subject: [PATCH 23/27] clean up, comment --- loopy/transform/precompute.py | 38 +++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 34d4f14bf..91319fcec 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -843,6 +843,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, from pymbolic import parse stream_var = parse(stream_iname) + # {{{ some utility functions + def increment(expr, iname): from pymbolic import parse, substitute if isinstance(iname, str): @@ -855,16 +857,6 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, iname = parse(iname) return substitute(expr, {iname: iname - 1}) - def project(set, iname): - var_dict = set.get_var_dict() - dt, dim_idx = var_dict[iname] - return set.project_out(dt, dim_idx, 1) - - def to_param(set, iname): - var_dict = set.get_var_dict() - dt, dim_idx = var_dict[iname] - return set.move_dims(isl.dim_type.param, dim_idx, dt, dim_idx, 1) - def rename(set, old, new): var_dict = set.get_var_dict() dt, dim_idx = var_dict[old] @@ -881,13 +873,15 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, 1).set_dim_name(isl.dim_type.out, set.n_dim(), iname) return set + # }}} + # primed storage inames non1_pstorage_axis_names = [name+"'" for name in non1_storage_axis_names] + # append "_g" to storage inames for corresponding global iname global_storage_axis_dict = {} for iname in non1_storage_axis_names: - # this breaks for custom sweep inames? - global_storage_axis_dict[iname] = iname.replace("dim", "gdim") + global_storage_axis_dict[iname] = iname+"_g" global_storage_axis_names = list(global_storage_axis_dict.values()) domain = kernel.domains[0] # ??? what should I do about this indexing @@ -903,12 +897,14 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, from loopy.symbolic import aff_from_expr + # these were removed from loopy.symbolic def eq_constraint_from_expr(space, expr): return isl.Constraint.equality_from_aff(aff_from_expr(space, expr)) def ineq_constraint_from_expr(space, expr): return isl.Constraint.inequality_from_aff(aff_from_expr(space, expr)) + # constraints on relationship between storage, etc. inames and global inames constraints_0 \ = [eq_constraint_from_expr(domain.space, parse(global_storage_axis_dict[si]) @@ -947,7 +943,9 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, stream_assignee = fetch_var[tuple(var(iname) for iname in non1_storage_axis_names)] - # ??? better way to do all this? + # {{{ obtain global indexing from constraints + + # ??? better way? stream_replace_rules = overlap.project_out_except( non1_storage_axis_names+non1_pstorage_axis_names, [isl.dim_type.out]) @@ -960,6 +958,10 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, stream_subst_dict = solve_affine_equations_for(non1_pstorage_axis_names, [(0, cns) for cns in cns_exprs]) + # }}} + + # {{{ create instructions + from pymbolic import parse fetch_var = parse(temporary_name) stream_temp_expression = fetch_var[tuple([stream_subst_dict[Variable(psname)] @@ -968,7 +970,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, stream_fetch_expression = compute_expression var_name_gen = kernel.get_var_name_generator() - # ??? probably want to do this with var_name_gen? + # ??? probably want to generate temporary name with var_name_gen? stream_temp_assignee = temporary_name+"_temp" copy_temp_insn_id = temporary_name+"_copy_temp_rule" @@ -1026,7 +1028,9 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, added_compute_insns = [fetch_temp_insn, stream_temp_insn, stream_barrier_insn, copy_temp_insn, copy_barrier_insn] - # ??? this is for compatibility with old precompute code + # }}} + + # ??? this is for compatibility with old (shared) precompute code compute_insn_id = copy_barrier_insn_id compute_dep_id = copy_barrier_insn_id compute_insn = copy_barrier_insn @@ -1075,14 +1079,14 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, kernel = invr.map_kernel(kernel) kernel = kernel.copy( - instructions=added_compute_insns + kernel.instructions) + instructions=added_compute_insns + kernel.instructions) kernel = rule_mapping_context.finish_kernel(kernel) # }}} # {{{ add dependencies to compute insn - # ??? removed this from streaming case for now - it adds a stupid barrier + # ??? removed this from streaming case for now - it adds a redundant barrier if stream_iname is None: kernel = kernel.copy( instructions=[ -- GitLab From 4159cd103bdd40b2f90c4dac913ebe9d78431e22 Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sat, 11 Jul 2020 14:21:45 -0500 Subject: [PATCH 24/27] ConditionalMapper: map constants to pwaffs --- loopy/symbolic.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8641100a6..1e8a14c6c 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1416,6 +1416,9 @@ class ConditionalMapper(EvaluationMapperBase, IdentityMapperMixin): map_logical_or = map_logical_and + def map_constant(self, expr): + return self.pw_map(expr) + def map_comparison(self, expr): left = self.rec(expr.left) right = self.rec(expr.right) -- GitLab From cce314835411c6888611440ebc15911ed28ddfdd Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sun, 25 Oct 2020 16:04:24 -0500 Subject: [PATCH 25/27] _AccessCheckMapper: self.domain -> domain --- loopy/check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index d5ab4a0ae..3bbe472c4 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -422,17 +422,17 @@ class _AccessCheckMapper(WalkMapper): len(subscript), len(shape))) # apply predicates - access_range = self.domain + access_range = domain insn = self.kernel.id_to_insn[self.insn_id] possible_warns = [] if insn.predicates: from loopy.symbolic import constraints_from_expr for pred in insn.predicates: if insn.within_inames & get_dependencies(pred): - with isl.SuppressedWarnings(self.domain.get_ctx()): + with isl.SuppressedWarnings(domain.get_ctx()): try: constraints = constraints_from_expr( - self.domain.space, pred) + domain.space, pred) for constraint in constraints: access_range = access_range.add_constraint( constraint) -- GitLab From 2c3572a60c4ef47e6a5557596201eb823b3208de Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sun, 25 Oct 2020 17:03:59 -0500 Subject: [PATCH 26/27] update compyte and remove bad quotes --- loopy/check.py | 2 +- loopy/target/c/compyte | 2 +- test/test_apps.py | 6 +++--- test/test_loopy.py | 46 +++++++++++++++++++++--------------------- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 3bbe472c4..d46281fbf 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -471,7 +471,7 @@ class _AccessCheckMapper(WalkMapper): logger.info("Predicates: ({}) are are expressed in a " "non-affine manner, and were not considered " "for out-of-bounds array checking.".format( - ', '.join(str(x) for x in possible_warns))) + ", ".join(str(x) for x in possible_warns))) raise LoopyError("'%s' in instruction '%s' " "accesses out-of-bounds array element (could not" " establish '%s' is a subset of '%s')." diff --git a/loopy/target/c/compyte b/loopy/target/c/compyte index 7e48e1166..d1f993dae 160000 --- a/loopy/target/c/compyte +++ b/loopy/target/c/compyte @@ -1 +1 @@ -Subproject commit 7e48e1166a13cfbb7b60f909b071f088034ffda1 +Subproject commit d1f993daecc03947d9e6e3e60d2a5145ecbf3786 diff --git a/test/test_apps.py b/test/test_apps.py index 6b00be3e2..290900d1a 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -117,7 +117,7 @@ def test_convolution(ctx_factory): default_tag=None, fetch_outer_inames="im_x_outer,im_y_outer,iimg,ifeat") knl = lp.tag_inames(knl, dict(img_dim_1="l.0", img_dim_2="l.1")) - knl.silenced_warnings = ['single_writer_after_creation'] + knl.silenced_warnings = ["single_writer_after_creation"] return knl for variant in [ @@ -376,7 +376,7 @@ def test_stencil(ctx_factory): # streaming, block covers output footprint # default_tag="l.auto" gets ride of iname a_dim_1 def variant_3(knl): - knl.silenced_warnings = ['single_writer_after_creation'] + knl.silenced_warnings = ["single_writer_after_creation"] knl = lp.split_iname(knl, "i", 4, outer_tag=None, inner_tag=None) knl = lp.split_iname(knl, "j", 64, outer_tag="g.0", inner_tag="l.0") knl = lp.add_prefetch(knl, "a", ["i_inner", "j_inner"], @@ -387,7 +387,7 @@ def test_stencil(ctx_factory): # streaming, block covers input footprint (i.e., includes halos) def variant_4(knl): - knl.silenced_warnings = ['single_writer_after_creation'] + knl.silenced_warnings = ["single_writer_after_creation"] knl = lp.split_iname(knl, "i", 4, inner_tag=None) knl = lp.split_iname(knl, "j", 64, inner_tag="l.0") knl = lp.add_prefetch(knl, "a", ["i_inner", "j_inner"], diff --git a/test/test_loopy.py b/test/test_loopy.py index 238006e20..04677b8cf 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1906,7 +1906,7 @@ def test_header_extract(): #test CUDA cuknl = knl.copy(target=lp.CudaTarget()) assert str(lp.generate_header(cuknl)[0]) == ( - 'extern "C" __global__ void __launch_bounds__(1) ' + "extern "C" __global__ void __launch_bounds__(1) " "loopy_kernel(float *__restrict__ T);") #test OpenCL @@ -2742,24 +2742,24 @@ def test_dep_cycle_printing_and_error(): print(lp.generate_code(knl)[0]) -@pytest.mark.parametrize("op", ['>', '>=', '<', '<=', '==', '!=']) +@pytest.mark.parametrize("op", [">", ">=", "<", "<=", "==", "!="]) def test_conditional_access_range(ctx_factory, op): ctx = ctx_factory() queue = cl.CommandQueue(ctx) def get_condition(): - if op == '>': - return 'not (i > 7)' - elif op == '>=': - return 'not (i >= 8)' - elif op == '<': - return 'i < 8' - elif op == '<=': - return 'i <=7' - elif op == '==': - return ' or '.join(['i == {}'.format(i) for i in range(8)]) - elif op == '!=': - return ' and '.join(['i != {}'.format(i) for i in range(8, 10)]) + if op == ">": + return "not (i > 7)" + elif op == ">=": + return "not (i >= 8)" + elif op == "<": + return "i < 8" + elif op == "<=": + return "i <=7" + elif op == "==": + return " or ".join(["i == {}".format(i) for i in range(8)]) + elif op == "!=": + return " and ".join(["i != {}".format(i) for i in range(8, 10)]) condition = get_condition() knl = lp.make_kernel( @@ -2769,7 +2769,7 @@ def test_conditional_access_range(ctx_factory, op): tmp[i] = tmp[i] + 1 end """.format(condition=condition), - [lp.GlobalArg('tmp', shape=(8,), dtype=np.int64)]) + [lp.GlobalArg("tmp", shape=(8,), dtype=np.int64)]) assert np.array_equal(knl(queue, tmp=np.arange(8))[1][0], np.arange(1, 9)) @@ -2788,8 +2788,8 @@ def test_conditional_access_range_with_parameters(ctx_factory): tmp[j, i] = tmp[j, i] + 1 end """, - [lp.GlobalArg('tmp', shape=('problem_size', 8,), dtype=np.int64), - lp.ValueArg('problem_size', dtype=np.int64)]) + [lp.GlobalArg("tmp", shape=("problem_size", 8,), dtype=np.int64), + lp.ValueArg("problem_size", dtype=np.int64)]) assert np.array_equal(knl(queue, tmp=np.arange(80).reshape((10, 8)), problem_size=10)[1][0], np.arange(1, 81).reshape( @@ -2805,9 +2805,9 @@ def test_conditional_access_range_with_parameters(ctx_factory): tmp[j, i] = tmp[j, i] + 1 end """, - [lp.GlobalArg('tmp', shape=('problem_size', 8,), dtype=np.int64), - lp.ValueArg('problem_size', dtype=np.int64), - lp.ValueArg('offset', dtype=np.int64)]) + [lp.GlobalArg("tmp", shape=("problem_size", 8,), dtype=np.int64), + lp.ValueArg("problem_size", dtype=np.int64), + lp.ValueArg("offset", dtype=np.int64)]) assert np.array_equal(knl(queue, tmp=np.arange(80).reshape((10, 8)), problem_size=10, @@ -2823,7 +2823,7 @@ def test_conditional_access_range_failure(ctx_factory): if j < 8 tmp[i] = tmp[i] end - """, [lp.GlobalArg('tmp', shape=(8,), dtype=np.int32)]) + """, [lp.GlobalArg("tmp", shape=(8,), dtype=np.int32)]) from loopy.diagnostic import LoopyError with pytest.raises(LoopyError): @@ -2836,7 +2836,7 @@ def test_conditional_access_range_failure(ctx_factory): if (i+3)*i < 15 tmp[i] = tmp[i] end - """, [lp.GlobalArg('tmp', shape=(2,), dtype=np.int32)]) + """, [lp.GlobalArg("tmp", shape=(2,), dtype=np.int32)]) from loopy.diagnostic import LoopyError with pytest.raises(LoopyError): @@ -2872,7 +2872,7 @@ def test_dump_binary(ctx_factory): "2019.8.7.0", "2019.8.8.0", ]): - pytest.skip("Intel CL doesn't implement Kernel.program") + pytest.skip("Intel CL doesn't implement Kernel.program") knl = lp.make_kernel( "{ [i]: 0<=i Date: Sun, 25 Oct 2020 17:10:49 -0500 Subject: [PATCH 27/27] fix more mysterious flake8 complaints --- loopy/transform/precompute.py | 2 +- test/test_loopy.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 469afef8f..2943368ce 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -1228,7 +1228,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, new_temporary_variables[temporary_name] = temp_var if stream_iname is not None: - temp_name = temporary_name+'_temp' + temp_name = temporary_name+"_temp" temp_fetch_variable = lp.TemporaryVariable( name=temp_name, dtype=dtype, diff --git a/test/test_loopy.py b/test/test_loopy.py index 04677b8cf..1dab8c173 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1906,7 +1906,7 @@ def test_header_extract(): #test CUDA cuknl = knl.copy(target=lp.CudaTarget()) assert str(lp.generate_header(cuknl)[0]) == ( - "extern "C" __global__ void __launch_bounds__(1) " + 'extern "C" __global__ void __launch_bounds__(1) ' "loopy_kernel(float *__restrict__ T);") #test OpenCL @@ -2872,7 +2872,7 @@ def test_dump_binary(ctx_factory): "2019.8.7.0", "2019.8.8.0", ]): - pytest.skip("Intel CL doesn't implement Kernel.program") + pytest.skip("Intel CL doesn't implement Kernel.program") knl = lp.make_kernel( "{ [i]: 0<=i