From 71f66e64c40db814f8694b33f493907479a79226 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Sat, 10 Feb 2018 20:49:59 -0600 Subject: [PATCH] check_variable_access_ordered: Only trigger if access ranges overlap --- loopy/check.py | 15 +++++++- loopy/symbolic.py | 89 +++++++++++++++++++++++++++++++++++++++------ test/test_apps.py | 2 +- test/test_loopy.py | 4 +- test/test_target.py | 2 - 5 files changed, 94 insertions(+), 18 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index a45e8ee0c..7f56a76ee 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -421,7 +421,7 @@ class IndirectDependencyEdgeFinder(object): return False -def needs_no_sync_with(kernel, var_scope, dep_a, dep_b): +def declares_nosync_with(kernel, var_scope, dep_a, dep_b): from loopy.kernel.data import temp_var_scope if var_scope == temp_var_scope.GLOBAL: search_scopes = ["global", "any"] @@ -455,6 +455,8 @@ def check_variable_access_ordered(kernel): if kernel.options.enforce_variable_access_ordered == "no_check": return + logger.debug("%s: check_variable_access_ordered: start" % kernel.name) + checked_variables = kernel.get_written_variables() & ( set(kernel.temporary_variables) | set(arg for arg in kernel.arg_dict)) @@ -493,6 +495,8 @@ def check_variable_access_ordered(kernel): # Check even for PRIVATE scope, to ensure intentional program order. + from loopy.symbolic import do_access_ranges_overlap_conservative + for writer_id in writers: for other_id in readers | writers: if writer_id == other_id: @@ -502,7 +506,7 @@ def check_variable_access_ordered(kernel): other = kernel.id_to_insn[other_id] has_dependency_relationship = ( - needs_no_sync_with(kernel, scope, other, writer) + declares_nosync_with(kernel, scope, other, writer) or depfind(writer_id, other_id) or @@ -510,6 +514,11 @@ def check_variable_access_ordered(kernel): ) if not has_dependency_relationship: + if not do_access_ranges_overlap_conservative( + kernel, writer_id, "w", other_id, "any", + name): + continue + msg = ("No dependency relationship found between " "'{writer_id}' which writes {var} and " "'{other_id}' which also accesses {var}. " @@ -536,6 +545,8 @@ def check_variable_access_ordered(kernel): warn_with_kernel( kernel, "variable_access_ordered", msg) + logger.debug("%s: check_variable_access_ordered: done" % kernel.name) + # }}} # }}} diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 9e16c3a59..272a7f45b 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1582,11 +1582,11 @@ def get_access_range(domain, subscript, assumptions): class BatchedAccessRangeMapper(WalkMapper): - def __init__(self, kernel, arg_names): + def __init__(self, kernel, var_names): self.kernel = kernel - self.arg_names = set(arg_names) - self.access_ranges = dict((arg, None) for arg in arg_names) - self.bad_subscripts = dict((arg, []) for arg in arg_names) + self.var_names = set(var_names) + self.access_ranges = dict((arg, None) for arg in var_names) + self.bad_subscripts = dict((arg, []) for arg in var_names) def map_subscript(self, expr, inames): domain = self.kernel.get_inames_domain(inames) @@ -1594,7 +1594,7 @@ class BatchedAccessRangeMapper(WalkMapper): assert isinstance(expr.aggregate, p.Variable) - if expr.aggregate.name not in self.arg_names: + if expr.aggregate.name not in self.var_names: return arg_name = expr.aggregate.name @@ -1622,7 +1622,7 @@ class BatchedAccessRangeMapper(WalkMapper): def map_linear_subscript(self, expr, inames): self.rec(expr.index, inames) - if expr.aggregate.name in self.arg_names: + if expr.aggregate.name in self.var_names: self.bad_subscripts[expr.aggregate.name].append(expr) def map_reduction(self, expr, inames): @@ -1634,20 +1634,87 @@ class BatchedAccessRangeMapper(WalkMapper): class AccessRangeMapper(object): - def __init__(self, kernel, arg_name): - self.arg_name = arg_name - self.inner_mapper = BatchedAccessRangeMapper(kernel, [arg_name]) + def __init__(self, kernel, var_name): + self.var_name = var_name + self.inner_mapper = BatchedAccessRangeMapper(kernel, [var_name]) def __call__(self, expr, inames): return self.inner_mapper(expr, inames) @property def access_range(self): - return self.inner_mapper.access_ranges[self.arg_name] + return self.inner_mapper.access_ranges[self.var_name] @property def bad_subscripts(self): - return self.inner_mapper.bad_subscripts[self.arg_name] + return self.inner_mapper.bad_subscripts[self.var_name] + +# }}} + + +# {{{ do_access_ranges_overlap + +def _get_access_range_conservative(kernel, insn_id, access_dir, var_name): + insn = kernel.id_to_insn[insn_id] + from loopy.kernel.instruction import MultiAssignmentBase + + assert access_dir in ["w", "any"] + + if not isinstance(insn, MultiAssignmentBase): + if access_dir == "any": + return var_name in insn.dependency_names() + else: + return var_name in insn.write_dependency_names() + + exprs = list(insn.assignees) + if access_dir == "any": + exprs.append(insn.expression) + exprs.extend(insn.predicates) + + arange = False + for expr in exprs: + arm = AccessRangeMapper(kernel, var_name) + arm(expr, kernel.insn_inames(insn)) + + if arm.bad_subscripts: + return True + + expr_arange = arm.access_range + if expr_arange is None: + continue + + if arange is False: + arange = expr_arange + else: + arange = arange | expr_arange + + return arange + + +def do_access_ranges_overlap_conservative( + kernel, insn1_id, insn1_dir, insn2_id, insn2_dir, var_name): + """Determine whether the access ranges to *var_name* in the two + given instructions overlpa. This determination is made 'conservatively', + i.e. if precise information is unavailable, it is concluded that the + ranges overlap. + + :arg insn1_dir: either ``"w"`` or ``"any"``, to indicate which + type of access is desired--writing or any + :arg insn2_dir: either ``"w"`` or ``"any"`` + :returns: a :class:`bool` + """ + + insn1_arange = _get_access_range_conservative( + kernel, insn1_id, insn1_dir, var_name) + insn2_arange = _get_access_range_conservative( + kernel, insn2_id, insn2_dir, var_name) + + if insn1_arange is False or insn2_arange is False: + return False + if insn1_arange is True or insn2_arange is True: + return True + + return not (insn1_arange & insn2_arange).is_empty() # }}} diff --git a/test/test_apps.py b/test/test_apps.py index 1be7edec1..ff30e3e7a 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -494,7 +494,7 @@ def test_lbm(ctx_factory): end """) - knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") + #knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") knl = lp.add_and_infer_dtypes(knl, {"f": np.float32}) ref_knl = knl diff --git a/test/test_loopy.py b/test/test_loopy.py index e1de0af80..72c52a10e 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -893,8 +893,8 @@ def test_multiple_writes_to_local_temporary(): knl = lp.make_kernel( "{[i,e]: 0<=i<5 and 0<=e<nelements}", """ - <> temp[i, 0] = 17 {nosync_query=writes:temp} - temp[i, 1] = 15 {nosync_query=writes:temp} + <> temp[i, 0] = 17 + temp[i, 1] = 15 """) knl = lp.tag_inames(knl, dict(i="l.0")) diff --git a/test/test_target.py b/test/test_target.py index c143fbbd2..15964987a 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -215,8 +215,6 @@ def test_random123(ctx_factory, tp): out[i, 3] = real.s3 + 1j * imag.s3 """.replace("TYPE", tp)) - knl = lp.add_nosync(knl, "any", "writes:out", "writes:out", force=True) - knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0") knl = lp.set_options(knl, write_cl=True) -- GitLab