diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index e48b8393565c6002f19f7569705fc8579c1d9db7..6b57ccf6e162d3725ee07fc4f97263c511eaf3b8 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -467,7 +467,11 @@ class LoopKernel(ImmutableRecordWithoutPickling): discard_level_count = 0 while discard_level_count < len(iname_set_stack): - last_inames = iname_set_stack[-1-discard_level_count] + last_inames = ( + iname_set_stack[-1-discard_level_count]) + if discard_level_count + 1 < len(iname_set_stack): + last_inames = ( + last_inames - iname_set_stack[-2-discard_level_count]) if is_domain_dependent_on_inames(self, dom_idx, last_inames): break diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 4148d7d752f89e5c3c37825dbce20a883ba08c0d..c078da2ec58dabbbf646bfcf593ea0138941cc85 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1483,21 +1483,19 @@ class DependencyTracker(object): tgt_read = self.map_to_base_storage( target.read_dependency_names() & self.relevant_vars) - for (accessed_vars, accessor_map, ignore_self) in [ - (tgt_read, self.writer_map, False), - (tgt_write, self.reader_map, False), - (tgt_write, self.writer_map, True)]: + for (accessed_vars, accessor_map) in [ + (tgt_read, self.writer_map), + (tgt_write, self.reader_map), + (tgt_write, self.writer_map)]: for dep in self.get_conflicting_accesses( - accessed_vars, accessor_map, ignore_self, target.id): + accessed_vars, accessor_map, target.id): yield dep def get_conflicting_accesses( - self, accessed_vars, var_to_accessor_map, ignore_self, target): + self, accessed_vars, var_to_accessor_map, target): def determine_conflict_nature(source, target): - if ignore_self and source == target: - return None if (not self.reverse and source in self.kernel.get_nosync_set(target, scope=self.var_kind)): return None diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 99a16bfc23341dba3d28c71038681c31d3e00dba..4c1e423e93e104fecd0b49a2b1ef2b4a261e38e7 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -38,6 +38,12 @@ import logging logger = logging.getLogger(__name__) +def _debug(kernel, s, *args): + if logger.isEnabledFor(logging.DEBUG): + logstr = s % args + logger.debug("%s: %s" % (kernel.name, logstr)) + + # {{{ type inference mapper class TypeInferenceMapper(CombineMapper): @@ -378,8 +384,8 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): if var_name in kernel.all_params(): return [kernel.index_dtype], [] - def debug(s): - logger.debug("%s: %s" % (kernel.name, s)) + from functools import partial + debug = partial(_debug, kernel) dtype_sets = [] @@ -394,7 +400,7 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): expr = subst_expander(writer_insn.expression) - debug(" via expr %s" % expr) + debug(" via expr %s", expr) if isinstance(writer_insn, lp.Assignment): result = type_inf_mapper(expr, return_dtype_set=True) elif isinstance(writer_insn, lp.CallInstruction): @@ -416,7 +422,7 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): if result_i is not None: result.append(result_i) - debug(" result: %s" % result) + debug(" result: %s", result) dtype_sets.append(result) @@ -457,12 +463,12 @@ def infer_unknown_types(kernel, expect_completion=False): logger.debug("%s: infer types" % kernel.name) + from functools import partial + debug = partial(_debug, kernel) + import time start_time = time.time() - def debug(s): - logger.debug("%s: %s" % (kernel.name, s)) - unexpanded_kernel = kernel if kernel.substitutions: from loopy.transform.subst import expand_subst @@ -542,7 +548,7 @@ def infer_unknown_types(kernel, expect_completion=False): name = queue.pop(0) item = item_lookup[name] - debug("inferring type for %s %s" % (type(item).__name__, item.name)) + debug("inferring type for %s %s", type(item).__name__, item.name) result, symbols_with_unavailable_types = ( _infer_var_type( @@ -551,9 +557,9 @@ def infer_unknown_types(kernel, expect_completion=False): failed = not result if not failed: new_dtype, = result - debug(" success: %s" % new_dtype) + debug(" success: %s", new_dtype) if new_dtype != item.dtype: - debug(" changed from: %s" % item.dtype) + debug(" changed from: %s", item.dtype) changed_during_last_queue_run = True if isinstance(item, TemporaryVariable): diff --git a/loopy/version.py b/loopy/version.py index 6a02f4d99b8ae22a3aa86082ae0becb30b7b6448..fd7c66fa249333ecae66c10d0ce919f806d9a884 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v53-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v54-islpy%s" % _islpy_version diff --git a/test/test_apps.py b/test/test_apps.py index 9eab3fdb1fbc152b65344362d39766793d372d90..cd225c9974ba9f7c85363bd40a79657622c77eff 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -608,6 +608,61 @@ def test_poisson_fem(ctx_factory): parameters=dict(n=5, nels=15, nbf=5, sdim=2, nqp=7)) +def test_domain_tree_nesting(): + # From https://github.com/inducer/loopy/issues/78 + from loopy.kernel.data import temp_var_scope as scopes + + out_map = np.array([1, 2], dtype=np.int32) + if_val = np.array([-1, 0], dtype=np.int32) + vals = np.array([2, 3], dtype=np.int32) + num_vals = np.array([2, 4], dtype=np.int32) + num_vals_offset = np.array(np.cumsum(num_vals) - num_vals, dtype=np.int32) + + TV = lp.TemporaryVariable # noqa + + knl = lp.make_kernel(['{[i]: 0 <= i < 12}', + '{[j]: 0 <= j < 100}', + '{[a_count]: 0 <= a_count < a_end}', + '{[b_count]: 0 <= b_count < b_end}'], + """ + for j + for i + <> a_end = abs(if_val[i]) + + <>b_end = num_vals[i] + <>offset = num_vals_offset[i] {id=offset} + <>b_sum = 0 {id=b_init} + for b_count + <>val = vals[offset + b_count] {dep=offset} + end + b_sum = exp(b_sum) {id=b_final, dep=b_accum} + + out[j,i] = b_sum {dep=a_accum:b_final} + end + end + """, + [ + TV('out_map', initializer=out_map, read_only=True, scope=scopes.PRIVATE), + TV('if_val', initializer=if_val, read_only=True, scope=scopes.PRIVATE), + TV('vals', initializer=vals, read_only=True, scope=scopes.PRIVATE), + TV('num_vals', initializer=num_vals, read_only=True, scope=scopes.PRIVATE), + TV('num_vals_offset', initializer=num_vals_offset, read_only=True, + scope=scopes.PRIVATE), + lp.GlobalArg('B', shape=(100, 31), dtype=np.float64), + lp.GlobalArg('out', shape=(100, 12), dtype=np.float64)]) + + parents_per_domain = knl.parents_per_domain() + + def depth(i): + if parents_per_domain[i] is None: + return 0 + else: + return 1 + depth(parents_per_domain[i]) + + for i in range(len(parents_per_domain)): + assert depth(i) < 2 + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1])