diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index 5dff5e53c04521bcd2f53cb2fc971ec12227149c..71b112775095155af455abe200d94dbff5ac0c94 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -467,7 +467,14 @@ class LoopKernel(ImmutableRecordWithoutPickling):
 
             discard_level_count = 0
             while discard_level_count < len(iname_set_stack):
-                last_inames = iname_set_stack[-1-discard_level_count]
+                # Only consider the inames that this stack level has but the
+                # level directly below it does not, so the dependence check
+                # is not satisfied by inames carried over from outer levels.
+                last_inames = (
+                        iname_set_stack[-1-discard_level_count])
+                if discard_level_count + 1 < len(iname_set_stack):
+                    last_inames = (
+                            last_inames - iname_set_stack[-2-discard_level_count])
 
                 if is_domain_dependent_on_inames(self, dom_idx, last_inames):
                     break
diff --git a/loopy/version.py b/loopy/version.py
index 6a02f4d99b8ae22a3aa86082ae0becb30b7b6448..fd7c66fa249333ecae66c10d0ce919f806d9a884 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v53-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v54-islpy%s" % _islpy_version
diff --git a/test/test_apps.py b/test/test_apps.py
index 9eab3fdb1fbc152b65344362d39766793d372d90..cd225c9974ba9f7c85363bd40a79657622c77eff 100644
--- a/test/test_apps.py
+++ b/test/test_apps.py
@@ -608,6 +608,65 @@ def test_poisson_fem(ctx_factory):
             parameters=dict(n=5, nels=15, nbf=5, sdim=2, nqp=7))
 
 
+def test_domain_tree_nesting():
+    """Domains of the kernel from issue #78 must nest at most one level deep.
+
+    From https://github.com/inducer/loopy/issues/78
+    """
+    from loopy.kernel.data import temp_var_scope as scopes
+
+    out_map = np.array([1, 2], dtype=np.int32)
+    if_val = np.array([-1, 0], dtype=np.int32)
+    vals = np.array([2, 3], dtype=np.int32)
+    num_vals = np.array([2, 4], dtype=np.int32)
+    num_vals_offset = np.array(np.cumsum(num_vals) - num_vals, dtype=np.int32)
+
+    TV = lp.TemporaryVariable  # noqa
+
+    knl = lp.make_kernel(['{[i]: 0 <= i < 12}',
+                    '{[j]: 0 <= j < 100}',
+                    '{[a_count]: 0 <= a_count < a_end}',
+                    '{[b_count]: 0 <= b_count < b_end}'],
+    """
+    for j
+        for i
+            <> a_end = abs(if_val[i])
+
+            <>b_end = num_vals[i]
+            <>offset = num_vals_offset[i] {id=offset}
+            <>b_sum = 0 {id=b_init}
+            for b_count
+                <>val = vals[offset + b_count] {dep=offset}
+            end
+            b_sum = exp(b_sum) {id=b_final}
+
+            out[j,i] = b_sum {dep=b_final}
+        end
+    end
+    """,
+    [
+        TV('out_map', initializer=out_map, read_only=True, scope=scopes.PRIVATE),
+        TV('if_val', initializer=if_val, read_only=True, scope=scopes.PRIVATE),
+        TV('vals', initializer=vals, read_only=True, scope=scopes.PRIVATE),
+        TV('num_vals', initializer=num_vals, read_only=True, scope=scopes.PRIVATE),
+        TV('num_vals_offset', initializer=num_vals_offset, read_only=True,
+            scope=scopes.PRIVATE),
+        lp.GlobalArg('B', shape=(100, 31), dtype=np.float64),
+        lp.GlobalArg('out', shape=(100, 12), dtype=np.float64)])
+
+    parents_per_domain = knl.parents_per_domain()
+
+    # Number of ancestors of domain i (0 for a domain without a parent).
+    def depth(i):
+        if parents_per_domain[i] is None:
+            return 0
+        else:
+            return 1 + depth(parents_per_domain[i])
+
+    for i in range(len(parents_per_domain)):
+        assert depth(i) < 2
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])