diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 6b80bae2076c38485d3590ad579da3e1aa32f998..f7622936c05207c02f8f7065f44b75f3fbff6221 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -402,7 +402,7 @@ class CKernelExecutor(KernelExecutorBase): if self.kernel.options.write_cl: output = all_code if self.kernel.options.highlight_cl: - output = get_highlighted_code(code=output) + output = get_highlighted_code(output) if self.kernel.options.write_cl is True: print(output) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 2b618a464b5103ee28bceded07dc68f9c376c84d..ad1da3e7e67d9d609f51bfed4db7141d14e508dd 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -139,8 +139,7 @@ class _InameSplitter(RuleAwareIdentityMapper): and self.split_iname not in expn_state.arg_context and self.within( expn_state.kernel, - expn_state.instruction, - expn_state.stack)): + expn_state.instruction)): new_inames = list(expr.inames) new_inames.remove(self.split_iname) new_inames.extend([self.outer_iname, self.inner_iname]) @@ -157,8 +156,7 @@ class _InameSplitter(RuleAwareIdentityMapper): and self.split_iname not in expn_state.arg_context and self.within( expn_state.kernel, - expn_state.instruction, - expn_state.stack)): + expn_state.instruction)): return self.replacement_index else: return super(_InameSplitter, self).map_variable(expr, expn_state) @@ -177,6 +175,22 @@ def _split_iname_backend(kernel, split_iname, for syntax. 
""" + from loopy.match import parse_match + within = parse_match(within) + + # {{{ return the same kernel if no kernel matches + + def _do_not_transform_if_no_within_matches(): + for insn in kernel.instructions: + if within(kernel, insn): + return + + return kernel + + _do_not_transform_if_no_within_matches() + + # }}} + existing_tags = kernel.iname_tags(split_iname) from loopy.kernel.data import ForceSequentialTag, filter_iname_tags_by_type if (do_tagged_check and existing_tags @@ -230,10 +244,15 @@ def _split_iname_backend(kernel, split_iname, name_dim_type, name_idx = space.get_var_dict()[split_iname] s = s.intersect(fixed_constraint_set) - if within is None: - s = s.project_out(name_dim_type, name_idx, 1) + def _project_out_only_if_all_instructions_in_within(): + for insn in kernel.instructions: + if split_iname in insn.within_inames and ( + not within(kernel, insn)): + return s - return s + return s.project_out(name_dim_type, name_idx, 1) + + return _project_out_only_if_all_instructions_in_within() new_domains = [process_set(dom) for dom in kernel.domains] @@ -249,7 +268,8 @@ def _split_iname_backend(kernel, split_iname, new_insns = [] for insn in kernel.instructions: - if split_iname in insn.within_inames: + if split_iname in insn.within_inames and ( + within(kernel, insn)): new_within_inames = ( (insn.within_inames.copy() - frozenset([split_iname])) @@ -284,9 +304,6 @@ def _split_iname_backend(kernel, split_iname, applied_iname_rewrites=applied_iname_rewrites, loop_priority=frozenset(new_priorities)) - from loopy.match import parse_stack_match - within = parse_stack_match(within) - rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) ins = _InameSplitter(rule_mapping_context, within, @@ -329,7 +346,7 @@ def split_iname(kernel, split_iname, inner_length, :arg inner_tag: The iname tag (see :ref:`iname-tags`) to apply to *inner_iname*. 
- :arg within: a stack match as understood by - :func:`loopy.match.parse_stack_match`. + :arg within: a match as understood by + :func:`loopy.match.parse_match`. """ def make_new_loop_index(inner, outer): return inner + outer*inner_length diff --git a/test/test_transform.py b/test/test_transform.py index ed184fb50c099d5fb2a6a0941d2f2c22c3b757bc..394cf668804ed719920e02bc3d20f62971421c2f 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -533,6 +533,23 @@ def test_uniquify_instruction_ids(): assert all(isinstance(id, str) for id in insn_ids) +def test_split_iname_only_if_in_within(): + knl = lp.make_kernel( + "{[i]: 0<=i<10}", + """ + c[i] = 3*d[i] {id=to_split} + a[i] = 2*b[i] {id=not_to_split} + """) + + knl = lp.split_iname(knl, "i", 4, within='id:to_split') + + for insn in knl.instructions: + if insn.id == 'to_split': + assert insn.within_inames == frozenset({'i_outer', 'i_inner'}) + if insn.id == 'not_to_split': + assert insn.within_inames == frozenset({'i'}) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1])