diff --git a/loopy/check.py b/loopy/check.py index 977571fcfde848c3cf3357df8ec80738c7c47196..796c5b4bd056a48515c480939b5b55d07ac1854f 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -185,8 +185,9 @@ def _get_all_unique_iname_tags(kernel): *kernel* that inherit from :class:`loopy.kernel.data.UniqueTag`. """ from loopy.kernel.data import UniqueTag - iname_tags = [kernel.iname_to_tag.get(iname) for iname in - kernel.all_inames()] + from itertools import chain + iname_tags = list(chain(*(kernel.iname_to_tags.get(iname, []) for iname in + kernel.all_inames()))) return set( tag for tag in iname_tags if isinstance(tag, UniqueTag)) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 8f3e15f28954cc73582e6b2126e7e045cca85dea..f7f0c2902c2f71e4c9f9fb8c05dc971fe05b278e 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -580,6 +580,7 @@ def generate_code_v2(program): """ from loopy.kernel import LoopKernel from loopy.program import make_program + from cgen import FunctionBody if isinstance(program, LoopKernel): program = make_program(program) @@ -620,7 +621,15 @@ def generate_code_v2(program): callee_prog_ast = callee_cgr.device_programs[0].ast collective_device_program = collective_device_program.copy( ast=Collection([callee_prog_ast, collective_device_program.ast])) - callee_fdecls.append(callee_prog_ast.fdecl) + if isinstance(callee_prog_ast, Collection): + # if there is a read only constant in the kernel + for entry in callee_prog_ast.contents: + if isinstance(entry, FunctionBody): + callee_fdecls.append(entry.fdecl) + elif isinstance(callee_prog_ast, FunctionBody): + callee_fdecls.append(callee_prog_ast.fdecl) + else: + raise NotImplementedError() # collecting the function declarations of callee kernels for callee_fdecl in callee_fdecls: diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 6682b6ec34172905582c58390a508f60acb69aab..4644935e0266bfad53ee756e27a1cbdeed56da6d 100644 --- 
a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -579,9 +579,13 @@ class CASTBuilder(ASTBuilderBase): if self.target.fortran_abi: name += "_" + if codegen_state.kernel.is_called_from_host: + name = Value("void", name) + else: + name = Value("static void", name) return FunctionDeclarationWrapper( FunctionDeclaration( - Value("void", name), + name, [self.idi_to_cgen_declarator(codegen_state.kernel, idi) for idi in codegen_state.implemented_data_info])) diff --git a/loopy/transform/pack_and_unpack_args.py b/loopy/transform/pack_and_unpack_args.py index 67ea48326794bb576040ae336daac36daafae7d5..a18326187379cac0b4be46bbfe244bcc2d9e7684 100644 --- a/loopy/transform/pack_and_unpack_args.py +++ b/loopy/transform/pack_and_unpack_args.py @@ -121,8 +121,10 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, from pymbolic import var dim_type = isl.dim_type.set - ilp_inames = set(iname for iname in insn.within_inames if isinstance( - kernel.iname_to_tag.get(iname), (IlpBaseTag, VectorizeTag))) + # any(), not all(): all() is vacuously True for inames that carry no tags + ilp_inames = set(iname for iname in insn.within_inames + if any(isinstance(tag, (IlpBaseTag, VectorizeTag)) + for tag in kernel.iname_to_tags.get(iname, []))) new_ilp_inames = set() ilp_inames_map = {} for iname in ilp_inames: