diff --git a/doc/misc.rst b/doc/misc.rst index 97bac9fec35d1960f0b8dceb9489f8399b72520c..347b5d098c8dc0e37bb72659c0b0de5a8b4e3704 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -101,6 +101,10 @@ In the meantime, you can generate code simply by saying:: print(cg_result.host_code()) print(cg_result.device_code()) +Additionally, for C-based languages, header definitions are available via:: + + loopy.generate_header(knl) + For what types of codes does :mod:`loopy` work well? ---------------------------------------------------- diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst index f16f8bfdbb26b716af27762d8502bff592496d7c..5609fc25328ed5b0a243eaa8d33c4d1aa6b3eddc 100644 --- a/doc/ref_transform.rst +++ b/doc/ref_transform.rst @@ -119,6 +119,8 @@ Finishing up .. autofunction:: generate_code_v2 +.. autofunction:: generate_header + Setting options --------------- diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 6c3175dc044aecef0989d69d77b0d67ed807e957..7d544ec477235804da68576298d2bb4ddbe56a6a 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -256,6 +256,14 @@ call :func:`loopy.generate_code`: out[i] = 2.0f * a[i]; } +Additionally, for C-based languages, header definitions can be obtained via +:func:`loopy.generate_header`: + +.. doctest:: + >>> header = str(lp.generate_header(typed_knl)[0]) + >>> print(header) + __kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global float const *__restrict__ a, int const n, __global float *__restrict__ out); + .. }}} .. 
_ordering: diff --git a/loopy/__init__.py b/loopy/__init__.py index 73a02479d705b74936097a87746baa83e5495de9..c12d7318c24e4ca1edb327f9710ae2182e5491b0 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -130,7 +130,7 @@ from loopy.frontend.fortran import (c_preprocess, parse_transformed_fortran, parse_fortran) from loopy.target import TargetBase, ASTBuilderBase -from loopy.target.c import CTarget +from loopy.target.c import CTarget, generate_header from loopy.target.cuda import CudaTarget from loopy.target.opencl import OpenCLTarget from loopy.target.pyopencl import PyOpenCLTarget @@ -238,7 +238,9 @@ __all__ = [ "LoopyError", "LoopyWarning", - "TargetBase", "CTarget", "CudaTarget", "OpenCLTarget", + "TargetBase", + "CTarget", "generate_header", + "CudaTarget", "OpenCLTarget", "PyOpenCLTarget", "ISPCTarget", "NumbaTarget", "NumbaCudaTarget", "ASTBuilderBase", diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index e28da7453427425e5db1e80f8dbfe80ae911bef0..8b81efb312d996a2cd972a416ddf7d190e580d13 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -29,7 +29,7 @@ import six import numpy as np # noqa from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder from loopy.diagnostic import LoopyError -from cgen import Pointer +from cgen import Pointer, NestedDeclarator from cgen.mapper import IdentityMapper as CASTIdentityMapperBase from pymbolic.mapper.stringifier import PREC_NONE from loopy.symbolic import IdentityMapper @@ -132,6 +132,10 @@ class POD(Declarator): mapper_method = "map_loopy_pod" + +class FunctionDeclarationWrapper(NestedDeclarator): + mapper_method = "map_function_decl_wrapper" + # }}} @@ -203,6 +207,10 @@ class CASTIdentityMapper(CASTIdentityMapperBase): def map_loopy_pod(self, node, *args, **kwargs): return type(node)(node.ast_builder, node.dtype, node.name) + def map_function_decl_wrapper(self, node, *args, **kwargs): + return FunctionDeclarationWrapper( + self.rec(node.subdecl, *args, 
**kwargs)) + class SubscriptSubsetCounter(IdentityMapper): def __init__(self, subset_counters): @@ -378,10 +386,11 @@ class CASTBuilder(ASTBuilderBase): if self.target.fortran_abi: name += "_" - return FunctionDeclaration( - Value("void", name), - [self.idi_to_cgen_declarator(codegen_state.kernel, idi) - for idi in codegen_state.implemented_data_info]) + return FunctionDeclarationWrapper( + FunctionDeclaration( + Value("void", name), + [self.idi_to_cgen_declarator(codegen_state.kernel, idi) + for idi in codegen_state.implemented_data_info])) def get_temporary_decls(self, codegen_state, schedule_index): from loopy.kernel.data import temp_var_scope @@ -754,4 +763,45 @@ class CASTBuilder(ASTBuilderBase): return node +# {{{ header generation + +class CFunctionDeclExtractor(CASTIdentityMapper): + def __init__(self): + self.decls = [] + + def map_expression(self, expr): + return expr + + def map_function_decl_wrapper(self, node): + self.decls.append(node.subdecl) + return super(CFunctionDeclExtractor, self)\ + .map_function_decl_wrapper(node) + + +def generate_header(kernel, codegen_result=None): + """ + :arg kernel: a :class:`loopy.LoopKernel` + :arg codegen_result: an instance of :class:`loopy.CodeGenerationResult` + :returns: a list of AST nodes (which may have :func:`str` + called on them to produce a string) representing + function declarations for the generated device + functions. 
+ """ + + if not isinstance(kernel.target, CTarget): + raise LoopyError( + 'Header generation for non C-based languages are not implemented') + + if codegen_result is None: + from loopy.codegen import generate_code_v2 + codegen_result = generate_code_v2(kernel) + + fde = CFunctionDeclExtractor() + for dev_prg in codegen_result.device_programs: + fde(dev_prg.ast) + + return fde.decls + +# }}} + # vim: foldmethod=marker diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index d31718f15ed563bba0b602e6017536b72b6deed0..51ccc301290460c49af73691f2ebe812b8a6da9b 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -99,6 +99,7 @@ def _create_vector_types(): vec.types[np.dtype(base_type), count] = dtype vec.type_to_scalar_and_count[dtype] = np.dtype(base_type), count + _create_vector_types() @@ -232,6 +233,10 @@ class CUDACASTBuilder(CASTBuilder): fdecl = super(CUDACASTBuilder, self).get_function_declaration( codegen_state, codegen_result, schedule_index) + from loopy.target.c import FunctionDeclarationWrapper + assert isinstance(fdecl, FunctionDeclarationWrapper) + fdecl = fdecl.subdecl + from cgen.cuda import CudaGlobal, CudaLaunchBounds fdecl = CudaGlobal(fdecl) @@ -254,7 +259,7 @@ class CUDACASTBuilder(CASTBuilder): fdecl = CudaLaunchBounds(nthreads, fdecl) - return fdecl + return FunctionDeclarationWrapper(fdecl) def generate_code(self, kernel, codegen_state, impl_arg_info): code, implemented_domains = ( diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 2c48fb902e746780599f039a44b2d0a5ea787b88..80a69bd00c99258b709ea18b2a716c339b888b02 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -237,16 +237,19 @@ class ISPCASTBuilder(CASTBuilder): arg_names, arg_decls = self._arg_names_and_decls(codegen_state) if codegen_state.is_generating_device_code: - return ISPCTask( + result = ISPCTask( FunctionDeclaration( Value("void", name), arg_decls)) else: - return ISPCExport( + result = ISPCExport( FunctionDeclaration( Value("void", name), 
arg_decls)) + from loopy.target.c import FunctionDeclarationWrapper + return FunctionDeclarationWrapper(result) + # }}} def get_kernel_call(self, codegen_state, name, gsize, lsize, extra_args): diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index f0436099c6127e6426b03df2c48342b6ee99c67f..31cf7c6b648ebf370a17d8beb2538b9748ddb30a 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -126,6 +126,7 @@ def _create_vector_types(): vec.types[np.dtype(base_type), count] = dtype vec.type_to_scalar_and_count[dtype] = np.dtype(base_type), count + _create_vector_types() @@ -400,6 +401,10 @@ class OpenCLCASTBuilder(CASTBuilder): fdecl = super(OpenCLCASTBuilder, self).get_function_declaration( codegen_state, codegen_result, schedule_index) + from loopy.target.c import FunctionDeclarationWrapper + assert isinstance(fdecl, FunctionDeclarationWrapper) + fdecl = fdecl.subdecl + from cgen.opencl import CLKernel, CLRequiredWorkGroupSize fdecl = CLKernel(fdecl) @@ -415,7 +420,7 @@ class OpenCLCASTBuilder(CASTBuilder): fdecl = CLRequiredWorkGroupSize(local_sizes, fdecl) - return fdecl + return FunctionDeclarationWrapper(fdecl) def generate_top_of_body(self, codegen_state): from loopy.kernel.data import ImageArg diff --git a/test/test_loopy.py b/test/test_loopy.py index ae41779eb7f1ac2ea09b007ca2fabec927f95a3b..69e0ea325a328d03bbd98cec2163f5c7981a4a78 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1587,6 +1587,36 @@ def test_temp_initializer(ctx_factory, src_order, tmp_order): assert np.array_equal(a, a2) +def test_header_extract(): + knl = lp.make_kernel('{[k]: 0<=k 1: exec(sys.argv[1])