From e71a4b9e883fd01f3adedab56bf91d68dee382a2 Mon Sep 17 00:00:00 2001 From: arghdos Date: Thu, 10 Nov 2016 14:48:31 -0500 Subject: [PATCH 01/14] first test --- loopy/__init__.py | 2 +- loopy/codegen/__init__.py | 25 +++++++++++++++++++++++++ test/test_loopy.py | 19 +++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 73a02479d..9b6071105 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -119,7 +119,7 @@ from loopy.statistics import (get_op_poly, sum_ops_to_dtypes, gather_access_footprints, gather_access_footprint_bytes) from loopy.codegen import ( PreambleInfo, - generate_code, generate_code_v2, generate_body) + generate_code, generate_code_v2, generate_body, generate_header) from loopy.codegen.result import ( GeneratedProgram, CodeGenerationResult) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index ffd291d53..387cc87df 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -550,4 +550,29 @@ def generate_body(kernel): # }}} +# {{{ extract function header + +from loopy.target.c import CASTIdentityMapper +class FunctionDeclExtractor(CASTIdentityMapper): + def __init__(self): + self.decls = [] + + def map_function_declaration(self, node): + self.decls.append(node) + return super(self.__class__, self).map_function_declaration(node) + +def generate_header(kernel): + codegen_result = generate_code_v2(kernel) + + if len(codegen_result.device_programs) != 1: + raise LoopyError("generate_header cannot be used on programs " + "that yield more than one device program") + + dev_prg, = codegen_result.device_programs + + fde = FunctionDeclExtractor() + return fde(dev_prg.ast) + +# }}} + # vim: foldmethod=marker diff --git a/test/test_loopy.py b/test/test_loopy.py index e0e619a1c..b57915932 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1586,6 +1586,25 @@ def test_temp_initializer(ctx_factory, src_order, tmp_order): assert np.array_equal(a, a2) 
+def test_header_extract(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel('{[k]: 0<=k Date: Tue, 15 Nov 2016 12:11:16 -0500 Subject: [PATCH 02/14] add decl_ast to the GeneratedProgram creation, and modify the header gen to use it --- loopy/codegen/__init__.py | 8 +------- loopy/codegen/result.py | 5 +++++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 387cc87df..3a4477664 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -550,8 +550,6 @@ def generate_body(kernel): # }}} -# {{{ extract function header - from loopy.target.c import CASTIdentityMapper class FunctionDeclExtractor(CASTIdentityMapper): def __init__(self): @@ -571,8 +569,4 @@ def generate_header(kernel): dev_prg, = codegen_result.device_programs fde = FunctionDeclExtractor() - return fde(dev_prg.ast) - -# }}} - -# vim: foldmethod=marker + return str(fde(dev_prg.decl_ast)) diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index 04fab05af..29280ac2b 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -61,6 +61,10 @@ class GeneratedProgram(Record): Once generated, this captures the AST of the operative function body (including declaration of necessary temporaries), but not the overall function definition. + .. attribute:: decl_ast + Once generated, this captures the AST of the operative function + definition but not the function body or declaration of necessary + temporaries. 
""" @@ -310,6 +314,7 @@ def generate_host_or_device_program(codegen_state, schedule_index): codegen_state, cur_prog.copy( ast=ast_builder.process_ast(fdef_ast), + decl_ast=ast_builder.process_ast(fdecl_ast), body_ast=ast_builder.process_ast(body_ast))) return codegen_result -- GitLab From 24e47664822d07858310eae2cdc9914b0a1c3350 Mon Sep 17 00:00:00 2001 From: arghdos Date: Tue, 15 Nov 2016 12:11:26 -0500 Subject: [PATCH 03/14] fix test, add more target tests --- test/test_loopy.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index b57915932..1d29fd4dc 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1595,7 +1595,7 @@ def test_header_extract(ctx_factory): T[k] = k**2 end """, - [lp.ConstantArg('T', shape=(200,), dtype=np.float32), + [lp.GlobalArg('T', shape=(200,), dtype=np.float32), '...']) knl = lp.fix_parameters(knl, n=200) @@ -1603,8 +1603,17 @@ def test_header_extract(ctx_factory): #test C cknl = knl cknl.target = lp.CTarget() - assert lp.generate_header(cknl) == 'void loopy_kernel(float* T)' + assert lp.generate_header(cknl) == 'void loopy_kernel(float *restrict T);' + + #test CUDA + cuknl = knl + cuknl.target = lp.CudaTarget() + assert lp.generate_header(cuknl) == 'extern "C" __global__ void __launch_bounds__(1) loopy_kernel(float *__restrict__ T);' + #test OpenCL + oclknl = knl + oclknl.target = lp.PyOpenCLTarget() + assert lp.generate_header(oclknl) == '__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global float *restrict T);' def test_base_storage_decl(): knl = lp.make_kernel( -- GitLab From 5d16a4d47965acf1b3710528465bacbff1725d9b Mon Sep 17 00:00:00 2001 From: arghdos Date: Tue, 15 Nov 2016 16:11:18 -0500 Subject: [PATCH 04/14] re-add the accidentially vim foldmethod --- loopy/codegen/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 3a4477664..ee9871907 100644 --- 
a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -570,3 +570,5 @@ def generate_header(kernel): fde = FunctionDeclExtractor() return str(fde(dev_prg.decl_ast)) + +# vim: foldmethod=marker \ No newline at end of file -- GitLab From 1defaf6b7b57a6e7058245f5a4527cedfaf94464 Mon Sep 17 00:00:00 2001 From: arghdos Date: Tue, 15 Nov 2016 16:13:24 -0500 Subject: [PATCH 05/14] move to top of file for now, even if incorrect import --- loopy/codegen/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index ee9871907..467d07f14 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -31,6 +31,7 @@ import islpy as isl from pytools.persistent_dict import PersistentDict from loopy.tools import LoopyKeyBuilder from loopy.version import DATA_MODEL_VERSION +from loopy.target.c import CASTIdentityMapper import logging logger = logging.getLogger(__name__) @@ -550,7 +551,6 @@ def generate_body(kernel): # }}} -from loopy.target.c import CASTIdentityMapper class FunctionDeclExtractor(CASTIdentityMapper): def __init__(self): self.decls = [] -- GitLab From 95f229a8fe4727b901f0437e765e5dfece4e6782 Mon Sep 17 00:00:00 2001 From: arghdos Date: Tue, 15 Nov 2016 16:15:51 -0500 Subject: [PATCH 06/14] Move to C Target file --- loopy/codegen/__init__.py | 9 --------- loopy/target/c/__init__.py | 7 +++++++ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 467d07f14..d55d385c3 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -31,7 +31,6 @@ import islpy as isl from pytools.persistent_dict import PersistentDict from loopy.tools import LoopyKeyBuilder from loopy.version import DATA_MODEL_VERSION -from loopy.target.c import CASTIdentityMapper import logging logger = logging.getLogger(__name__) @@ -551,14 +550,6 @@ def generate_body(kernel): # }}} -class FunctionDeclExtractor(CASTIdentityMapper): - 
def __init__(self): - self.decls = [] - - def map_function_declaration(self, node): - self.decls.append(node) - return super(self.__class__, self).map_function_declaration(node) - def generate_header(kernel): codegen_result = generate_code_v2(kernel) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index de0fa01fc..dd414db58 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -755,5 +755,12 @@ class CASTBuilder(ASTBuilderBase): sc(node) return node +class FunctionDeclExtractor(CASTIdentityMapper): + def __init__(self): + self.decls = [] + + def map_function_declaration(self, node): + self.decls.append(node) + return super(self.__class__, self).map_function_declaration(node) # vim: foldmethod=marker -- GitLab From 52c975a8776686261cf74c2892f6ace1a769b881 Mon Sep 17 00:00:00 2001 From: arghdos Date: Tue, 15 Nov 2016 16:19:29 -0500 Subject: [PATCH 07/14] add support for multiple headers --- loopy/codegen/__init__.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index d55d385c3..d7b987735 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -553,13 +553,11 @@ def generate_body(kernel): def generate_header(kernel): codegen_result = generate_code_v2(kernel) - if len(codegen_result.device_programs) != 1: - raise LoopyError("generate_header cannot be used on programs " - "that yield more than one device program") - - dev_prg, = codegen_result.device_programs - + headers = [] fde = FunctionDeclExtractor() - return str(fde(dev_prg.decl_ast)) + for dev_prg, _ in codegen_result.device_programs: + headers.append(str(fde(dev_prg.decl_ast))) + + return '\n'.join(headers) # vim: foldmethod=marker \ No newline at end of file -- GitLab From f014254de15890342184d0debeda62723f6c8315 Mon Sep 17 00:00:00 2001 From: arghdos Date: Tue, 15 Nov 2016 16:20:24 -0500 Subject: [PATCH 08/14] add newline --- loopy/codegen/__init__.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index d7b987735..811f5ca5a 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -560,4 +560,4 @@ def generate_header(kernel): return '\n'.join(headers) -# vim: foldmethod=marker \ No newline at end of file +# vim: foldmethod=marker -- GitLab From f42ccaf65fea89a62dfe56fa2b73888d1fe9e0ac Mon Sep 17 00:00:00 2001 From: arghdos Date: Tue, 15 Nov 2016 16:37:57 -0500 Subject: [PATCH 09/14] make the device_decl_extractor a part of the Target class, which is not implemented except for c-based languaged --- loopy/codegen/__init__.py | 8 ++++++++ loopy/target/__init__.py | 7 +++++++ loopy/target/c/__init__.py | 5 ++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 811f5ca5a..f43dfb9f5 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -551,6 +551,14 @@ def generate_body(kernel): # }}} def generate_header(kernel): + try: + fde = kernel.target.get_device_decl_extractor() + except NotImplementedError: + from warnings import warn + warn('Header generation for non C-based languages are not implemented', + RuntimeWarning, stacklevel=2) + return None + codegen_result = generate_code_v2(kernel) headers = [] diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 409b9badb..fc80fc3d9 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -138,6 +138,13 @@ class TargetBase(object): """ raise NotImplementedError() + def get_device_decl_extractor(self): + """ + :returns: a FunctionDeclExtractor (if implemented) that extracts a device function's + header definition + """ + raise NotImplementedError() + class ASTBuilderBase(object): """An interface for generating (host or device) ASTs. 
diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index dd414db58..aa54fd423 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -259,6 +259,9 @@ class CTarget(TargetBase): def get_device_ast_builder(self): return CASTBuilder(self) + def get_device_decl_extractor(self): + return CFunctionDeclExtractor() + # {{{ types @memoize_method @@ -755,7 +758,7 @@ class CASTBuilder(ASTBuilderBase): sc(node) return node -class FunctionDeclExtractor(CASTIdentityMapper): +class CFunctionDeclExtractor(CASTIdentityMapper): def __init__(self): self.decls = [] -- GitLab From d71048f9e0e6c29a3c44b9c491577321972f7278 Mon Sep 17 00:00:00 2001 From: arghdos Date: Tue, 15 Nov 2016 16:40:20 -0500 Subject: [PATCH 10/14] remove extraneous defn --- loopy/codegen/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index f43dfb9f5..c0e64f680 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -562,7 +562,6 @@ def generate_header(kernel): codegen_result = generate_code_v2(kernel) headers = [] - fde = FunctionDeclExtractor() for dev_prg, _ in codegen_result.device_programs: headers.append(str(fde(dev_prg.decl_ast))) -- GitLab From 953d27a1ac131b09882573d4d66cff194ca8dcb7 Mon Sep 17 00:00:00 2001 From: arghdos Date: Tue, 15 Nov 2016 16:41:59 -0500 Subject: [PATCH 11/14] fix bad iteration --- loopy/codegen/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index c0e64f680..c42fd2339 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -562,7 +562,7 @@ def generate_header(kernel): codegen_result = generate_code_v2(kernel) headers = [] - for dev_prg, _ in codegen_result.device_programs: + for dev_prg in codegen_result.device_programs: headers.append(str(fde(dev_prg.decl_ast))) return '\n'.join(headers) -- GitLab From c45b7d6f235f47c9cd02979edf7d257fb940b7fe Mon Sep 17 
00:00:00 2001 From: arghdos Date: Tue, 15 Nov 2016 16:53:23 -0500 Subject: [PATCH 12/14] add generate_kernel to doc's --- doc/misc.rst | 4 ++++ doc/tutorial.rst | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/doc/misc.rst b/doc/misc.rst index 97bac9fec..347b5d098 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -101,6 +101,10 @@ In the meantime, you can generate code simply by saying:: print(cg_result.host_code()) print(cg_result.device_code()) +Additionally, for C-based languages, header definitions are available via:: + + loopy.generate_header(knl) + For what types of codes does :mod:`loopy` work well? ---------------------------------------------------- diff --git a/doc/tutorial.rst b/doc/tutorial.rst index fa6fcc950..ebdd2dd29 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -256,6 +256,14 @@ call :func:`loopy.generate_code`: out[i] = 2.0f * a[i]; } +Additionally, for C-based languages, header definitions can be obtained via +the :func:`loopy.generate_header`: + +.. doctest:: + >>> header = lp.generate_header(typed_knl) + >>> print(header) + __kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global float const *__restrict__ a, int const n, __global float *__restrict__ out); + .. }}} .. 
_ordering: -- GitLab From 64734d3cc3a8e510b02e6ef16a89a1c946359c01 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 16 Nov 2016 22:48:56 -0600 Subject: [PATCH 13/14] Fix up header generation to be less invasive, document --- doc/ref_transform.rst | 2 ++ loopy/__init__.py | 8 +++-- loopy/codegen/__init__.py | 17 ----------- loopy/codegen/result.py | 5 --- loopy/target/__init__.py | 7 ----- loopy/target/c/__init__.py | 62 +++++++++++++++++++++++++++++++------- loopy/target/cuda.py | 7 ++++- loopy/target/ispc.py | 7 +++-- loopy/target/opencl.py | 7 ++++- test/test_loopy.py | 25 ++++++++------- 10 files changed, 89 insertions(+), 58 deletions(-) diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst index f16f8bfdb..5609fc253 100644 --- a/doc/ref_transform.rst +++ b/doc/ref_transform.rst @@ -119,6 +119,8 @@ Finishing up .. autofunction:: generate_code_v2 +.. autofunction:: generate_header + Setting options --------------- diff --git a/loopy/__init__.py b/loopy/__init__.py index 9b6071105..c12d7318c 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -119,7 +119,7 @@ from loopy.statistics import (get_op_poly, sum_ops_to_dtypes, gather_access_footprints, gather_access_footprint_bytes) from loopy.codegen import ( PreambleInfo, - generate_code, generate_code_v2, generate_body, generate_header) + generate_code, generate_code_v2, generate_body) from loopy.codegen.result import ( GeneratedProgram, CodeGenerationResult) @@ -130,7 +130,7 @@ from loopy.frontend.fortran import (c_preprocess, parse_transformed_fortran, parse_fortran) from loopy.target import TargetBase, ASTBuilderBase -from loopy.target.c import CTarget +from loopy.target.c import CTarget, generate_header from loopy.target.cuda import CudaTarget from loopy.target.opencl import OpenCLTarget from loopy.target.pyopencl import PyOpenCLTarget @@ -238,7 +238,9 @@ __all__ = [ "LoopyError", "LoopyWarning", - "TargetBase", "CTarget", "CudaTarget", "OpenCLTarget", + "TargetBase", + "CTarget", 
"generate_header", + "CudaTarget", "OpenCLTarget", "PyOpenCLTarget", "ISPCTarget", "NumbaTarget", "NumbaCudaTarget", "ASTBuilderBase", diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index c42fd2339..ffd291d53 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -550,21 +550,4 @@ def generate_body(kernel): # }}} -def generate_header(kernel): - try: - fde = kernel.target.get_device_decl_extractor() - except NotImplementedError: - from warnings import warn - warn('Header generation for non C-based languages are not implemented', - RuntimeWarning, stacklevel=2) - return None - - codegen_result = generate_code_v2(kernel) - - headers = [] - for dev_prg in codegen_result.device_programs: - headers.append(str(fde(dev_prg.decl_ast))) - - return '\n'.join(headers) - # vim: foldmethod=marker diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index 29280ac2b..04fab05af 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -61,10 +61,6 @@ class GeneratedProgram(Record): Once generated, this captures the AST of the operative function body (including declaration of necessary temporaries), but not the overall function definition. - .. attribute:: decl_ast - Once generated, this captures the AST of the operative function - definition but not the function body or declaration of necessary - temporaries. 
""" @@ -314,7 +310,6 @@ def generate_host_or_device_program(codegen_state, schedule_index): codegen_state, cur_prog.copy( ast=ast_builder.process_ast(fdef_ast), - decl_ast=ast_builder.process_ast(fdecl_ast), body_ast=ast_builder.process_ast(body_ast))) return codegen_result diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index fc80fc3d9..409b9badb 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -138,13 +138,6 @@ class TargetBase(object): """ raise NotImplementedError() - def get_device_decl_extractor(self): - """ - :returns: a FunctionDeclExtractor (if implemented) that extracts a device function's - header definition - """ - raise NotImplementedError() - class ASTBuilderBase(object): """An interface for generating (host or device) ASTs. diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 513386a58..8b81efb31 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -29,7 +29,7 @@ import six import numpy as np # noqa from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder from loopy.diagnostic import LoopyError -from cgen import Pointer +from cgen import Pointer, NestedDeclarator from cgen.mapper import IdentityMapper as CASTIdentityMapperBase from pymbolic.mapper.stringifier import PREC_NONE from loopy.symbolic import IdentityMapper @@ -132,6 +132,10 @@ class POD(Declarator): mapper_method = "map_loopy_pod" + +class FunctionDeclarationWrapper(NestedDeclarator): + mapper_method = "map_function_decl_wrapper" + # }}} @@ -203,6 +207,10 @@ class CASTIdentityMapper(CASTIdentityMapperBase): def map_loopy_pod(self, node, *args, **kwargs): return type(node)(node.ast_builder, node.dtype, node.name) + def map_function_decl_wrapper(self, node, *args, **kwargs): + return FunctionDeclarationWrapper( + self.rec(node.subdecl, *args, **kwargs)) + class SubscriptSubsetCounter(IdentityMapper): def __init__(self, subset_counters): @@ -259,9 +267,6 @@ class CTarget(TargetBase): def 
get_device_ast_builder(self): return CASTBuilder(self) - def get_device_decl_extractor(self): - return CFunctionDeclExtractor() - # {{{ types @memoize_method @@ -381,10 +386,11 @@ class CASTBuilder(ASTBuilderBase): if self.target.fortran_abi: name += "_" - return FunctionDeclaration( - Value("void", name), - [self.idi_to_cgen_declarator(codegen_state.kernel, idi) - for idi in codegen_state.implemented_data_info]) + return FunctionDeclarationWrapper( + FunctionDeclaration( + Value("void", name), + [self.idi_to_cgen_declarator(codegen_state.kernel, idi) + for idi in codegen_state.implemented_data_info])) def get_temporary_decls(self, codegen_state, schedule_index): from loopy.kernel.data import temp_var_scope @@ -756,12 +762,46 @@ class CASTBuilder(ASTBuilderBase): sc(node) return node + +# {{{ header generation + class CFunctionDeclExtractor(CASTIdentityMapper): def __init__(self): self.decls = [] - def map_function_declaration(self, node): - self.decls.append(node) - return super(self.__class__, self).map_function_declaration(node) + def map_expression(self, expr): + return expr + + def map_function_decl_wrapper(self, node): + self.decls.append(node.subdecl) + return super(CFunctionDeclExtractor, self)\ + .map_function_decl_wrapper(node) + + +def generate_header(kernel, codegen_result=None): + """ + :arg kernel: a :class:`loopy.LoopKernel` + :arg codegen_result: an instance of :class:`loopy.CodeGenerationResult` + :returns: a list of AST nodes (which may have :func:`str` + called on them to produce a string) representing + function declarations for the generated device + functions. 
+ """ + + if not isinstance(kernel.target, CTarget): + raise LoopyError( + 'Header generation for non C-based languages are not implemented') + + if codegen_result is None: + from loopy.codegen import generate_code_v2 + codegen_result = generate_code_v2(kernel) + + fde = CFunctionDeclExtractor() + for dev_prg in codegen_result.device_programs: + fde(dev_prg.ast) + + return fde.decls + +# }}} # vim: foldmethod=marker diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index d31718f15..51ccc3012 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -99,6 +99,7 @@ def _create_vector_types(): vec.types[np.dtype(base_type), count] = dtype vec.type_to_scalar_and_count[dtype] = np.dtype(base_type), count + _create_vector_types() @@ -232,6 +233,10 @@ class CUDACASTBuilder(CASTBuilder): fdecl = super(CUDACASTBuilder, self).get_function_declaration( codegen_state, codegen_result, schedule_index) + from loopy.target.c import FunctionDeclarationWrapper + assert isinstance(fdecl, FunctionDeclarationWrapper) + fdecl = fdecl.subdecl + from cgen.cuda import CudaGlobal, CudaLaunchBounds fdecl = CudaGlobal(fdecl) @@ -254,7 +259,7 @@ class CUDACASTBuilder(CASTBuilder): fdecl = CudaLaunchBounds(nthreads, fdecl) - return fdecl + return FunctionDeclarationWrapper(fdecl) def generate_code(self, kernel, codegen_state, impl_arg_info): code, implemented_domains = ( diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 2c48fb902..80a69bd00 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -237,16 +237,19 @@ class ISPCASTBuilder(CASTBuilder): arg_names, arg_decls = self._arg_names_and_decls(codegen_state) if codegen_state.is_generating_device_code: - return ISPCTask( + result = ISPCTask( FunctionDeclaration( Value("void", name), arg_decls)) else: - return ISPCExport( + result = ISPCExport( FunctionDeclaration( Value("void", name), arg_decls)) + from loopy.target.c import FunctionDeclarationWrapper + return FunctionDeclarationWrapper(result) + # }}} def 
get_kernel_call(self, codegen_state, name, gsize, lsize, extra_args): diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index f0436099c..31cf7c6b6 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -126,6 +126,7 @@ def _create_vector_types(): vec.types[np.dtype(base_type), count] = dtype vec.type_to_scalar_and_count[dtype] = np.dtype(base_type), count + _create_vector_types() @@ -400,6 +401,10 @@ class OpenCLCASTBuilder(CASTBuilder): fdecl = super(OpenCLCASTBuilder, self).get_function_declaration( codegen_state, codegen_result, schedule_index) + from loopy.target.c import FunctionDeclarationWrapper + assert isinstance(fdecl, FunctionDeclarationWrapper) + fdecl = fdecl.subdecl + from cgen.opencl import CLKernel, CLRequiredWorkGroupSize fdecl = CLKernel(fdecl) @@ -415,7 +420,7 @@ class OpenCLCASTBuilder(CASTBuilder): fdecl = CLRequiredWorkGroupSize(local_sizes, fdecl) - return fdecl + return FunctionDeclarationWrapper(fdecl) def generate_top_of_body(self, codegen_state): from loopy.kernel.data import ImageArg diff --git a/test/test_loopy.py b/test/test_loopy.py index 9f1b6ea8e..69e0ea325 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1586,9 +1586,8 @@ def test_temp_initializer(ctx_factory, src_order, tmp_order): assert np.array_equal(a, a2) -def test_header_extract(ctx_factory): - ctx = ctx_factory() +def test_header_extract(): knl = lp.make_kernel('{[k]: 0<=k 1: exec(sys.argv[1]) -- GitLab From 23b32c260744073cf800690c403105f1762f61a5 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 16 Nov 2016 23:05:50 -0600 Subject: [PATCH 14/14] Fix header generation doctest --- doc/tutorial.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 31dafac0f..7d544ec47 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -260,7 +260,7 @@ Additionally, for C-based languages, header definitions can be obtained via the :func:`loopy.generate_header`: .. 
doctest:: - >>> header = lp.generate_header(typed_knl) + >>> header = str(lp.generate_header(typed_knl)[0]) >>> print(header) __kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global float const *__restrict__ a, int const n, __global float *__restrict__ out); -- GitLab