diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 6b0ba25b3b0d0f3ff4dbfe790fc31a1369c3cdeb..80536d912e7d426feca598ac4b07cd16223b0307 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -28,7 +28,7 @@ import six import numpy as np # noqa from loopy.kernel.data import CallMangleInfo -from loopy.target import TargetBase, ASTBuilderBase +from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder from loopy.diagnostic import LoopyError, LoopyTypeError from cgen import Pointer, NestedDeclarator, Block from cgen.mapper import IdentityMapper as CASTIdentityMapperBase @@ -270,7 +270,7 @@ class CTarget(TargetBase): return False def get_host_ast_builder(self): - return CASTBuilder(self) + return DummyHostASTBuilder(self) def get_device_ast_builder(self): return CASTBuilder(self) @@ -312,9 +312,10 @@ class CTarget(TargetBase): # {{{ executable c target class ExecutableCTarget(CTarget): - """An executable CTarget that uses (by default) JIT compilation of C-code - via :mod:`codepy`. """ - from .c_execution import CCompiler + """ + An executable CTarget that uses (by default) JIT compilation of C-code + """ + from loopy.target.c.c_execution import CCompiler def __init__(self, compiler=CCompiler(), fortran_abi=False): super(ExecutableCTarget, self).__init__(fortran_abi=fortran_abi) @@ -324,6 +325,10 @@ class ExecutableCTarget(CTarget): from loopy.target.c.c_execution import CKernelExecutor return CKernelExecutor(knl, compiler=self.compiler) + def get_host_ast_builder(self): + # enable host code generation + return CASTBuilder(self) + # }}} @@ -465,7 +470,7 @@ class CASTBuilder(ASTBuilderBase): # We only need to write declarations for global variables with # the first device program. `is_first_dev_prog` determines # whether this is the first device program in the schedule. - is_first_dev_prog = True + is_first_dev_prog = codegen_state.is_generating_device_code for i in range(schedule_index): if isinstance(kernel.schedule[i], CallKernel): is_first_dev_prog = False diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 1a69d8da2bcf09b4920cd1927f526253dd46818d..36c4b769d02a75df056757b6240103108377f8b0 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -280,13 +280,6 @@ class IDIToCDLL(object): return arg_info - def _append_arg(self, name, dtype, pointer=False): - """Append arg info to current argument list.""" - self._arg_info.append(( - name, - self._dtype_to_ctype(dtype, pointer=pointer) - )) - def _dtype_to_ctype(self, dtype, pointer=False): """Map NumPy dtype to equivalent ctypes type.""" typename = self.registry.dtype_to_ctype(dtype) @@ -385,6 +378,8 @@ class CKernelExecutor(KernelExecutorBase): if self.kernel.options.edit_cl: from pytools import invoke_editor dev_code = invoke_editor(dev_code, "code.c") + # update code from editor + all_code = '\n'.join([dev_code, '', host_code]) c_kernels = [] for dp in codegen_result.device_programs: diff --git a/test/test_c_execution.py b/test/test_c_execution.py index 5cd1e44f6d0a85ef9b6fbe0e74de0945d22e36b6..cd8d1177676fc79521e3bdd9c170e018de083e1a 100644 --- a/test/test_c_execution.py +++ b/test/test_c_execution.py @@ -189,3 +189,28 @@ def test_function_decl_extractor(): target=ExecutableCTarget()) assert np.allclose(knl(b=np.arange(10), v=-1)[1], np.arange(10) - 1) + + +def test_c_execution_with_global_temporaries(): + # ensure that the "host" code of a bare ExecutableCTarget with + # global constant temporaries is None + + from loopy.target.c import ExecutableCTarget + from loopy.kernel.data import temp_var_scope as scopes + n = 10 + + knl = lp.make_kernel('{[i]: 0 <= i < n}', + """ + a[i] = b[i] + """, + [lp.GlobalArg('a', shape=(n,), dtype=np.int32), + lp.TemporaryVariable('b', shape=(n,), + initializer=np.arange(n, dtype=np.int32), + dtype=np.int32, + read_only=True, + scope=scopes.GLOBAL)], + target=ExecutableCTarget()) + + knl = lp.fix_parameters(knl, n=n) + assert ('int b[%d]' % n) not in lp.generate_code_v2(knl).host_code() + assert np.allclose(knl(a=np.zeros(10, dtype=np.int32))[1], np.arange(10))