diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 6a4d559758bc1d7ca52e9dc4da1b7e503e22cc29..db29d51307392d72eca2e381b0b9143964ddf6cf 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -403,7 +403,7 @@ def auto_test_vs_ref( raise LoopyError("ref_knl and test_knl argument lists disagree at index " "%d (1-based)" % (i+1)) - from loopy.compiled import CompiledKernel, get_highlighted_cl_code + from loopy.compiled import CompiledKernel, get_highlighted_code if isinstance(op_count, (int, float)): warn("op_count should be a list", stacklevel=2) @@ -448,7 +448,7 @@ def auto_test_vs_ref( print(75*"-") print("Reference Code:") print(75*"-") - print(get_highlighted_cl_code(ref_compiled.get_code())) + print(get_highlighted_code(ref_compiled.get_code())) print(75*"-") ref_cl_kernel_info = ref_compiled.cl_kernel_info(frozenset()) diff --git a/loopy/check.py b/loopy/check.py index 6a1e3dc33a33b826ad54c42a549b35ad275d9fe5..e8082c0ee4a71549f88dd6b1ca7174b9cdcf153d 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -731,8 +731,8 @@ def check_implemented_domains(kernel, implemented_domains, code=None): print(79*"-") print("CODE:") print(79*"-") - from loopy.compiled import get_highlighted_cl_code - print(get_highlighted_cl_code(code)) + from loopy.compiled import get_highlighted_code + print(get_highlighted_code(code)) print(79*"-") raise LoopyError("sanity check failed--implemented and desired " diff --git a/loopy/compiled.py b/loopy/compiled.py index b3e4fe0589dc9a62d7bdefd7152784560be0ca8a..062ba60cccb74da92ee593ee4c55b7bc3b4d0c63 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -24,8 +24,8 @@ THE SOFTWARE. from loopy.target.pyopencl_execution import ( # noqa - PyOpenCLKernelExecutor, - get_highlighted_cl_code) + PyOpenCLKernelExecutor) +from loopy.execution import get_highlighted_code # {{{ compatibility diff --git a/loopy/execution.py b/loopy/execution.py index 65968e663093bfbba17b60cbd0142633b3114dda..ad33ae3e735727b757978ae3ade4ff4fdacd585a 100644 --- a/loopy/execution.py +++ b/loopy/execution.py @@ -126,7 +126,10 @@ class ExecutionWrapperGeneratorBase(object): self.system_args = system_args[:] def python_dtype_str(self, dtype): - if dtype.isbuiltin: + # TODO: figure out why isbuiltin isn't working in test (requiring second + # line) + if dtype.isbuiltin or \ + np.dtype(str(dtype)).isbuiltin: return "_lpy_np."+dtype.name raise Exception('dtype: {} not recognized'.format(dtype)) @@ -329,7 +332,7 @@ class ExecutionWrapperGeneratorBase(object): # {{{ handle non numpy arguements def handle_non_numpy_arg(self, gen, arg): - raise Exception('Non-numpy args are not allowed for C-execution') + pass # }}} @@ -345,32 +348,45 @@ class ExecutionWrapperGeneratorBase(object): for i in range(num_axes): gen("_lpy_shape_%d = %s" % (i, strify(arg.unvec_shape[i]))) - sym_order = var('_lpy_order') - gen("%s = %s" % (strify(sym_order), arg.order)) - - sym_shape = tuple( - var("_lpy_shape_%d" % i) - for i in range(num_axes)) + itemsize = kernel_arg.dtype.numpy_dtype.itemsize + for i in range(num_axes): + gen("_lpy_strides_%d = %s" % (i, strify( + itemsize*arg.unvec_strides[i]))) if not skip_arg_checks: for i in range(num_axes): - gen("assert _lpy_shape_%d > 0, " - "\"'%s' has negative shape in axis %d\"" + gen("assert _lpy_strides_%d > 0, " + "\"'%s' has negative stride in axis %d\"" % (i, arg.name, i)) + sym_strides = tuple( + var("_lpy_strides_%d" % i) + for i in range(num_axes)) + + sym_shape = tuple( + var("_lpy_shape_%d" % i) + for i in range(num_axes)) + gen("%(name)s = _lpy_np.empty(%(shape)s, " - "%(dtype)s, order=%(order)s)" + "%(dtype)s)" % dict( name=arg.name, shape=strify(sym_shape), - order=strify(sym_order), dtype=self.python_dtype_str( kernel_arg.dtype.numpy_dtype))) + #check strides + gen("%(name)s = _lpy_strided(%(name)s, %(shape)s, " + "%(strides)s)" + % dict( + name=arg.name, + shape=strify(sym_shape), + strides=strify(sym_strides))) + if not skip_arg_checks: for i in range(num_axes): gen("del _lpy_shape_%d" % i) - gen("del %s" % strify(sym_order)) + gen("del _lpy_strides_%d" % i) gen("") # }}} @@ -548,7 +564,8 @@ class ExecutionWrapperGeneratorBase(object): % (arg.name, arg.name, strify(sym_strides))) if not arg.allows_offset: - gen("if %s.offset:" % arg.name) + gen("if hasattr(%s, 'offset') and %s.offset:" % ( + arg.name, arg.name)) with Indentation(gen): gen("raise ValueError(\"Argument '%s' does not " "allow arrays with offsets. Try passing " @@ -584,7 +601,8 @@ class ExecutionWrapperGeneratorBase(object): Add default C-imports to preamble """ gen.add_to_preamble("import numpy as _lpy_np") - gen.add_to_preamble("import loopy.target.c_execution as _lpy_c") + gen.add_to_preamble("from loopy.target.c.compyte.array" + " import as_strided as _lpy_strided") def intialize_system_args(self, gen): """ @@ -690,10 +708,11 @@ class ExecutionWrapperGeneratorBase(object): args = self.generate_arg_setup( gen, kernel, implemented_data_info, options) - self.generate_invocation(codegen_result.host_program.name, args) + self.generate_invocation(gen, codegen_result.host_program.name, args) self.generate_output_handler(gen, options, kernel, implemented_data_info) + import pdb; pdb.set_trace() if options.write_wrapper: output = gen.get() if options.highlight_wrapper: diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 85d751260470cbbd04595c1e64945ce07601f6da..d2e72b84ba2ac6909aea91cf5c6515a0d77e2d6c 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -303,7 +303,7 @@ class CTarget(TargetBase): return self.compiler def get_kernel_executor(self, knl, *args, **kwargs): - from loopy.target.c import CKernelExecutor + from loopy.target.c.c_execution import CKernelExecutor return CKernelExecutor(knl, self.compiler) # }}} diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 230777529761ed278f9b13ef705e2a47db5840a0..618da0226d029a385993cdbda9e8f91e1507b014 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -27,8 +27,6 @@ import cgen import os import subprocess -from loopy.target.c import CTarget, generate_header -from loopy.codegen import generate_code from loopy.execution import (KernelExecutorBase, _Kernels, _KernelInfo, ExecutionWrapperGeneratorBase) from pytools import memoize_method @@ -104,21 +102,28 @@ class CppCompiler(CCompiler): default_compile_flags = '-g -O3'.split() -class CompiledKernel(object): +class CompiledCKernel(object): """ - A CompiledKernel wraps a loopy kernel, compiling it and loading the + A CompiledCKernel wraps a loopy kernel, compiling it and loading the result as a shared library, and provides access to the kernel as a ctypes function object, wrapped by the __call__ method, which attempts to automatically map argument types. """ - def __init__(self, knl, comp=None): - assert isinstance(knl.target, CTarget) + def __init__(self, knl, target, comp=None): + from loopy.target.c import CTarget + assert isinstance(target, CTarget) + self.target = target self.knl = knl - self.code, _ = generate_code(knl) + # get code and build + self.code = str(knl.ast) self.comp = comp or CCompiler() self.dll = self.comp.build(self.code) - self.func_decl, = generate_header(knl) + # get the function declaration for interface with ctypes + from loopy.target.c import CFunctionDeclExtractor + self.func_decl = CFunctionDeclExtractor() + self.func_decl(knl.ast) + self.func_decl = self.func_decl.decls[0] self._arg_info = [] # TODO knl.args[:].dtype is sufficient self._visit_func_decl(self.func_decl) @@ -165,7 +170,7 @@ class CompiledKernel(object): self._append_arg(pod.name, pod.dtype) def _visit_pointer(self, node): - "Visit pointer argument of kernel." + """Visit pointer argument of kernel.""" pod = node.subdecl # type: cgen.POD self._append_arg(pod.name, pod.dtype, pointer=True) @@ -181,7 +186,7 @@ class CompiledKernel(object): def _dtype_to_ctype(self, dtype, pointer=False): """Map NumPy dtype to equivalent ctypes type.""" - target = self.knl.target # type: CTarget + target = self.target # type: CTarget registry = target.get_dtype_registry().wrapped_registry typename = registry.dtype_to_ctype(dtype) typename = {'unsigned': 'uint'}.get(typename, typename) @@ -236,7 +241,9 @@ class CKernelExecutor(KernelExecutorBase): c_kernels = _Kernels() for dp in codegen_result.device_programs: - setattr(c_kernels, dp.name, CompiledKernel(dp, self.compiler)) + setattr(c_kernels, dp.name, CompiledCKernel(dp, + self.kernel.target, + self.compiler)) return _KernelInfo( kernel=kernel, @@ -246,7 +253,7 @@ class CKernelExecutor(KernelExecutorBase): # }}} - def __call__(self, **kwargs): + def __call__(self, *args, **kwargs): """ :returns: ``(None, output)`` the output is a tuple of output arguments (arguments that are written as part of the kernel). The order is given @@ -262,4 +269,4 @@ class CKernelExecutor(KernelExecutorBase): kernel_info = self.kernel_info(self.arg_to_dtype_set(kwargs)) return kernel_info.invoker( - kernel_info.c_kernels, **kwargs) + kernel_info.c_kernels, *args, **kwargs) diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index a2574bf8aa9ece05a42764cd2cf891ea237cbd5a..021bc786f0d15f8e2bb9e3a0bf7aca5a4bfc7e16 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -24,7 +24,7 @@ THE SOFTWARE. from six.moves import range, zip -from pytools import ImmutableRecord, memoize_method +from pytools import memoize_method from pytools.py_codegen import Indentation from loopy.execution import (KernelExecutorBase, ExecutionWrapperGeneratorBase, _KernelInfo, _Kernels) @@ -309,7 +309,7 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): kwargs = self.packing_controller.unpack(kwargs) - kernel_info = self.cl_kernel_info(self.arg_to_dtype_set(kwargs)) + kernel_info = self.kernel_info(self.arg_to_dtype_set(kwargs)) return kernel_info.invoker( kernel_info.cl_kernels, queue, allocator, wait_for,