diff --git a/doc/_templates/layout.html b/doc/_templates/layout.html index 0fed238faf1fc27457ad8672017577f055e7edf0..400e7ec1d49677537aff6bf744e2803ef5c01e9e 100644 --- a/doc/_templates/layout.html +++ b/doc/_templates/layout.html @@ -1,2 +1,2 @@ {% extends "!layout.html" %} -{% set css_files = css_files + ['_static/akdoc.css']%} +{% set bootswatch_css_custom = ['_static/akdoc.css']%} diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 141a048f57f4743d57a799eb9aac0a2e41738879..099af5874ee422ffab1dcbf896b76b75d91fa3f8 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -117,9 +117,9 @@ class Program(object): if isinstance(source, unicode) and sys.version_info < (3,): from warnings import warn warn("Received OpenCL source code in Unicode, " - "should be ASCII string. Attempting conversion.", - stacklevel=2) - source = str(source) + "should be ASCII string. Attempting conversion.", + stacklevel=2) + source = source.encode() self._context = context self._source = source @@ -245,7 +245,7 @@ class Program(object): def compile(self, options=[], devices=None, headers=[]): options = " ".join(options) - return self._prg().compile(options, devices, headers) + return self._prg.compile(options, devices, headers) def __eq__(self, other): return self._get_prg() == other._get_prg() diff --git a/pyopencl/array.py b/pyopencl/array.py index 88770736ed7271a16edce7bd416810afe76dc8ad..1fde923fbb0c2129c12749db1a2270c592486155 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -38,6 +38,7 @@ from pyopencl.compyte.array import ( c_contiguous_strides as _c_contiguous_strides, ArrayFlags as _ArrayFlags, get_common_dtype as _get_common_dtype_base) +from pyopencl.compyte.dtypes import DTypeDict as _DTypeDict from pyopencl.characterize import has_double_support @@ -45,6 +46,17 @@ def _get_common_dtype(obj1, obj2, queue): return _get_common_dtype_base(obj1, obj2, has_double_support(queue.device)) +# Work around PyPy not currently supporting the object dtype. +# (Yes, it doesn't even support checking!) +# (as of May 27, 2014 on PyPy 2.3) +try: + np.dtype(object) + + def _dtype_is_object(t): + return t == object +except: + def _dtype_is_object(t): + return False # {{{ vector types @@ -58,7 +70,7 @@ def _create_vector_types(): from pyopencl.tools import get_or_register_dtype vec.types = {} - vec.type_to_scalar_and_count = {} + vec.type_to_scalar_and_count = _DTypeDict() counts = [2, 3, 4, 8, 16] @@ -90,10 +102,18 @@ def _create_vector_types(): if len(titles) < len(names): titles.extend((len(names)-len(titles))*[None]) - dtype = np.dtype(dict( - names=names, - formats=[base_type]*padded_count, - titles=titles)) + try: + dtype = np.dtype(dict( + names=names, + formats=[base_type]*padded_count, + titles=titles)) + except NotImplementedError: + try: + dtype = np.dtype([((n, title), base_type) + for (n, title) in zip(names, titles)]) + except TypeError: + dtype = np.dtype([(n, base_type) for (n, title) + in zip(names, titles)]) get_or_register_dtype(name, dtype) @@ -498,7 +518,7 @@ class Array(object): # }}} - if dtype == object: + if _dtype_is_object(dtype): raise TypeError("object arrays on the compute device are not allowed") self.queue = queue @@ -1470,7 +1490,7 @@ def to_device(queue, ary, allocator=None, async=False): *context* argument was deprecated. """ - if ary.dtype == object: + if _dtype_is_object(ary.dtype): raise RuntimeError("to_device does not work on object arrays.") result = Array(queue, ary.shape, ary.dtype, diff --git a/pyopencl/clmath.py b/pyopencl/clmath.py index d3c6d0f7272e7914c58c8bceffae0d507a3c8050..1b41ce67d0ce9b7543c6f76944edf29c1a082ff0 100644 --- a/pyopencl/clmath.py +++ b/pyopencl/clmath.py @@ -23,6 +23,7 @@ THE SOFTWARE. import pyopencl.array as cl_array import pyopencl.elementwise as elementwise from pyopencl.array import _get_common_dtype +import numpy as np def _make_unary_array_func(name): @@ -56,14 +57,14 @@ asinpi = _make_unary_array_func("asinpi") @cl_array.elwise_kernel_runner def _atan2(result, arg1, arg2): - return elementwise.get_binary_func_kernel(result.context, "atan2", - arg1.dtype, arg2.dtype, result.dtype) + return elementwise.get_float_binary_func_kernel( + result.context, "atan2", arg1.dtype, arg2.dtype, result.dtype) @cl_array.elwise_kernel_runner def _atan2pi(result, arg1, arg2): - return elementwise.get_binary_func_kernel(result.context, "atan2pi", - arg1.dtype, arg2.dtype, result.dtype) + return elementwise.get_float_binary_func_kernel( + result.context, "atan2pi", arg1.dtype, arg2.dtype, result.dtype) atan = _make_unary_array_func("atan") @@ -118,13 +119,15 @@ floor = _make_unary_array_func("floor") @cl_array.elwise_kernel_runner def _fmod(result, arg, mod): - return elementwise.get_fmod_kernel(result.context) + return elementwise.get_fmod_kernel(result.context, result.dtype, + arg.dtype, mod.dtype) def fmod(arg, mod, queue=None): """Return the floating point remainder of the division `arg/mod`, for each element in `arg` and `mod`.""" - result = arg._new_like_me(queue=queue) + queue = (queue or arg.queue) or mod.queue + result = arg._new_like_me(_get_common_dtype(arg, mod, queue)) _fmod(result, arg, mod, queue=queue) return result @@ -133,7 +136,8 @@ def fmod(arg, mod, queue=None): @cl_array.elwise_kernel_runner def _frexp(sig, expt, arg): - return elementwise.get_frexp_kernel(sig.context) + return elementwise.get_frexp_kernel(sig.context, sig.dtype, + expt.dtype, arg.dtype) def frexp(arg, queue=None): @@ -153,7 +157,8 @@ ilogb = _make_unary_array_func("ilogb") @cl_array.elwise_kernel_runner def _ldexp(result, sig, exp): - return elementwise.get_ldexp_kernel(result.context) + return elementwise.get_ldexp_kernel(result.context, result.dtype, + sig.dtype, exp.dtype) def ldexp(significand, exponent, queue=None): @@ -181,7 +186,8 @@ logb = _make_unary_array_func("logb") @cl_array.elwise_kernel_runner def _modf(intpart, fracpart, arg): - return elementwise.get_modf_kernel(intpart.context) + return elementwise.get_modf_kernel(intpart.context, intpart.dtype, + fracpart.dtype, arg.dtype) def modf(arg, queue=None): @@ -223,13 +229,15 @@ trunc = _make_unary_array_func("trunc") # TODO: table 6.12, clamp et al @cl_array.elwise_kernel_runner -def _bessel_jn(result, sig, exp): - return elementwise.get_bessel_kernel(result.context, "j") +def _bessel_jn(result, n, x): + return elementwise.get_bessel_kernel(result.context, "j", result.dtype, + np.dtype(type(n)), x.dtype) @cl_array.elwise_kernel_runner -def _bessel_yn(result, sig, exp): - return elementwise.get_bessel_kernel(result.context, "y") +def _bessel_yn(result, n, x): + return elementwise.get_bessel_kernel(result.context, "y", result.dtype, + np.dtype(type(n)), x.dtype) def bessel_jn(n, x, queue=None): diff --git a/pyopencl/compyte b/pyopencl/compyte index c5e80622ee2dfd129d56886098a5ad532c54cf09..2293b43ecfadfd4ea2adc9266c8ec18f2ae0ce11 160000 --- a/pyopencl/compyte +++ b/pyopencl/compyte @@ -1 +1 @@ -Subproject commit c5e80622ee2dfd129d56886098a5ad532c54cf09 +Subproject commit 2293b43ecfadfd4ea2adc9266c8ec18f2ae0ce11 diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py index 5878b89cf9b557c2c65bb2b9e061b5cd29813f74..03c37024489a57b70860f71f8d0a99fd1edf7159 100644 --- a/pyopencl/elementwise.py +++ b/pyopencl/elementwise.py @@ -812,75 +812,113 @@ def get_array_comparison_kernel(context, operator, dtype_a, dtype_b): @context_dependent_memoize -def get_fmod_kernel(context): - return get_elwise_kernel(context, - "float *z, float *arg, float *mod", - "z[i] = fmod(arg[i], mod[i])", - name="fmod_kernel") +def get_unary_func_kernel(context, func_name, in_dtype, out_dtype=None): + if out_dtype is None: + out_dtype = in_dtype + + return get_elwise_kernel(context, [ + VectorArg(out_dtype, "z", with_offset=True), + VectorArg(in_dtype, "y", with_offset=True), + ], + "z[i] = %s(y[i])" % func_name, + name="%s_kernel" % func_name) @context_dependent_memoize -def get_modf_kernel(context): - return get_elwise_kernel(context, - "float *intpart ,float *fracpart, float *x", - "fracpart[i] = modf(x[i], &intpart[i])", - name="modf_kernel") +def get_binary_func_kernel(context, func_name, x_dtype, y_dtype, out_dtype, + preamble="", name=None): + return get_elwise_kernel(context, [ + VectorArg(out_dtype, "z", with_offset=True), + VectorArg(x_dtype, "x", with_offset=True), + VectorArg(y_dtype, "y", with_offset=True), + ], + "z[i] = %s(x[i], y[i])" % func_name, + name="%s_kernel" % func_name if name is None else name, + preamble=preamble) @context_dependent_memoize -def get_frexp_kernel(context): - return get_elwise_kernel(context, - "float *significand, float *exponent, float *x", - """ - int expt = 0; - significand[i] = frexp(x[i], &expt); - exponent[i] = expt; - """, - name="frexp_kernel") +def get_float_binary_func_kernel(context, func_name, x_dtype, y_dtype, + out_dtype, preamble="", name=None): + if (np.array(0, x_dtype) * np.array(0, y_dtype)).itemsize > 4: + arg_type = 'double' + preamble = """ + #pragma OPENCL EXTENSION cl_khr_fp64: enable + #define PYOPENCL_DEFINE_CDOUBLE + """ + preamble + else: + arg_type = 'float' + return get_elwise_kernel(context, [ + VectorArg(out_dtype, "z", with_offset=True), + VectorArg(x_dtype, "x", with_offset=True), + VectorArg(y_dtype, "y", with_offset=True), + ], + "z[i] = %s((%s)x[i], (%s)y[i])" % (func_name, arg_type, arg_type), + name="%s_kernel" % func_name if name is None else name, + preamble=preamble) @context_dependent_memoize -def get_ldexp_kernel(context): - return get_elwise_kernel(context, - "float *z, float *sig, float *expt", - "z[i] = ldexp(sig[i], (int) expt[i])", - name="ldexp_kernel") +def get_fmod_kernel(context, out_dtype=np.float32, arg_dtype=np.float32, + mod_dtype=np.float32): + return get_float_binary_func_kernel(context, 'fmod', arg_dtype, + mod_dtype, out_dtype) @context_dependent_memoize -def get_bessel_kernel(context, which_func): - return get_elwise_kernel(context, - "double *z, int ord_n, double *x", - "z[i] = bessel_%sn(ord_n, x[i])" % which_func, - name="bessel_%sn_kernel" % which_func, - preamble=""" - #include - """ % which_func) +def get_modf_kernel(context, int_dtype=np.float32, + frac_dtype=np.float32, x_dtype=np.float32): + return get_elwise_kernel(context, [ + VectorArg(int_dtype, "intpart", with_offset=True), + VectorArg(frac_dtype, "fracpart", with_offset=True), + VectorArg(x_dtype, "x", with_offset=True), + ], + """ + fracpart[i] = modf(x[i], &intpart[i]) + """, + name="modf_kernel") @context_dependent_memoize -def get_unary_func_kernel(context, func_name, in_dtype, out_dtype=None): - if out_dtype is None: - out_dtype = in_dtype +def get_frexp_kernel(context, sign_dtype=np.float32, exp_dtype=np.float32, + x_dtype=np.float32): + return get_elwise_kernel(context, [ + VectorArg(sign_dtype, "significand", with_offset=True), + VectorArg(exp_dtype, "exponent", with_offset=True), + VectorArg(x_dtype, "x", with_offset=True), + ], + """ + int expt = 0; + significand[i] = frexp(x[i], &expt); + exponent[i] = expt; + """, + name="frexp_kernel") - return get_elwise_kernel(context, - "%(tp_out)s *z, %(tp_in)s *y" % { - "tp_in": dtype_to_ctype(in_dtype), - "tp_out": dtype_to_ctype(out_dtype), - }, - "z[i] = %s(y[i])" % func_name, - name="%s_kernel" % func_name) + +@context_dependent_memoize +def get_ldexp_kernel(context, out_dtype=np.float32, sig_dtype=np.float32, + expt_dtype=np.float32): + return get_binary_func_kernel( + context, '_PYOCL_LDEXP', sig_dtype, expt_dtype, out_dtype, + preamble="#define _PYOCL_LDEXP(x, y) ldexp(x, (int)(y))", + name="ldexp_kernel") @context_dependent_memoize -def get_binary_func_kernel(context, func_name, x_dtype, y_dtype, out_dtype): +def get_bessel_kernel(context, which_func, out_dtype=np.float64, + order_dtype=np.int32, x_dtype=np.float64): return get_elwise_kernel(context, [ VectorArg(out_dtype, "z", with_offset=True), + ScalarArg(order_dtype, "ord_n"), VectorArg(x_dtype, "x", with_offset=True), - VectorArg(y_dtype, "y", with_offset=True), ], - "z[i] = %s(x[i], y[i])" % func_name, - name="%s_kernel" % func_name) + "z[i] = bessel_%sn(ord_n, x[i])" % which_func, + name="bessel_%sn_kernel" % which_func, + preamble=""" + #pragma OPENCL EXTENSION cl_khr_fp64: enable + #define PYOPENCL_DEFINE_CDOUBLE + #include + """ % which_func) @context_dependent_memoize diff --git a/pyopencl/tools.py b/pyopencl/tools.py index b270bd09820d6757deceaa9409210fd0615a1f7d..b85708d8c15809f0a0a86a74d0be82b3997ab5f2 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -38,7 +38,9 @@ import re from pyopencl.compyte.dtypes import ( # noqa get_or_register_dtype, TypeNameNotKnown, - register_dtype, dtype_to_ctype) + register_dtype, dtype_to_ctype, + dtype_hashable as _dtype_hashable, + dtype_to_key as _dtype_to_key) def _register_types(): @@ -492,8 +494,15 @@ class _CDeclList: return result +if _dtype_hashable: + _memoize_match_dtype_to_c_struct = memoize +else: + import json as _json + _memoize_match_dtype_to_c_struct = memoize( + key=lambda device, name, dtype, context=None: + (device, name, _dtype_to_key(dtype), context)) -@memoize +@_memoize_match_dtype_to_c_struct def match_dtype_to_c_struct(device, name, dtype, context=None): """Return a tuple `(dtype, c_decl)` such that the C struct declaration in `c_decl` and the structure :class:`numpy.dtype` instance `dtype` @@ -608,28 +617,47 @@ def match_dtype_to_c_struct(device, name, dtype, context=None): del queue del context - dtype_arg_dict = dict( - names=[field_name for field_name, (field_dtype, offset) in fields], - formats=[field_dtype - for field_name, (field_dtype, offset) in fields], - offsets=[int(x) for x in offsets], - itemsize=int(size_and_offsets[0]), - ) - dtype = np.dtype(dtype_arg_dict) - - if dtype.itemsize != size_and_offsets[0]: - # "Old" versions of numpy (1.6.x?) silently ignore "itemsize". Boo. - dtype_arg_dict["names"].append("_pycl_size_fixer") - dtype_arg_dict["formats"].append(np.uint8) - dtype_arg_dict["offsets"].append(int(size_and_offsets[0])-1) + try: + dtype_arg_dict = { + 'names': [field_name + for field_name, (field_dtype, offset) in fields], + 'formats': [field_dtype + for field_name, (field_dtype, offset) in fields], + 'offsets': [int(x) for x in offsets], + 'itemsize': int(size_and_offsets[0]), + } dtype = np.dtype(dtype_arg_dict) + if dtype.itemsize != size_and_offsets[0]: + # "Old" versions of numpy (1.6.x?) silently ignore "itemsize". Boo. + dtype_arg_dict["names"].append("_pycl_size_fixer") + dtype_arg_dict["formats"].append(np.uint8) + dtype_arg_dict["offsets"].append(int(size_and_offsets[0])-1) + dtype = np.dtype(dtype_arg_dict) + except NotImplementedError: + def calc_field_type(): + total_size = 0 + padding_count = 0 + for offset, (field_name, (field_dtype, _)) in zip(offsets, fields): + if offset > total_size: + padding_count += 1 + yield ('__pycl_padding%d' % padding_count, + 'V%d' % offset - total_size) + yield field_name, field_dtype + total_size = field_dtype.itemsize + offset + dtype = np.dtype(list(calc_field_type())) assert dtype.itemsize == size_and_offsets[0] return dtype, c_decl +if _dtype_hashable: + _memoize_dtype_to_c_struct = memoize +else: + import json as _json + _memoize_dtype_to_c_struct = memoize( + key=lambda device, dtype: (device, _dtype_to_key(dtype))) -@memoize +@_memoize_dtype_to_c_struct def dtype_to_c_struct(device, dtype): matched_dtype, c_decl = match_dtype_to_c_struct( device, dtype_to_ctype(dtype), dtype) diff --git a/test/test_array.py b/test/test_array.py index 49c9d44bd3d7d317e720dcf9e712e78d83c0cc23..bfe227849e4f3f2d7ccaaf0da1e97c4f128d986b 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -445,6 +445,13 @@ def test_random(ctx_factory): # {{{ misc def test_numpy_integer_shape(ctx_factory): + try: + list(np.int32(17)) + except: + pass + else: + from pytest import skip + skip("numpy implementation does not handle scalar correctly.") context = ctx_factory() queue = cl.CommandQueue(context) diff --git a/test/test_clmath.py b/test/test_clmath.py index 5778860b260eb25f488d00507fd11adf9bbc14e0..6ebbe46bcb1f49ed70e1797c003013fe43d79147 100644 --- a/test/test_clmath.py +++ b/test/test_clmath.py @@ -88,7 +88,8 @@ def make_unary_function_test(name, limits=(0, 1), threshold=0, use_complex=False args = cl_array.arange(queue, a, b, (b-a)/s, dtype=dtype) if dtype.kind == "c": - args = args+dtype.type(1j)*args + # args = args + dtype.type(1j) * args + args = args + args * dtype.type(1j) gpu_results = gpu_func(args).get() cpu_results = cpu_func(args.get()) @@ -126,6 +127,40 @@ if have_cl(): test_tanh = make_unary_function_test("tanh", (-3, 3), 2e-6, use_complex=True) +def test_atan2(ctx_factory): + context = ctx_factory() + queue = cl.CommandQueue(context) + + for s in sizes: + a = (cl_array.arange(queue, s, dtype=np.float32) - s / 2) / 100 + a2 = (s / 2 - 1 - cl_array.arange(queue, s, dtype=np.float32)) / 100 + b = clmath.atan2(a, a2) + + a = a.get() + a2 = a2.get() + b = b.get() + + for i in range(s): + assert abs(math.atan2(a[i], a2[i]) - b[i]) < 1e-6 + + +def test_atan2pi(ctx_factory): + context = ctx_factory() + queue = cl.CommandQueue(context) + + for s in sizes: + a = (cl_array.arange(queue, s, dtype=np.float32) - s / 2) / 100 + a2 = (s / 2 - 1 - cl_array.arange(queue, s, dtype=np.float32)) / 100 + b = clmath.atan2pi(a, a2) + + a = a.get() + a2 = a2.get() + b = b.get() + + for i in range(s): + assert abs(math.atan2(a[i], a2[i]) / math.pi - b[i]) < 1e-6 + + def test_fmod(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) diff --git a/test/test_wrapper.py b/test/test_wrapper.py index 9dd9de445d800f0b30ece8b844444a750e13c17d..4358422bed4708b0f30a8a30aaa0e1561b0acee2 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -48,6 +48,8 @@ def test_get_info(ctx_factory): pocl_quirks = [ (cl.Buffer, cl.mem_info.OFFSET), + (cl.Program, cl.program_info.BINARIES), + (cl.Program, cl.program_info.BINARY_SIZES), ] if ctx._get_cl_version() >= (1, 2) and cl.get_cl_header_version() >= (1, 2): pocl_quirks.extend([ @@ -69,6 +71,9 @@ def test_get_info(ctx_factory): (("The pocl project", "Portable Computing Language", "OpenCL 1.2 pocl 0.9-pre"), pocl_quirks), + (("The pocl project", "Portable Computing Language", + "OpenCL 1.2 pocl 0.9"), + pocl_quirks), (("Apple", "Apple", "OpenCL 1.2 (Apr 25 2013 18:32:06)"), [ @@ -584,6 +589,13 @@ def test_context_dep_memoize(ctx_factory): def test_can_build_binary(ctx_factory): ctx = ctx_factory() device, = ctx.devices + platform = device.platform + + if (platform.vendor == "The pocl project" and + platform.name == "Portable Computing Language"): + # Segfault on pocl 0.9 + from pytest import skip + skip("pocl doesn't like getting PROGRAM_BINARIES") program = cl.Program(ctx, """ __kernel void simple(__global float *in, __global float *out)