diff --git a/doc/_templates/layout.html b/doc/_templates/layout.html
index 0fed238faf1fc27457ad8672017577f055e7edf0..400e7ec1d49677537aff6bf744e2803ef5c01e9e 100644
--- a/doc/_templates/layout.html
+++ b/doc/_templates/layout.html
@@ -1,2 +1,2 @@
{% extends "!layout.html" %}
-{% set css_files = css_files + ['_static/akdoc.css']%}
+{% set bootswatch_css_custom = ['_static/akdoc.css']%}
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index d7e346289dc7e3f7c1f7c3cc07ca397b01380f89..66d82b9be47219c24b9bdf7e8c6a0f2f077dcec8 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -117,9 +117,9 @@ class Program(object):
if isinstance(source, unicode) and sys.version_info < (3,):
from warnings import warn
warn("Received OpenCL source code in Unicode, "
- "should be ASCII string. Attempting conversion.",
- stacklevel=2)
- source = str(source)
+ "should be ASCII string. Attempting conversion.",
+ stacklevel=2)
+ source = source.encode()
self._context = context
self._source = source
@@ -242,7 +242,7 @@ class Program(object):
def compile(self, options=[], devices=None, headers=[]):
options = " ".join(options)
- return self._prg().compile(options, devices, headers)
+ return self._prg.compile(options, devices, headers)
def __eq__(self, other):
return self._get_prg() == other._get_prg()
diff --git a/pyopencl/array.py b/pyopencl/array.py
index 7622f370539476d895e5d7a215c8e088272248f7..78190b86a46cfe3234bc249704d18a5b6629d0a2 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -38,6 +38,7 @@ from pyopencl.compyte.array import (
c_contiguous_strides as _c_contiguous_strides,
ArrayFlags as _ArrayFlags,
get_common_dtype as _get_common_dtype_base)
+from pyopencl.compyte.dtypes import DTypeDict as _DTypeDict
from pyopencl.characterize import has_double_support
@@ -45,6 +46,17 @@ def _get_common_dtype(obj1, obj2, queue):
return _get_common_dtype_base(obj1, obj2,
has_double_support(queue.device))
+# Work around PyPy not currently supporting the object dtype.
+# (Yes, it doesn't even support checking!)
+# (as of May 27, 2014 on PyPy 2.3)
+try:
+ np.dtype(object)
+
+ def _dtype_is_object(t):
+ return t == object
+except:
+ def _dtype_is_object(t):
+ return False
# {{{ vector types
@@ -58,7 +70,7 @@ def _create_vector_types():
from pyopencl.tools import get_or_register_dtype
vec.types = {}
- vec.type_to_scalar_and_count = {}
+ vec.type_to_scalar_and_count = _DTypeDict()
counts = [2, 3, 4, 8, 16]
@@ -90,10 +102,18 @@ def _create_vector_types():
if len(titles) < len(names):
titles.extend((len(names)-len(titles))*[None])
- dtype = np.dtype(dict(
- names=names,
- formats=[base_type]*padded_count,
- titles=titles))
+ try:
+ dtype = np.dtype(dict(
+ names=names,
+ formats=[base_type]*padded_count,
+ titles=titles))
+ except NotImplementedError:
+ try:
+ dtype = np.dtype([((n, title), base_type)
+ for (n, title) in zip(names, titles)])
+ except TypeError:
+ dtype = np.dtype([(n, base_type) for (n, title)
+ in zip(names, titles)])
get_or_register_dtype(name, dtype)
@@ -498,7 +518,7 @@ class Array(object):
# }}}
- if dtype == object:
+ if _dtype_is_object(dtype):
raise TypeError("object arrays on the compute device are not allowed")
self.queue = queue
@@ -1470,7 +1490,7 @@ def to_device(queue, ary, allocator=None, async=False):
*context* argument was deprecated.
"""
- if ary.dtype == object:
+ if _dtype_is_object(ary.dtype):
raise RuntimeError("to_device does not work on object arrays.")
result = Array(queue, ary.shape, ary.dtype,
diff --git a/pyopencl/clmath.py b/pyopencl/clmath.py
index d3c6d0f7272e7914c58c8bceffae0d507a3c8050..1b41ce67d0ce9b7543c6f76944edf29c1a082ff0 100644
--- a/pyopencl/clmath.py
+++ b/pyopencl/clmath.py
@@ -23,6 +23,7 @@ THE SOFTWARE.
import pyopencl.array as cl_array
import pyopencl.elementwise as elementwise
from pyopencl.array import _get_common_dtype
+import numpy as np
def _make_unary_array_func(name):
@@ -56,14 +57,14 @@ asinpi = _make_unary_array_func("asinpi")
@cl_array.elwise_kernel_runner
def _atan2(result, arg1, arg2):
- return elementwise.get_binary_func_kernel(result.context, "atan2",
- arg1.dtype, arg2.dtype, result.dtype)
+ return elementwise.get_float_binary_func_kernel(
+ result.context, "atan2", arg1.dtype, arg2.dtype, result.dtype)
@cl_array.elwise_kernel_runner
def _atan2pi(result, arg1, arg2):
- return elementwise.get_binary_func_kernel(result.context, "atan2pi",
- arg1.dtype, arg2.dtype, result.dtype)
+ return elementwise.get_float_binary_func_kernel(
+ result.context, "atan2pi", arg1.dtype, arg2.dtype, result.dtype)
atan = _make_unary_array_func("atan")
@@ -118,13 +119,15 @@ floor = _make_unary_array_func("floor")
@cl_array.elwise_kernel_runner
def _fmod(result, arg, mod):
- return elementwise.get_fmod_kernel(result.context)
+ return elementwise.get_fmod_kernel(result.context, result.dtype,
+ arg.dtype, mod.dtype)
def fmod(arg, mod, queue=None):
"""Return the floating point remainder of the division `arg/mod`,
for each element in `arg` and `mod`."""
- result = arg._new_like_me(queue=queue)
+ queue = (queue or arg.queue) or mod.queue
+ result = arg._new_like_me(_get_common_dtype(arg, mod, queue))
_fmod(result, arg, mod, queue=queue)
return result
@@ -133,7 +136,8 @@ def fmod(arg, mod, queue=None):
@cl_array.elwise_kernel_runner
def _frexp(sig, expt, arg):
- return elementwise.get_frexp_kernel(sig.context)
+ return elementwise.get_frexp_kernel(sig.context, sig.dtype,
+ expt.dtype, arg.dtype)
def frexp(arg, queue=None):
@@ -153,7 +157,8 @@ ilogb = _make_unary_array_func("ilogb")
@cl_array.elwise_kernel_runner
def _ldexp(result, sig, exp):
- return elementwise.get_ldexp_kernel(result.context)
+ return elementwise.get_ldexp_kernel(result.context, result.dtype,
+ sig.dtype, exp.dtype)
def ldexp(significand, exponent, queue=None):
@@ -181,7 +186,8 @@ logb = _make_unary_array_func("logb")
@cl_array.elwise_kernel_runner
def _modf(intpart, fracpart, arg):
- return elementwise.get_modf_kernel(intpart.context)
+ return elementwise.get_modf_kernel(intpart.context, intpart.dtype,
+ fracpart.dtype, arg.dtype)
def modf(arg, queue=None):
@@ -223,13 +229,15 @@ trunc = _make_unary_array_func("trunc")
# TODO: table 6.12, clamp et al
@cl_array.elwise_kernel_runner
-def _bessel_jn(result, sig, exp):
- return elementwise.get_bessel_kernel(result.context, "j")
+def _bessel_jn(result, n, x):
+ return elementwise.get_bessel_kernel(result.context, "j", result.dtype,
+ np.dtype(type(n)), x.dtype)
@cl_array.elwise_kernel_runner
-def _bessel_yn(result, sig, exp):
- return elementwise.get_bessel_kernel(result.context, "y")
+def _bessel_yn(result, n, x):
+ return elementwise.get_bessel_kernel(result.context, "y", result.dtype,
+ np.dtype(type(n)), x.dtype)
def bessel_jn(n, x, queue=None):
diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py
index 36d127fdac65ff526608d973c8a686ab00ee3da3..b3e420c71ba943c4f7354c7f8468f1498c8da9a3 100644
--- a/pyopencl/elementwise.py
+++ b/pyopencl/elementwise.py
@@ -812,75 +812,113 @@ def get_array_comparison_kernel(context, operator, dtype_a, dtype_b):
@context_dependent_memoize
-def get_fmod_kernel(context):
- return get_elwise_kernel(context,
- "float *z, float *arg, float *mod",
- "z[i] = fmod(arg[i], mod[i])",
- name="fmod_kernel")
+def get_unary_func_kernel(context, func_name, in_dtype, out_dtype=None):
+ if out_dtype is None:
+ out_dtype = in_dtype
+
+ return get_elwise_kernel(context, [
+ VectorArg(out_dtype, "z", with_offset=True),
+ VectorArg(in_dtype, "y", with_offset=True),
+ ],
+ "z[i] = %s(y[i])" % func_name,
+ name="%s_kernel" % func_name)
@context_dependent_memoize
-def get_modf_kernel(context):
- return get_elwise_kernel(context,
- "float *intpart ,float *fracpart, float *x",
- "fracpart[i] = modf(x[i], &intpart[i])",
- name="modf_kernel")
+def get_binary_func_kernel(context, func_name, x_dtype, y_dtype, out_dtype,
+ preamble="", name=None):
+ return get_elwise_kernel(context, [
+ VectorArg(out_dtype, "z", with_offset=True),
+ VectorArg(x_dtype, "x", with_offset=True),
+ VectorArg(y_dtype, "y", with_offset=True),
+ ],
+ "z[i] = %s(x[i], y[i])" % func_name,
+ name="%s_kernel" % func_name if name is None else name,
+ preamble=preamble)
@context_dependent_memoize
-def get_frexp_kernel(context):
- return get_elwise_kernel(context,
- "float *significand, float *exponent, float *x",
- """
- int expt = 0;
- significand[i] = frexp(x[i], &expt);
- exponent[i] = expt;
- """,
- name="frexp_kernel")
+def get_float_binary_func_kernel(context, func_name, x_dtype, y_dtype,
+ out_dtype, preamble="", name=None):
+ if (np.array(0, x_dtype) * np.array(0, y_dtype)).itemsize > 4:
+ arg_type = 'double'
+ preamble = """
+ #pragma OPENCL EXTENSION cl_khr_fp64: enable
+ #define PYOPENCL_DEFINE_CDOUBLE
+ """ + preamble
+ else:
+ arg_type = 'float'
+ return get_elwise_kernel(context, [
+ VectorArg(out_dtype, "z", with_offset=True),
+ VectorArg(x_dtype, "x", with_offset=True),
+ VectorArg(y_dtype, "y", with_offset=True),
+ ],
+ "z[i] = %s((%s)x[i], (%s)y[i])" % (func_name, arg_type, arg_type),
+ name="%s_kernel" % func_name if name is None else name,
+ preamble=preamble)
@context_dependent_memoize
-def get_ldexp_kernel(context):
- return get_elwise_kernel(context,
- "float *z, float *sig, float *expt",
- "z[i] = ldexp(sig[i], (int) expt[i])",
- name="ldexp_kernel")
+def get_fmod_kernel(context, out_dtype=np.float32, arg_dtype=np.float32,
+ mod_dtype=np.float32):
+ return get_float_binary_func_kernel(context, 'fmod', arg_dtype,
+ mod_dtype, out_dtype)
@context_dependent_memoize
-def get_bessel_kernel(context, which_func):
- return get_elwise_kernel(context,
- "double *z, int ord_n, double *x",
- "z[i] = bessel_%sn(ord_n, x[i])" % which_func,
- name="bessel_%sn_kernel" % which_func,
- preamble="""
- #include
- """ % which_func)
+def get_modf_kernel(context, int_dtype=np.float32,
+ frac_dtype=np.float32, x_dtype=np.float32):
+ return get_elwise_kernel(context, [
+ VectorArg(int_dtype, "intpart", with_offset=True),
+ VectorArg(frac_dtype, "fracpart", with_offset=True),
+ VectorArg(x_dtype, "x", with_offset=True),
+ ],
+ """
+ fracpart[i] = modf(x[i], &intpart[i])
+ """,
+ name="modf_kernel")
@context_dependent_memoize
-def get_unary_func_kernel(context, func_name, in_dtype, out_dtype=None):
- if out_dtype is None:
- out_dtype = in_dtype
+def get_frexp_kernel(context, sign_dtype=np.float32, exp_dtype=np.float32,
+ x_dtype=np.float32):
+ return get_elwise_kernel(context, [
+ VectorArg(sign_dtype, "significand", with_offset=True),
+ VectorArg(exp_dtype, "exponent", with_offset=True),
+ VectorArg(x_dtype, "x", with_offset=True),
+ ],
+ """
+ int expt = 0;
+ significand[i] = frexp(x[i], &expt);
+ exponent[i] = expt;
+ """,
+ name="frexp_kernel")
- return get_elwise_kernel(context,
- "%(tp_out)s *z, %(tp_in)s *y" % {
- "tp_in": dtype_to_ctype(in_dtype),
- "tp_out": dtype_to_ctype(out_dtype),
- },
- "z[i] = %s(y[i])" % func_name,
- name="%s_kernel" % func_name)
+
+@context_dependent_memoize
+def get_ldexp_kernel(context, out_dtype=np.float32, sig_dtype=np.float32,
+ expt_dtype=np.float32):
+ return get_binary_func_kernel(
+ context, '_PYOCL_LDEXP', sig_dtype, expt_dtype, out_dtype,
+ preamble="#define _PYOCL_LDEXP(x, y) ldexp(x, (int)(y))",
+ name="ldexp_kernel")
@context_dependent_memoize
-def get_binary_func_kernel(context, func_name, x_dtype, y_dtype, out_dtype):
+def get_bessel_kernel(context, which_func, out_dtype=np.float64,
+ order_dtype=np.int32, x_dtype=np.float64):
return get_elwise_kernel(context, [
VectorArg(out_dtype, "z", with_offset=True),
+ ScalarArg(order_dtype, "ord_n"),
VectorArg(x_dtype, "x", with_offset=True),
- VectorArg(y_dtype, "y", with_offset=True),
],
- "z[i] = %s(x[i], y[i])" % func_name,
- name="%s_kernel" % func_name)
+ "z[i] = bessel_%sn(ord_n, x[i])" % which_func,
+ name="bessel_%sn_kernel" % which_func,
+ preamble="""
+ #pragma OPENCL EXTENSION cl_khr_fp64: enable
+ #define PYOPENCL_DEFINE_CDOUBLE
+ #include
+ """ % which_func)
@context_dependent_memoize
diff --git a/pyopencl/tools.py b/pyopencl/tools.py
index 635dfb6327047c40309bd6fc59810315a9514ab5..d887595b3f0ce66aad3df3b36545fe1e825f8688 100644
--- a/pyopencl/tools.py
+++ b/pyopencl/tools.py
@@ -37,7 +37,9 @@ import re
from pyopencl.compyte.dtypes import ( # noqa
get_or_register_dtype, TypeNameNotKnown,
- register_dtype, dtype_to_ctype)
+ register_dtype, dtype_to_ctype,
+ dtype_hashable as _dtype_hashable,
+ dtype_to_key as _dtype_to_key)
def _register_types():
@@ -490,8 +492,15 @@ class _CDeclList:
return result
+if _dtype_hashable:
+ _memoize_match_dtype_to_c_struct = memoize
+else:
+ import json as _json
+ _memoize_match_dtype_to_c_struct = memoize(
+ key=lambda device, name, dtype, context=None:
+ (device, name, _dtype_to_key(dtype), context))
-@memoize
+@_memoize_match_dtype_to_c_struct
def match_dtype_to_c_struct(device, name, dtype, context=None):
"""Return a tuple `(dtype, c_decl)` such that the C struct declaration
in `c_decl` and the structure :class:`numpy.dtype` instance `dtype`
@@ -606,28 +615,47 @@ def match_dtype_to_c_struct(device, name, dtype, context=None):
del queue
del context
- dtype_arg_dict = dict(
- names=[field_name for field_name, (field_dtype, offset) in fields],
- formats=[field_dtype
- for field_name, (field_dtype, offset) in fields],
- offsets=[int(x) for x in offsets],
- itemsize=int(size_and_offsets[0]),
- )
- dtype = np.dtype(dtype_arg_dict)
-
- if dtype.itemsize != size_and_offsets[0]:
- # "Old" versions of numpy (1.6.x?) silently ignore "itemsize". Boo.
- dtype_arg_dict["names"].append("_pycl_size_fixer")
- dtype_arg_dict["formats"].append(np.uint8)
- dtype_arg_dict["offsets"].append(int(size_and_offsets[0])-1)
+ try:
+ dtype_arg_dict = {
+ 'names': [field_name
+ for field_name, (field_dtype, offset) in fields],
+ 'formats': [field_dtype
+ for field_name, (field_dtype, offset) in fields],
+ 'offsets': [int(x) for x in offsets],
+ 'itemsize': int(size_and_offsets[0]),
+ }
dtype = np.dtype(dtype_arg_dict)
+ if dtype.itemsize != size_and_offsets[0]:
+ # "Old" versions of numpy (1.6.x?) silently ignore "itemsize". Boo.
+ dtype_arg_dict["names"].append("_pycl_size_fixer")
+ dtype_arg_dict["formats"].append(np.uint8)
+ dtype_arg_dict["offsets"].append(int(size_and_offsets[0])-1)
+ dtype = np.dtype(dtype_arg_dict)
+ except NotImplementedError:
+ def calc_field_type():
+ total_size = 0
+ padding_count = 0
+ for offset, (field_name, (field_dtype, _)) in zip(offsets, fields):
+ if offset > total_size:
+ padding_count += 1
+ yield ('__pycl_padding%d' % padding_count,
+ 'V%d' % offset - total_size)
+ yield field_name, field_dtype
+ total_size = field_dtype.itemsize + offset
+ dtype = np.dtype(list(calc_field_type()))
assert dtype.itemsize == size_and_offsets[0]
return dtype, c_decl
+if _dtype_hashable:
+ _memoize_dtype_to_c_struct = memoize
+else:
+ import json as _json
+ _memoize_dtype_to_c_struct = memoize(
+ key=lambda device, dtype: (device, _dtype_to_key(dtype)))
-@memoize
+@_memoize_dtype_to_c_struct
def dtype_to_c_struct(device, dtype):
matched_dtype, c_decl = match_dtype_to_c_struct(
device, dtype_to_ctype(dtype), dtype)
diff --git a/test/test_array.py b/test/test_array.py
index 49c9d44bd3d7d317e720dcf9e712e78d83c0cc23..bfe227849e4f3f2d7ccaaf0da1e97c4f128d986b 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -445,6 +445,13 @@ def test_random(ctx_factory):
# {{{ misc
def test_numpy_integer_shape(ctx_factory):
+ try:
+ list(np.int32(17))
+ except:
+ pass
+ else:
+ from pytest import skip
+ skip("numpy implementation does not handle scalar correctly.")
context = ctx_factory()
queue = cl.CommandQueue(context)
diff --git a/test/test_clmath.py b/test/test_clmath.py
index 5778860b260eb25f488d00507fd11adf9bbc14e0..6ebbe46bcb1f49ed70e1797c003013fe43d79147 100644
--- a/test/test_clmath.py
+++ b/test/test_clmath.py
@@ -88,7 +88,8 @@ def make_unary_function_test(name, limits=(0, 1), threshold=0, use_complex=False
args = cl_array.arange(queue, a, b, (b-a)/s, dtype=dtype)
if dtype.kind == "c":
- args = args+dtype.type(1j)*args
+ # args = args + dtype.type(1j) * args
+ args = args + args * dtype.type(1j)
gpu_results = gpu_func(args).get()
cpu_results = cpu_func(args.get())
@@ -126,6 +127,40 @@ if have_cl():
test_tanh = make_unary_function_test("tanh", (-3, 3), 2e-6, use_complex=True)
+def test_atan2(ctx_factory):
+ context = ctx_factory()
+ queue = cl.CommandQueue(context)
+
+ for s in sizes:
+ a = (cl_array.arange(queue, s, dtype=np.float32) - s / 2) / 100
+ a2 = (s / 2 - 1 - cl_array.arange(queue, s, dtype=np.float32)) / 100
+ b = clmath.atan2(a, a2)
+
+ a = a.get()
+ a2 = a2.get()
+ b = b.get()
+
+ for i in range(s):
+ assert abs(math.atan2(a[i], a2[i]) - b[i]) < 1e-6
+
+
+def test_atan2pi(ctx_factory):
+ context = ctx_factory()
+ queue = cl.CommandQueue(context)
+
+ for s in sizes:
+ a = (cl_array.arange(queue, s, dtype=np.float32) - s / 2) / 100
+ a2 = (s / 2 - 1 - cl_array.arange(queue, s, dtype=np.float32)) / 100
+ b = clmath.atan2pi(a, a2)
+
+ a = a.get()
+ a2 = a2.get()
+ b = b.get()
+
+ for i in range(s):
+ assert abs(math.atan2(a[i], a2[i]) / math.pi - b[i]) < 1e-6
+
+
def test_fmod(ctx_factory):
context = ctx_factory()
queue = cl.CommandQueue(context)
diff --git a/test/test_wrapper.py b/test/test_wrapper.py
index 9dd9de445d800f0b30ece8b844444a750e13c17d..4358422bed4708b0f30a8a30aaa0e1561b0acee2 100644
--- a/test/test_wrapper.py
+++ b/test/test_wrapper.py
@@ -48,6 +48,8 @@ def test_get_info(ctx_factory):
pocl_quirks = [
(cl.Buffer, cl.mem_info.OFFSET),
+ (cl.Program, cl.program_info.BINARIES),
+ (cl.Program, cl.program_info.BINARY_SIZES),
]
if ctx._get_cl_version() >= (1, 2) and cl.get_cl_header_version() >= (1, 2):
pocl_quirks.extend([
@@ -69,6 +71,9 @@ def test_get_info(ctx_factory):
(("The pocl project", "Portable Computing Language",
"OpenCL 1.2 pocl 0.9-pre"),
pocl_quirks),
+ (("The pocl project", "Portable Computing Language",
+ "OpenCL 1.2 pocl 0.9"),
+ pocl_quirks),
(("Apple", "Apple",
"OpenCL 1.2 (Apr 25 2013 18:32:06)"),
[
@@ -584,6 +589,13 @@ def test_context_dep_memoize(ctx_factory):
def test_can_build_binary(ctx_factory):
ctx = ctx_factory()
device, = ctx.devices
+ platform = device.platform
+
+ if (platform.vendor == "The pocl project" and
+ platform.name == "Portable Computing Language"):
+ # Segfault on pocl 0.9
+ from pytest import skip
+ skip("pocl doesn't like getting PROGRAM_BINARIES")
program = cl.Program(ctx, """
__kernel void simple(__global float *in, __global float *out)