import numpy as np
import numpy.linalg as la
import sys
import pytools.test
def have_cl():
    try:
        import pyopencl
        return True
    except:
        return False
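

# Do the OpenCL imports only when the package is importable, so that a
# missing pyopencl shows up as skipped tests rather than a collection error.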
if have_cl():
    import pyopencl.array as cl_array
    import pyopencl as cl
    from pyopencl.tools import pytest_generate_tests_for_pyopencl \
            as pytest_generate_tests
    from pyopencl.characterize import has_double_support
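

# pytest_generate_tests parametrizes each test's ctx_getter argument, so every
# test below runs once per available OpenCL platform/device combination.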


@pytools.test.mark_test.opencl
def test_pow_array(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # test data (representative values; the original array was lost in this copy)
    a = np.array([1, 2, 3, 4, 5]).astype(np.float32)
    a_gpu = cl_array.to_device(queue, a)

    result = pow(a_gpu, a_gpu).get()
    assert (np.abs(a ** a - result) < 1e-3).all()

    result = (a_gpu ** a_gpu).get()
    assert (np.abs(a ** a - result) < 1e-3).all()


@pytools.test.mark_test.opencl
def test_pow_number(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # test data (representative values)
    a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
    a_gpu = cl_array.to_device(queue, a)

    result = pow(a_gpu, 2).get()
    assert (np.abs(a ** 2 - result) < 1e-3).all()


@pytools.test.mark_test.opencl
def test_abs(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # test data: 0, -1, -2, ..., -110 (assumed; forced by the asserts below)
    a = -cl_array.arange(queue, 111, dtype=np.float32)
    res = a.get()

    for i in range(111):
        assert res[i] <= 0

    a = abs(a)
    res = a.get()

    for i in range(111):
        assert abs(res[i]) >= 0
        assert res[i] == i


@pytools.test.mark_test.opencl
def test_len(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # ten-element test array (the assert below expects len == 10)
    a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
    a_cpu = cl_array.to_device(queue, a)
    assert len(a_cpu) == 10


@pytools.test.mark_test.opencl
def test_multiply(ctx_getter):
    """Test the multiplication of an array with a scalar."""
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    for sz in [10, 50000]:
        # dtype/scalar combinations (the original list was lost; float32 assumed)
        for dtype, scalars in [(np.float32, [2])]:
            for scalar in scalars:
                a = np.arange(sz).astype(dtype)
                a_gpu = cl_array.to_device(queue, a)
                a_doubled = (scalar * a_gpu).get()

                assert (a * scalar == a_doubled).all()


@pytools.test.mark_test.opencl
def test_multiply_array(ctx_getter):
    """Test the multiplication of two arrays."""
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # test data (representative values)
    a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
    a_gpu = cl_array.to_device(queue, a)
    b_gpu = cl_array.to_device(queue, a)

    a_squared = (b_gpu * a_gpu).get()

    assert (a * a == a_squared).all()


@pytools.test.mark_test.opencl
def test_addition_array(ctx_getter):
    """Test the addition of two arrays."""
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # test data (representative values)
    a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
    a_gpu = cl_array.to_device(queue, a)

    a_added = (a_gpu + a_gpu).get()

    assert (a + a == a_added).all()


@pytools.test.mark_test.opencl
def test_addition_scalar(ctx_getter):
    """Test the addition of an array and a scalar."""
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # test data (representative values)
    a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
    a_gpu = cl_array.to_device(queue, a)

    a_added = (7 + a_gpu).get()

    assert (7 + a == a_added).all()


@pytools.test.mark_test.opencl
def test_substract_array(ctx_getter):
    """Test the subtraction of two arrays."""
    # test data
    a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
    b = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).astype(np.float32)

    context = ctx_getter()
    queue = cl.CommandQueue(context)

    a_gpu = cl_array.to_device(queue, a)
    b_gpu = cl_array.to_device(queue, b)

    result = (a_gpu - b_gpu).get()
    assert (a - b == result).all()

    result = (b_gpu - a_gpu).get()
    assert (b - a == result).all()


@pytools.test.mark_test.opencl
def test_substract_scalar(ctx_getter):
    """Test the subtraction of an array and a scalar."""
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # test data (representative values)
    a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
    a_gpu = cl_array.to_device(queue, a)

    result = (a_gpu - 7).get()
    assert (a - 7 == result).all()

    result = (7 - a_gpu).get()
    assert (7 - a == result).all()


@pytools.test.mark_test.opencl
def test_divide_scalar(ctx_getter):
    """Test the division of an array and a scalar."""
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # test data (representative values)
    a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
    a_gpu = cl_array.to_device(queue, a)

    result = (a_gpu / 2).get()
    assert (a / 2 == result).all()

    result = (2 / a_gpu).get()
    assert (2 / a == result).all()


@pytools.test.mark_test.opencl
def test_divide_array(ctx_getter):
    """Test the division of two arrays."""
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # test data
    a = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).astype(np.float32)
    b = np.array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10]).astype(np.float32)

    a_gpu = cl_array.to_device(queue, a)
    b_gpu = cl_array.to_device(queue, b)

    a_divide = (a_gpu / b_gpu).get()
    assert (np.abs(a / b - a_divide) < 1e-3).all()

    a_divide = (b_gpu / a_gpu).get()
    assert (np.abs(b / a - a_divide) < 1e-3).all()


@pytools.test.mark_test.opencl
def test_random(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    # test float64 only where the device supports it
    if has_double_support(context.devices[0]):
        dtypes = [np.float32, np.float64]
    else:
        dtypes = [np.float32]

    for dtype in dtypes:
        a = clrand(context, queue, (10, 100), dtype=dtype).get()

        assert (0 <= a).all()
        assert (a < 1).all()


@pytools.test.mark_test.opencl
def test_nan_arithmetic(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    def make_nan_contaminated_vector(size):
        shape = (size,)
        # random data with roughly size//10 NaN entries scattered in
        a = np.random.randn(*shape).astype(np.float32)
        from random import randrange
        for i in range(size // 10):
            a[randrange(0, size)] = float('nan')
        return a

    size = 1 << 20

    a = make_nan_contaminated_vector(size)
    a_gpu = cl_array.to_device(queue, a)
    b = make_nan_contaminated_vector(size)
    b_gpu = cl_array.to_device(queue, b)

    ab = a * b
    ab_gpu = (a_gpu * b_gpu).get()

    # NaNs must propagate identically on host and device
    assert (np.isnan(ab) == np.isnan(ab_gpu)).all()


@pytools.test.mark_test.opencl
def test_elwise_kernel(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    a_gpu = clrand(context, queue, (50,), np.float32)
    b_gpu = clrand(context, queue, (50,), np.float32)
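
    # ElementwiseKernel takes the context, a C argument declaration string,
    # the per-element operation (with "i" as the element index), and a name
    # for the generated kernel.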
    from pyopencl.elementwise import ElementwiseKernel
    lin_comb = ElementwiseKernel(context,
            "float a, float *x, float b, float *y, float *z",
            "z[i] = a*x[i] + b*y[i]",
            "linear_combination")

    c_gpu = cl_array.empty_like(a_gpu)
    lin_comb(5, a_gpu, 6, b_gpu, c_gpu)

    assert la.norm((c_gpu - (5 * a_gpu + 6 * b_gpu)).get()) < 1e-5


@pytools.test.mark_test.opencl
def test_take(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    idx = cl_array.arange(queue, 0, 200000, 2, dtype=np.uint32)
    a = cl_array.arange(queue, 0, 600000, 3, dtype=np.float32)
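
    # a[i] == 3*i by construction, so gathering a at idx must yield 3*idx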
    result = cl_array.take(a, idx)
    assert ((3 * idx).get() == result.get()).all()


@pytools.test.mark_test.opencl
def test_arange(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    n = 5000
    a = cl_array.arange(queue, n, dtype=np.float32)
    assert (np.arange(n, dtype=np.float32) == a.get()).all()


@pytools.test.mark_test.opencl
def test_reverse(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    n = 5000
    # test data: 0..n-1 (assumed; the definition of a was lost in this copy)
    a = np.arange(n).astype(np.float32)
    a_gpu = cl_array.to_device(queue, a)

    a_gpu = a_gpu.reverse()

    assert (a[::-1] == a_gpu.get()).all()


@pytools.test.mark_test.opencl
def test_sum(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)
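
    # minimal sketch of the intended sum-reduction check (array size and
    # tolerance are assumptions):
    from pyopencl.clrandom import rand as clrand

    a_gpu = clrand(context, queue, (200000,), np.float32)
    a = a_gpu.get()

    sum_a = np.sum(a)
    sum_a_gpu = cl_array.sum(a_gpu).get()

    assert abs(sum_a_gpu - sum_a) / abs(sum_a) < 1e-4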


@pytools.test.mark_test.opencl
def test_minmax(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    # dtypes to exercise (float64 only with double support; list assumed)
    if has_double_support(context.devices[0]):
        dtypes = [np.float64, np.float32, np.int32]
    else:
        dtypes = [np.float32, np.int32]

    for what in ["min", "max"]:
        for dtype in dtypes:
            a_gpu = clrand(context, queue, (200000,), dtype)
            a = a_gpu.get()

            op_a = getattr(np, what)(a)
            op_a_gpu = getattr(cl_array, what)(a_gpu).get()

            assert op_a_gpu == op_a, (op_a_gpu, op_a, dtype, what)


@pytools.test.mark_test.opencl
def test_subset_minmax(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    l_a = 200000
    gran = 5
    l_m = l_a - l_a // gran + 1

    if has_double_support(context.devices[0]):
        dtypes = [np.float64, np.float32]
    else:
        dtypes = [np.float32]

    for dtype in dtypes:
        a_gpu = clrand(context, queue, (l_a,), dtype)
        a = a_gpu.get()

        # build an index set that skips every gran-th element
        meaningful_indices_gpu = cl_array.zeros(queue, l_m, dtype=np.int32)
        meaningful_indices = meaningful_indices_gpu.get()
        j = 0
        for i in range(len(meaningful_indices)):
            meaningful_indices[i] = j
            j = j + 1
            if j % gran == 0:
                j = j + 1

        meaningful_indices_gpu = cl_array.to_device(
                queue, meaningful_indices)

        min_a = np.min(a[meaningful_indices])
        min_a_gpu = cl_array.subset_min(meaningful_indices_gpu, a_gpu).get()

        assert min_a_gpu == min_a


@pytools.test.mark_test.opencl
def test_dot(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    # random test vectors (size assumed; the definitions were lost in this copy)
    a_gpu = clrand(context, queue, (200000,), np.float32)
    a = a_gpu.get()
    b_gpu = clrand(context, queue, (200000,), np.float32)
    b = b_gpu.get()

    dot_ab = np.dot(a, b)
    dot_ab_gpu = cl_array.dot(a_gpu, b_gpu).get()

    assert abs(dot_ab_gpu - dot_ab) / abs(dot_ab) < 1e-4


if False:
    @pytools.test.mark_test.opencl
    def test_slice(ctx_getter):
        # context/queue were referenced but never defined in this disabled test
        context = ctx_getter()
        queue = cl.CommandQueue(context)

        from pyopencl.clrandom import rand as clrand

        l = 20000
        a_gpu = clrand(context, queue, (l,), np.float32)
        a = a_gpu.get()

        from random import randrange
        for i in range(200):
            start = randrange(l)
            end = randrange(start, l)

            a_gpu_slice = a_gpu[start:end]
            a_slice = a[start:end]

            assert la.norm(a_gpu_slice.get() - a_slice) == 0


@pytools.test.mark_test.opencl
def test_if_positive(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)
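
    # maximum/minimum exercise the if_positive machinery; they must agree
    # elementwise with np.maximum/np.minimum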
    from pyopencl.clrandom import rand as clrand

    l = 20000
    a_gpu = clrand(context, queue, (l,), np.float32)
    b_gpu = clrand(context, queue, (l,), np.float32)

    a = a_gpu.get()
    b = b_gpu.get()

    max_a_b_gpu = cl_array.maximum(a_gpu, b_gpu)
    min_a_b_gpu = cl_array.minimum(a_gpu, b_gpu)

    assert la.norm(max_a_b_gpu.get() - np.maximum(a, b)) == 0
    assert la.norm(min_a_b_gpu.get() - np.minimum(a, b)) == 0


@pytools.test.mark_test.opencl
def test_take_put(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    for n in [5, 17, 333]:
        one_field_size = 8
        # source buffer and read map (values assumed; lost from this copy)
        buf_gpu = cl_array.zeros(queue, n * one_field_size, dtype=np.float32)
        dest_indices = cl_array.to_device(queue,
                np.array([0, 1, 2, 3, 32, 33, 34, 35], dtype=np.uint32))
        read_map = cl_array.to_device(queue,
                np.array([7, 6, 5, 4, 3, 2, 1, 0], dtype=np.uint32))

        cl_array.multi_take_put(
                arrays=[buf_gpu for i in range(n)],
                dest_indices=dest_indices,
                src_indices=read_map,
                src_offsets=[i * one_field_size for i in range(n)],
                dest_shape=(96,))


@pytools.test.mark_test.opencl
def test_astype(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    if not has_double_support(context.devices[0]):
        return

    a_gpu = clrand(context, queue, (2000,), dtype=np.float32)

    a = a_gpu.get().astype(np.float64)
    a2 = a_gpu.astype(np.float64).get()

    assert la.norm(a - a2) == 0, (a, a2)
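
    # the downcast direction loses precision, so compare with a relative tolerance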
    a_gpu = clrand(context, queue, (2000,), dtype=np.float64)

    a = a_gpu.get().astype(np.float32)
    a2 = a_gpu.astype(np.float32).get()

    assert la.norm(a - a2) / la.norm(a) < 1e-7


@pytools.test.mark_test.opencl
def test_scan(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.scan import InclusiveScanKernel, ExclusiveScanKernel

    dtype = np.int32
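
    # an inclusive scan at position i sums elements 0..i; the exclusive
    # variant sums 0..i-1, starting from the neutral element "0"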
    for cls in [InclusiveScanKernel, ExclusiveScanKernel]:
        knl = cls(context, dtype, "a+b", "0")

        for n in [
                10, 2**10 - 5, 2**10, 2**10 + 5,
                2**20 - 2**18, 2**20 - 2**18 + 5,
                2**20, 2**20 + 5, 2**24,
                ]:
            host_data = np.random.randint(0, 10, n).astype(dtype)
            dev_data = cl_array.to_device(queue, host_data)

            knl(dev_data)

            desired_result = np.cumsum(host_data, axis=0)
            if cls is ExclusiveScanKernel:
                desired_result -= host_data

            assert (dev_data.get() == desired_result).all()

    # release the large test arrays between runs
    from gc import collect
    collect()


@pytools.test.mark_test.opencl
def test_stride_preservation(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    A = np.random.rand(3, 3)
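
    # transposing makes the array Fortran-contiguous; to_device and get
    # should round-trip it without silently reordering to C order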
    AT = A.T
    print(AT.flags.f_contiguous, AT.flags.c_contiguous)
    AT_GPU = cl_array.to_device(queue, AT)
    print(AT_GPU.flags.f_contiguous, AT_GPU.flags.c_contiguous)
    assert np.allclose(AT_GPU.get(), AT)


if __name__ == "__main__":
    # make sure that import failures get reported, instead of skipping the tests
    import pyopencl as cl

    import sys
    if len(sys.argv) > 1:
        # run the test named on the command line (branch body assumed)
        exec(sys.argv[1])
    else:
        from py.test.cmdline import main
        main([__file__])