diff --git a/pyopencl/scan.py b/pyopencl/scan.py index ae380ff29a8567558a9220519412ca393b25caad..41dd7912e3912a467274d6d0213331d899ffc083 100644 --- a/pyopencl/scan.py +++ b/pyopencl/scan.py @@ -143,7 +143,7 @@ void ${name_prefix}_scan_intervals( ) { // index K in first dimension used for carry storage - %if scan_dtype.itemsize > 4 and scan_dtype.itemsize % 8 == 0 and is_gpu: + %if use_bank_conflict_avoidance: // Avoid bank conflicts by adding a single 32-bit value to the size of // the scan type. struct __attribute__ ((__packed__)) wrapped_scan_type @@ -1064,7 +1064,10 @@ class GenericScanKernel(_GenericScanKernelBase): dev.local_mem_size for dev in self.devices) - if self.devices[0].type == cl.device_type.CPU: + is_cpu = self.devices[0].type & cl.device_type.CPU + is_gpu = self.devices[0].type & cl.device_type.GPU + + if is_cpu: # (about the widest vector a CPU can support, also taking # into account that CPUs don't hide latency by large work groups max_scan_wg_size = 16 @@ -1073,6 +1076,9 @@ class GenericScanKernel(_GenericScanKernelBase): max_scan_wg_size = min(dev.max_work_group_size for dev in self.devices) wg_size_multiples = 64 + use_bank_conflict_avoidance = ( + self.dtype.itemsize > 4 and self.dtype.itemsize % 8 == 0 and is_gpu) + # k_group_size should be a power of two because of in-kernel # division by that number. @@ -1082,11 +1088,12 @@ class GenericScanKernel(_GenericScanKernelBase): wg_size_multiples): k_group_size = 2**k_exp - lmem_use = self.get_local_mem_use(wg_size, k_group_size) + lmem_use = self.get_local_mem_use(wg_size, k_group_size, + use_bank_conflict_avoidance) if lmem_use + 256 <= avail_local_mem: solutions.append((wg_size*k_group_size, k_group_size, wg_size)) - if self.devices[0].type & cl.device_type.GPU: + if is_gpu: from pytools import any for wg_size_floor in [256, 192, 128]: have_sol_above_floor = any(wg_size >= wg_size_floor @@ -1109,7 +1116,8 @@ class GenericScanKernel(_GenericScanKernelBase): input_fetch_exprs=self.input_fetch_exprs, is_first_level=True, store_segment_start_flags=self.store_segment_start_flags, - k_group_size=k_group_size) + k_group_size=k_group_size, + use_bank_conflict_avoidance=use_bank_conflict_avoidance) # Will this device actually let us execute this kernel # at the desired work group size? Building it is the @@ -1164,6 +1172,7 @@ class GenericScanKernel(_GenericScanKernelBase): is_first_level=False, store_segment_start_flags=False, k_group_size=k_group_size, + use_bank_conflict_avoidance=use_bank_conflict_avoidance, **second_level_build_kwargs) # }}} @@ -1202,7 +1211,7 @@ class GenericScanKernel(_GenericScanKernelBase): # {{{ scan kernel build/properties - def get_local_mem_use(self, k_group_size, wg_size): + def get_local_mem_use(self, k_group_size, wg_size, use_bank_conflict_avoidance): arg_dtypes = {} for arg in self.parsed_args: arg_dtypes[arg.name] = arg.dtype @@ -1211,9 +1220,13 @@ class GenericScanKernel(_GenericScanKernelBase): for name, arg_name, ife_offset in self.input_fetch_exprs: fetch_expr_offsets.setdefault(arg_name, set()).add(ife_offset) + itemsize = self.dtype.itemsize + if use_bank_conflict_avoidance: + itemsize += 4 + return ( # ldata - self.dtype.itemsize*(k_group_size+1)*(wg_size+1) + itemsize*(k_group_size+1)*(wg_size+1) # l_segment_start_flags + k_group_size*wg_size @@ -1228,7 +1241,8 @@ class GenericScanKernel(_GenericScanKernelBase): def build_scan_kernel(self, max_wg_size, arguments, input_expr, is_segment_start_expr, input_fetch_exprs, is_first_level, - store_segment_start_flags, k_group_size): + store_segment_start_flags, k_group_size, + use_bank_conflict_avoidance): scalar_arg_dtypes = get_arg_list_scalar_arg_dtypes(arguments) # Empirically found on Nv hardware: no need to be bigger than this size @@ -1245,6 +1259,7 @@ class GenericScanKernel(_GenericScanKernelBase): input_fetch_exprs=input_fetch_exprs, is_first_level=is_first_level, store_segment_start_flags=store_segment_start_flags, + use_bank_conflict_avoidance=use_bank_conflict_avoidance, **self.code_variables)) prg = cl.Program(self.context, scan_src).build(self.options) diff --git a/test/test_algorithm.py b/test/test_algorithm.py index 59fa60f03adb6068247a4fe95678164b81fa88df..7f0f9f4c71b8ab4b73f79d4fd7afbe4537677201 100644 --- a/test/test_algorithm.py +++ b/test/test_algorithm.py @@ -25,7 +25,6 @@ THE SOFTWARE. import numpy as np import numpy.linalg as la import sys -import pytools.test from pytools import memoize from test_array import general_clrand @@ -36,11 +35,11 @@ import pyopencl.array as cl_array # noqa from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) from pyopencl.characterize import has_double_support +from pyopencl.scan import InclusiveScanKernel, ExclusiveScanKernel # {{{ elementwise -@pytools.test.mark_test.opencl def test_elwise_kernel(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -62,7 +61,6 @@ def test_elwise_kernel(ctx_factory): assert la.norm((c_gpu - (5 * a_gpu + 6 * b_gpu)).get()) < 1e-5 -@pytools.test.mark_test.opencl def test_elwise_kernel_with_options(ctx_factory): from pyopencl.clrandom import rand as clrand from pyopencl.elementwise import ElementwiseKernel @@ -94,7 +92,6 @@ def test_elwise_kernel_with_options(ctx_factory): assert la.norm(gv - gt) < 1e-5 -@pytools.test.mark_test.opencl def test_ranged_elwise_kernel(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -119,7 +116,6 @@ def test_ranged_elwise_kernel(ctx_factory): assert (a_cpu == a_gpu.get()).all() -@pytools.test.mark_test.opencl def test_take(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -130,7 +126,6 @@ def test_take(ctx_factory): assert ((3 * idx).get() == result.get()).all() -@pytools.test.mark_test.opencl def test_arange(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -140,7 +135,6 @@ def test_arange(ctx_factory): assert (np.arange(n, dtype=np.float32) == a.get()).all() -@pytools.test.mark_test.opencl def test_reverse(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -154,7 +148,6 @@ def test_reverse(ctx_factory): assert (a[::-1] == a_gpu.get()).all() -@pytools.test.mark_test.opencl def test_if_positive(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -177,7 +170,6 @@ def test_if_positive(ctx_factory): assert la.norm(min_a_b_gpu.get() - np.minimum(a, b)) == 0 -@pytools.test.mark_test.opencl def test_take_put(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -199,7 +191,6 @@ def test_take_put(ctx_factory): dest_shape=(96,)) -@pytools.test.mark_test.opencl def test_astype(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -231,7 +222,6 @@ def test_astype(ctx_factory): # {{{ reduction -@pytools.test.mark_test.opencl def test_sum(ctx_factory): from pytest import importorskip importorskip("mako") @@ -257,7 +247,6 @@ def test_sum(ctx_factory): assert abs(sum_a_gpu - sum_a) / abs(sum_a) < 1e-4 -@pytools.test.mark_test.opencl def test_minmax(ctx_factory): from pytest import importorskip importorskip("mako") @@ -283,7 +272,6 @@ def test_minmax(ctx_factory): assert op_a_gpu == op_a, (op_a_gpu, op_a, dtype, what) -@pytools.test.mark_test.opencl def test_subset_minmax(ctx_factory): from pytest import importorskip importorskip("mako") @@ -326,7 +314,6 @@ def test_subset_minmax(ctx_factory): assert min_a_gpu == min_a -@pytools.test.mark_test.opencl def test_dot(ctx_factory): from pytest import importorskip importorskip("mako") @@ -374,7 +361,6 @@ def make_mmc_dtype(device): return dtype, c_decl -@pytools.test.mark_test.opencl def test_struct_reduce(ctx_factory): pytest.importorskip("mako") @@ -500,48 +486,41 @@ scan_test_counts = [ ] -@pytools.test.mark_test.opencl -def test_scan(ctx_factory): +@pytest.mark.parametrize("dtype", [np.int32, np.int64]) +@pytest.mark.parametrize("scan_cls", [InclusiveScanKernel, ExclusiveScanKernel]) +def test_scan(ctx_factory, dtype, scan_cls): from pytest import importorskip importorskip("mako") context = ctx_factory() queue = cl.CommandQueue(context) - from pyopencl.scan import InclusiveScanKernel, ExclusiveScanKernel + knl = scan_cls(context, dtype, "a+b", "0") - dtype = np.int32 - for cls in [ - InclusiveScanKernel, - ExclusiveScanKernel - ]: - knl = cls(context, dtype, "a+b", "0") - - for n in scan_test_counts: - host_data = np.random.randint(0, 10, n).astype(dtype) - dev_data = cl_array.to_device(queue, host_data) + for n in scan_test_counts: + host_data = np.random.randint(0, 10, n).astype(dtype) + dev_data = cl_array.to_device(queue, host_data) - # /!\ fails on Nv GT2?? for some drivers - assert (host_data == dev_data.get()).all() + # /!\ fails on Nv GT2?? for some drivers + assert (host_data == dev_data.get()).all() - knl(dev_data) + knl(dev_data) - desired_result = np.cumsum(host_data, axis=0) - if cls is ExclusiveScanKernel: - desired_result -= host_data + desired_result = np.cumsum(host_data, axis=0) + if scan_cls is ExclusiveScanKernel: + desired_result -= host_data - is_ok = (dev_data.get() == desired_result).all() - if 1 and not is_ok: - print("something went wrong, summarizing error...") - print(summarize_error(dev_data.get(), desired_result, host_data)) + is_ok = (dev_data.get() == desired_result).all() + if 1 and not is_ok: + print("something went wrong, summarizing error...") + print(summarize_error(dev_data.get(), desired_result, host_data)) - print("n:%d %s worked:%s" % (n, cls, is_ok)) - assert is_ok - from gc import collect - collect() + print("dtype:%s n:%d %s worked:%s" % (dtype, n, scan_cls, is_ok)) + assert is_ok + from gc import collect + collect() -@pytools.test.mark_test.opencl def test_copy_if(ctx_factory): from pytest import importorskip importorskip("mako") @@ -566,7 +545,6 @@ def test_copy_if(ctx_factory): collect() -@pytools.test.mark_test.opencl def test_partition(ctx_factory): from pytest import importorskip importorskip("mako") @@ -595,7 +573,6 @@ def test_partition(ctx_factory): assert (false_dev.get()[:n-count_true_dev] == false_host).all() -@pytools.test.mark_test.opencl def test_unique(ctx_factory): from pytest import importorskip importorskip("mako") @@ -622,7 +599,6 @@ def test_unique(ctx_factory): collect() -@pytools.test.mark_test.opencl def test_index_preservation(ctx_factory): from pytest import importorskip importorskip("mako") @@ -656,7 +632,6 @@ def test_index_preservation(ctx_factory): collect() -@pytools.test.mark_test.opencl def test_segmented_scan(ctx_factory): from pytest import importorskip importorskip("mako") @@ -755,7 +730,6 @@ def test_segmented_scan(ctx_factory): print("%d excl:%s done" % (n, is_exclusive)) -@pytools.test.mark_test.opencl def test_sort(ctx_factory): from pytest import importorskip importorskip("mako") @@ -798,7 +772,6 @@ def test_sort(ctx_factory): assert (a_dev_sorted.get() == a_sorted).all() -@pytools.test.mark_test.opencl def test_list_builder(ctx_factory): from pytest import importorskip importorskip("mako") @@ -825,7 +798,6 @@ def test_list_builder(ctx_factory): assert (inf.lists.get()[-6:] == [1, 2, 2, 3, 3, 3]).all() -@pytools.test.mark_test.opencl def test_key_value_sorter(ctx_factory): from pytest import importorskip importorskip("mako") diff --git a/test/test_array.py b/test/test_array.py index c147dfaba7333d37c58dd1577e1139721f55f49f..e3258fb9ae01751f93d438b52d039885013b7ecd 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -25,7 +25,6 @@ THE SOFTWARE. import numpy as np import numpy.linalg as la import sys -import pytools.test import pyopencl as cl import pyopencl.array as cl_array @@ -71,7 +70,6 @@ def make_random_array(queue, dtype, size): # {{{ dtype-related -@pytools.test.mark_test.opencl def test_basic_complex(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -88,7 +86,6 @@ def test_basic_complex(ctx_factory): assert la.norm((ary*c).get() - c*host_ary) < 1e-5 * la.norm(host_ary) -@pytools.test.mark_test.opencl def test_mix_complex(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -159,7 +156,6 @@ def test_mix_complex(ctx_factory): assert correct -@pytools.test.mark_test.opencl def test_pow_neg1_vs_inv(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -179,7 +175,6 @@ def test_pow_neg1_vs_inv(ctx_factory): assert la.norm(res2-ref, np.inf) / la.norm(ref) < 1e-13 -@pytools.test.mark_test.opencl def test_vector_fill(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -192,7 +187,6 @@ def test_vector_fill(ctx_factory): a_gpu = cl_array.zeros(queue, 100, dtype=cl_array.vec.float4) -@pytools.test.mark_test.opencl def test_absrealimag(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -227,7 +221,6 @@ def test_absrealimag(ctx_factory): # {{{ operators -@pytools.test.mark_test.opencl def test_rmul_yields_right_type(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -242,7 +235,6 @@ def test_rmul_yields_right_type(ctx_factory): assert isinstance(two_a, cl_array.Array) -@pytools.test.mark_test.opencl def test_pow_array(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -257,7 +249,6 @@ def test_pow_array(ctx_factory): assert (np.abs(pow(a, a) - result) < 1e-3).all() -@pytools.test.mark_test.opencl def test_pow_number(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -269,7 +260,6 @@ def test_pow_number(ctx_factory): assert (np.abs(a ** 2 - result) < 1e-3).all() -@pytools.test.mark_test.opencl def test_multiply(ctx_factory): """Test the muliplication of an array with a scalar. """ @@ -289,7 +279,6 @@ def test_multiply(ctx_factory): assert (a * scalar == a_mult).all() -@pytools.test.mark_test.opencl def test_multiply_array(ctx_factory): """Test the multiplication of two arrays.""" @@ -306,7 +295,6 @@ def test_multiply_array(ctx_factory): assert (a * a == a_squared).all() -@pytools.test.mark_test.opencl def test_addition_array(ctx_factory): """Test the addition of two arrays.""" @@ -320,7 +308,6 @@ def test_addition_array(ctx_factory): assert (a + a == a_added).all() -@pytools.test.mark_test.opencl def test_addition_scalar(ctx_factory): """Test the addition of an array and a scalar.""" @@ -334,7 +321,6 @@ def test_addition_scalar(ctx_factory): assert (7 + a == a_added).all() -@pytools.test.mark_test.opencl def test_substract_array(ctx_factory): """Test the substraction of two arrays.""" #test data @@ -355,7 +341,6 @@ def test_substract_array(ctx_factory): assert (b - a == result).all() -@pytools.test.mark_test.opencl def test_substract_scalar(ctx_factory): """Test the substraction of an array and a scalar.""" @@ -375,7 +360,6 @@ def test_substract_scalar(ctx_factory): assert (7 - a == result).all() -@pytools.test.mark_test.opencl def test_divide_scalar(ctx_factory): """Test the division of an array and a scalar.""" @@ -392,7 +376,6 @@ def test_divide_scalar(ctx_factory): assert (np.abs(2 / a - result) < 1e-5).all() -@pytools.test.mark_test.opencl def test_divide_array(ctx_factory): """Test the division of an array and a scalar. """ @@ -417,7 +400,6 @@ def test_divide_array(ctx_factory): # {{{ RNG -@pytools.test.mark_test.opencl def test_random(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -461,7 +443,6 @@ def test_random(ctx_factory): # {{{ misc -@pytools.test.mark_test.opencl def test_numpy_integer_shape(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -470,7 +451,6 @@ def test_numpy_integer_shape(ctx_factory): cl_array.empty(queue, (np.int32(17), np.int32(17)), np.float32) -@pytools.test.mark_test.opencl def test_len(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -480,7 +460,6 @@ def test_len(ctx_factory): assert len(a_cpu) == 10 -@pytools.test.mark_test.opencl def test_stride_preservation(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -493,7 +472,6 @@ def test_stride_preservation(ctx_factory): assert np.allclose(AT_GPU.get(), AT) -@pytools.test.mark_test.opencl def test_nan_arithmetic(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -519,7 +497,6 @@ def test_nan_arithmetic(ctx_factory): assert (np.isnan(ab) == np.isnan(ab_gpu)).all() -@pytools.test.mark_test.opencl def test_mem_pool_with_arrays(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -532,7 +509,6 @@ def test_mem_pool_with_arrays(ctx_factory): assert b_dev.allocator is mem_pool -@pytools.test.mark_test.opencl def test_view(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -557,7 +533,6 @@ def test_view(ctx_factory): # {{{ slices, concatenation -@pytools.test.mark_test.opencl def test_slice(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -599,7 +574,6 @@ def test_slice(ctx_factory): assert la.norm(a_gpu.get() - a) == 0 -@pytools.test.mark_test.opencl def test_concatenate(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -623,7 +597,6 @@ def test_concatenate(ctx_factory): # {{{ conditionals, any, all -@pytools.test.mark_test.opencl def test_comparisons(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -655,7 +628,6 @@ def test_comparisons(ctx_factory): assert (res_dev.get() == res).all() -@pytools.test.mark_test.opencl def test_any_all(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -679,7 +651,6 @@ def test_any_all(ctx_factory): # }}} -@pytools.test.mark_test.opencl def test_map_to_host(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) diff --git a/test/test_clmath.py b/test/test_clmath.py index 1425cc46184ecd243e2305c967740058163c5e61..190dd6c43acc0184d955536c15ce5318ac105346 100644 --- a/test/test_clmath.py +++ b/test/test_clmath.py @@ -23,7 +23,6 @@ THE SOFTWARE. """ import math import numpy as np -import pytools.test def have_cl(): try: @@ -41,14 +40,9 @@ if have_cl(): from pyopencl.characterize import has_double_support - - - sizes = [10, 128, 1<<10, 1<<11, 1<<13] - - numpy_func_names = { "asin": "arcsin", "acos": "arccos", @@ -56,8 +50,6 @@ numpy_func_names = { } - - def make_unary_function_test(name, limits=(0, 1), threshold=0, use_complex=False): (a, b) = limits a = float(a) @@ -100,7 +92,7 @@ def make_unary_function_test(name, limits=(0, 1), threshold=0, use_complex=False assert (max_err <= my_threshold).all(), \ (max_err, name, dtype) - return pytools.test.mark_test.opencl(test) + return test @@ -129,7 +121,6 @@ if have_cl(): -@pytools.test.mark_test.opencl def test_fmod(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -146,7 +137,6 @@ def test_fmod(ctx_factory): for i in range(s): assert math.fmod(a[i], a2[i]) == b[i] -@pytools.test.mark_test.opencl def test_ldexp(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -163,7 +153,6 @@ def test_ldexp(ctx_factory): for i in range(s): assert math.ldexp(a[i], int(a2[i])) == b[i] -@pytools.test.mark_test.opencl def test_modf(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -182,7 +171,6 @@ def test_modf(ctx_factory): assert intpart_true == intpart[i] assert abs(fracpart_true - fracpart[i]) < 1e-4 -@pytools.test.mark_test.opencl def test_frexp(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -201,7 +189,6 @@ def test_frexp(ctx_factory): assert sig_true == significands[i] assert ex_true == exponents[i] -@pytools.test.mark_test.opencl def test_bessel(ctx_factory): try: import scipy.special as spec diff --git a/test/test_wrapper.py b/test/test_wrapper.py index 7b5a65e23685c1d21ed5c716490b3d8812086caa..3a7a52d9be6edf848e16fa4affa7b767bbd0e8d5 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -24,8 +24,6 @@ THE SOFTWARE. import numpy as np import numpy.linalg as la -import pytools.test - import pyopencl as cl import pyopencl.array as cl_array @@ -41,7 +39,6 @@ else: faulthandler.enable() -@pytools.test.mark_test.opencl def test_get_info(ctx_factory): ctx = ctx_factory() device, = ctx.devices @@ -189,7 +186,6 @@ def test_get_info(ctx_factory): lambda info: img.get_image_info(info)) -@pytools.test.mark_test.opencl def test_int_ptr(ctx_factory): def do_test(obj): new_obj = type(obj).from_int_ptr(obj.int_ptr) @@ -238,7 +234,6 @@ def test_int_ptr(ctx_factory): do_test(img) -@pytools.test.mark_test.opencl def test_invalid_kernel_names_cause_failures(ctx_factory): ctx = ctx_factory() device = ctx.devices[0] @@ -268,7 +263,6 @@ def test_invalid_kernel_names_cause_failures(ctx_factory): raise -@pytools.test.mark_test.opencl def test_image_format_constructor(): # doesn't need image support to succeed iform = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.FLOAT) @@ -278,7 +272,6 @@ def test_image_format_constructor(): assert not iform.__dict__ -@pytools.test.mark_test.opencl def test_nonempty_supported_image_formats(ctx_factory): context = ctx_factory() @@ -292,7 +285,6 @@ def test_nonempty_supported_image_formats(ctx_factory): skip("images not supported on %s" % device.name) -@pytools.test.mark_test.opencl def test_that_python_args_fail(ctx_factory): context = ctx_factory() @@ -325,7 +317,6 @@ def test_that_python_args_fail(ctx_factory): cl.enqueue_read_buffer(queue, a_buf, a_result).wait() -@pytools.test.mark_test.opencl def test_image_2d(ctx_factory): context = ctx_factory() @@ -402,7 +393,6 @@ def test_image_2d(ctx_factory): assert good -@pytools.test.mark_test.opencl def test_image_3d(ctx_factory): #test for image_from_array for 3d image of float2 context = ctx_factory() @@ -478,7 +468,6 @@ def test_image_3d(ctx_factory): assert good -@pytools.test.mark_test.opencl def test_copy_buffer(ctx_factory): context = ctx_factory() @@ -497,7 +486,6 @@ def test_copy_buffer(ctx_factory): assert la.norm(a - b) == 0 -@pytools.test.mark_test.opencl def test_mempool(ctx_factory): from pyopencl.tools import MemoryPool, CLAllocator @@ -517,7 +505,6 @@ def test_mempool(ctx_factory): pool.stop_holding() -@pytools.test.mark_test.opencl def test_mempool_2(): from pyopencl.tools import MemoryPool from random import randrange @@ -532,7 +519,6 @@ def test_mempool_2(): assert asize < asize*(1+1/8) -@pytools.test.mark_test.opencl def test_vector_args(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) @@ -554,7 +540,6 @@ def test_vector_args(ctx_factory): assert (dest == x).all() -@pytools.test.mark_test.opencl def test_header_dep_handling(ctx_factory): context = ctx_factory() @@ -575,7 +560,6 @@ def test_header_dep_handling(ctx_factory): cl.Program(context, kernel_src).build(["-I", os.getcwd()]) -@pytools.test.mark_test.opencl def test_context_dep_memoize(ctx_factory): context = ctx_factory() @@ -593,7 +577,6 @@ def test_context_dep_memoize(ctx_factory): assert counter[0] == 1 -@pytools.test.mark_test.opencl def test_can_build_binary(ctx_factory): ctx = ctx_factory() device, = ctx.devices