From 8ae62aaa2f707fc55ff0e9190bb0ec44990ff29e Mon Sep 17 00:00:00 2001 From: Vincent Favre-Nicolin Date: Sun, 14 Jul 2024 21:12:46 +0200 Subject: [PATCH 1/8] Compatibility with numpy 2 --- pycuda/gpuarray.py | 2 +- pyproject.toml | 6 ++---- src/wrapper/_pvt_struct_v3.cpp | 4 ++-- src/wrapper/mempool.cpp | 15 ++++++++------- src/wrapper/wrap_cudadrv.cpp | 22 ++++++++++++++-------- test/test_gpuarray.py | 2 +- 6 files changed, 28 insertions(+), 23 deletions(-) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 4f739ad6..b4399042 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -1883,7 +1883,7 @@ def concatenate(arrays, axis=0, allocator=None): # }}} shape = tuple(shape) - dtype = np.find_common_type([ary.dtype for ary in arrays], []) + dtype = np.result_type(*(ary.dtype for ary in arrays)) result = empty(shape, dtype, allocator=allocator) full_slice = (slice(None),) * len(shape) diff --git a/pyproject.toml b/pyproject.toml index 2bc8218a..98caf09e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,8 @@ [build-system] -# For each Python version, build against the oldest numpy C_API_VERSION for -# which binary numpy wheels exist, and then the newest version of numpy -# implementing that C_API_VERSION. +# See https://github.com/scipy/oldest-supported-numpy deprecation notice requires = [ "setuptools", "wheel", - "oldest-supported-numpy", + "numpy>=2.0", ] diff --git a/src/wrapper/_pvt_struct_v3.cpp b/src/wrapper/_pvt_struct_v3.cpp index 2b10cc04..ab488b31 100644 --- a/src/wrapper/_pvt_struct_v3.cpp +++ b/src/wrapper/_pvt_struct_v3.cpp @@ -743,7 +743,7 @@ np_complex_float(char *p, PyObject *v, const formatdef *f) NPY_CFLOAT); if (!v_cast) return -1; - memcpy(p, PyArray_DATA(v_cast), PyArray_NBYTES(v_cast)); + memcpy(p, PyArray_DATA((PyArrayObject*)v_cast), PyArray_NBYTES((PyArrayObject*)v_cast)); Py_DECREF(v_cast); } else { @@ -773,7 +773,7 @@ np_complex_double(char *p, PyObject *v, const formatdef *f) NPY_CDOUBLE); if (!v_cast) return -1; - memcpy(p, PyArray_DATA(v_cast), PyArray_NBYTES(v_cast)); + memcpy(p, PyArray_DATA((PyArrayObject*)v_cast), PyArray_NBYTES((PyArrayObject*)v_cast)); Py_DECREF(v_cast); } else { diff --git a/src/wrapper/mempool.cpp b/src/wrapper/mempool.cpp index d889d516..07c00de9 100644 --- a/src/wrapper/mempool.cpp +++ b/src/wrapper/mempool.cpp @@ -8,8 +8,9 @@ #include #include - - +#if NPY_ABI_VERSION < 0x02000000 + #define PyDataType_ELSIZE(descr) ((descr)->elsize) +#endif namespace py = boost::python; @@ -194,15 +195,15 @@ namespace std::unique_ptr alloc( new pooled_host_allocation( - pool, tp_descr->elsize*pycuda::size_from_dims(dims.size(), &dims.front()))); + pool, PyDataType_ELSIZE(tp_descr)*pycuda::size_from_dims(dims.size(), &dims.front()))); - NPY_ORDER order = PyArray_CORDER; + NPY_ORDER order = NPY_CORDER; PyArray_OrderConverter(order_py.ptr(), &order); int flags = 0; - if (order == PyArray_FORTRANORDER) + if (order == NPY_FORTRANORDER) flags |= NPY_FARRAY; - else if (order == PyArray_CORDER) + else if (order == NPY_CORDER) flags |= NPY_CARRAY; else throw std::runtime_error("unrecognized order specifier"); @@ -213,7 +214,7 @@ namespace alloc->ptr(), flags, /*obj*/NULL)); py::handle<> alloc_py(handle_from_new_ptr(alloc.release())); - PyArray_BASE(result.get()) = alloc_py.get(); + PyArray_SetBaseObject((PyArrayObject*)result.get(), alloc_py.get()); Py_INCREF(alloc_py.get()); return result; diff --git a/src/wrapper/wrap_cudadrv.cpp b/src/wrapper/wrap_cudadrv.cpp index 3758689b..5d53271d 100644 --- a/src/wrapper/wrap_cudadrv.cpp +++ b/src/wrapper/wrap_cudadrv.cpp @@ -10,6 +10,9 @@ #include "wrap_helpers.hpp" #include +#if NPY_ABI_VERSION < 0x02000000 + #define PyDataType_ELSIZE(descr) ((descr)->elsize) +#endif @@ -573,17 +576,17 @@ namespace std::unique_ptr alloc( new Allocation( - tp_descr->elsize*pycuda::size_from_dims(dims.size(), &dims.front()), + PyDataType_ELSIZE(tp_descr)*pycuda::size_from_dims(dims.size(), &dims.front()), par1) ); - NPY_ORDER order = PyArray_CORDER; + NPY_ORDER order = NPY_CORDER; PyArray_OrderConverter(order_py.ptr(), &order); int ary_flags = 0; - if (order == PyArray_FORTRANORDER) + if (order == NPY_FORTRANORDER) ary_flags |= NPY_FARRAY; - else if (order == PyArray_CORDER) + else if (order == NPY_CORDER) ary_flags |= NPY_CARRAY; else throw pycuda::error("numpy_empty", CUDA_ERROR_INVALID_VALUE, @@ -595,7 +598,7 @@ namespace alloc->data(), ary_flags, /*obj*/NULL)); py::handle<> alloc_py(handle_from_new_ptr(alloc.release())); - PyArray_BASE(result.get()) = alloc_py.get(); + PyArray_SetBaseObject((PyArrayObject*)result.get(), alloc_py.get()); Py_INCREF(alloc_py.get()); return result; @@ -608,13 +611,15 @@ namespace throw pycuda::error("register_host_memory", CUDA_ERROR_INVALID_VALUE, "ary argument is not a numpy array"); - if (!PyArray_ISCONTIGUOUS(ary.ptr())) + if (!PyArray_ISCONTIGUOUS((PyArrayObject*)ary.ptr())) throw pycuda::error("register_host_memory", CUDA_ERROR_INVALID_VALUE, "ary argument is not contiguous"); std::unique_ptr regmem( new registered_host_memory( - PyArray_DATA(ary.ptr()), PyArray_NBYTES(ary.ptr()), flags, ary)); + PyArray_DATA((PyArrayObject*)ary.ptr()), + PyArray_NBYTES((PyArrayObject*)ary.ptr()), + flags, ary)); PyObject *new_array_ptr = PyArray_FromInterface(ary.ptr()); if (new_array_ptr == Py_NotImplemented) @@ -624,7 +629,8 @@ namespace py::handle<> result(new_array_ptr); py::handle<> regmem_py(handle_from_new_ptr(regmem.release())); - PyArray_BASE(result.get()) = regmem_py.get(); + // ValueError: Cannot set the NumPy array 'base' dependency more than once + PyArray_SetBaseObject((PyArrayObject*)result.get(), regmem_py.get()); Py_INCREF(regmem_py.get()); return result; diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 629a2630..2fa72157 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -1376,7 +1376,7 @@ class TestGPUArray: for i in range(10): red(a_gpu[i], out=max_gpu[i]) - assert np.alltrue(a.max(axis=1) == max_gpu.get()) + assert np.all(a.max(axis=1) == max_gpu.get()) def test_sum_allocator(self): # FIXME -- GitLab From f820b84f31cdbc5ae16ce467607774dd45882f1e Mon Sep 17 00:00:00 2001 From: Vincent Favre-Nicolin Date: Sun, 14 Jul 2024 21:19:17 +0200 Subject: [PATCH 2/8] Update compyte version for numpy 2 --- pycuda/compyte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycuda/compyte b/pycuda/compyte index d4549d4c..f6d87a19 160000 --- a/pycuda/compyte +++ b/pycuda/compyte @@ -1 +1 @@ -Subproject commit d4549d4c711513e2cc098d3f5d4e918eac53ee7a +Subproject commit f6d87a19ec50c86654845fee7fc6876154087ff1 -- GitLab From ed783b3c8ca8da712f06fed872e7fdccdf344e3b Mon Sep 17 00:00:00 2001 From: Vincent Favre-Nicolin Date: Mon, 15 Jul 2024 12:15:18 +0200 Subject: [PATCH 3/8] Make sure array shape and strides are int and not np.intNN types which could overflow when using numpy 2 --- pycuda/gpuarray.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index b4399042..b2d3ac37 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -242,6 +242,10 @@ class GPUArray: # bombs if s is a Python integer s = s.item() + # Make sure shape is made of int and not e.g. np.int32 as these can overflow + # e.g. in __getitem__() when adding the new_offset... + shape = tuple(int(v) for v in shape) + if strides is None: if order == "F": strides = _f_contiguous_strides(dtype.itemsize, shape) @@ -255,7 +259,9 @@ class GPUArray: strides = tuple(strides) - self.shape = tuple(shape) + strides = tuple(int(v) for v in strides) + + self.shape = shape self.dtype = dtype self.strides = strides self.mem_size = self.size = s -- GitLab From bf012c532c49d03b509e29a3585661b1696c472f Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 15 Jul 2024 13:42:22 -0500 Subject: [PATCH 4/8] Fix compyte git submodule URL --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 5ca5d423..14599ac3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,4 @@ url = https://github.com/inducer/bpl-subset [submodule "pycuda/compyte"] path = pycuda/compyte - url = https://github.com/inducer/compyte + url = https://github.com/inducer/compyte.git -- GitLab From 0b905ece74d53c301a08e839e966964975274e0d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 15 Jul 2024 13:43:37 -0500 Subject: [PATCH 5/8] Update compyte --- pycuda/compyte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycuda/compyte b/pycuda/compyte index f6d87a19..955160ac 160000 --- a/pycuda/compyte +++ b/pycuda/compyte @@ -1 +1 @@ -Subproject commit f6d87a19ec50c86654845fee7fc6876154087ff1 +Subproject commit 955160ac2f504dabcd8641471a56146fa1afe35d -- GitLab From 1b760e4b8c5c1d8a8ea59c69708e83c474f0eecc Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 23 Jul 2024 14:44:48 -0500 Subject: [PATCH 6/8] Skip texture tests on CUDA 12+ --- test/test_driver.py | 33 +++++++++++++++++++++++++++++++++ test/test_gpuarray.py | 6 ++++++ 2 files changed, 39 insertions(+) diff --git a/test/test_driver.py b/test/test_driver.py index 9deae3be..be526971 100644 --- a/test/test_driver.py +++ b/test/test_driver.py @@ -181,6 +181,9 @@ class TestDriver: @mark_cuda_test def test_2d_texture(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("texture references were removed in CUDA 12") + mod = SourceModule( """ texture mtx_tex; @@ -208,6 +211,9 @@ class TestDriver: @mark_cuda_test def test_multiple_2d_textures(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("texture references were removed in CUDA 12") + mod = SourceModule( """ texture mtx_tex; @@ -242,6 +248,9 @@ class TestDriver: @mark_cuda_test def test_multichannel_2d_texture(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("texture references were removed in CUDA 12") + mod = SourceModule( """ #define CHANNELS 4 @@ -280,6 +289,9 @@ class TestDriver: @mark_cuda_test def test_multichannel_linear_texture(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("texture references were removed in CUDA 12") + mod = SourceModule( """ #define CHANNELS 4 @@ -315,6 +327,9 @@ class TestDriver: @mark_cuda_test def test_2d_fp_textures(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("texture references were removed in CUDA 12") + orden = "F" npoints = 32 @@ -369,6 +384,9 @@ class TestDriver: @mark_cuda_test def test_2d_fp_textures_layered(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("texture references were removed in CUDA 12") + orden = "F" npoints = 32 @@ -423,6 +441,9 @@ class TestDriver: @mark_cuda_test def test_3d_fp_textures(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("texture references were removed in CUDA 12") + orden = "C" npoints = 32 @@ -477,6 +498,9 @@ class TestDriver: @mark_cuda_test def test_3d_fp_surfaces(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("surface references were removed in CUDA 12") + orden = "C" npoints = 32 @@ -556,6 +580,9 @@ class TestDriver: @mark_cuda_test def test_2d_fp_surfaces(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("surface references were removed in CUDA 12") + orden = "C" npoints = 32 @@ -734,6 +761,9 @@ class TestDriver: @mark_cuda_test def test_3d_texture(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("texture references were removed in CUDA 12") + # adapted from code by Nicolas Pinto w = 2 h = 4 @@ -842,6 +872,9 @@ class TestDriver: @mark_cuda_test def test_fp_textures(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("texture references were removed in CUDA 12") + if drv.Context.get_device().compute_capability() < (1, 3): return diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 2fa72157..0bc37eb4 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -696,6 +696,9 @@ class TestGPUArray: assert la.norm(a_cpu - a_gpu.get()) == 0, i def test_take(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("texture references were removed in CUDA 12") + idx = gpuarray.arange(0, 10000, 2, dtype=np.uint32) for dtype in [np.float32, np.complex64]: a = gpuarray.arange(0, 600000, dtype=np.uint32).astype(dtype) @@ -1055,6 +1058,9 @@ class TestGPUArray: assert la.norm(min_a_b_gpu.get() - np.minimum(a, b)) == 0 def test_take_put(self): + if drv.get_driver_version() // 1000 >= 12: + pytest.skip("texture references were removed in CUDA 12") + for n in [5, 17, 333]: one_field_size = 8 buf_gpu = gpuarray.zeros(n * one_field_size, dtype=np.float32) -- GitLab From 276ec2a88f5d97bd0418468dc66eac6db7ecec42 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 23 Jul 2024 15:01:12 -0500 Subject: [PATCH 7/8] xfail test_register_host_memory --- test/test_driver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_driver.py b/test/test_driver.py index be526971..d167aed2 100644 --- a/test/test_driver.py +++ b/test/test_driver.py @@ -948,6 +948,8 @@ class TestDriver: @mark_cuda_test def test_register_host_memory(self): + pytest.xfail("known issue: must fix array creation") + if drv.get_version() < (4,): from py.test import skip -- GitLab From e99dbe3a084b32c44fbfab3ffd1be4c65a4f0e21 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 23 Jul 2024 15:02:02 -0500 Subject: [PATCH 8/8] Ifdef out pycuda helpers header on CUDA 12 --- pycuda/cuda/pycuda-helpers.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pycuda/cuda/pycuda-helpers.hpp b/pycuda/cuda/pycuda-helpers.hpp index 5f25092a..f1f7dec2 100644 --- a/pycuda/cuda/pycuda-helpers.hpp +++ b/pycuda/cuda/pycuda-helpers.hpp @@ -12,6 +12,7 @@ extern "C++" { typedef uint2 fp_tex_cfloat; typedef int4 fp_tex_cdouble; +#if __CUDACC_VER_MAJOR__ < 12 template __device__ pycuda::complex fp_tex1Dfetch(texture tex, int i) { @@ -244,6 +245,7 @@ extern "C++" { PYCUDA_GENERATE_FP_TEX_FUNCS(unsigned short int) PYCUDA_GENERATE_FP_TEX_FUNCS(char) PYCUDA_GENERATE_FP_TEX_FUNCS(unsigned char) +#endif } #endif -- GitLab