From 8a858f1292ec922b0c8e415196404756adb07f78 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 12 Aug 2019 11:20:54 -0500 Subject: [PATCH 1/2] Support for bindless textures (thanks Binu Mathew/Kraenion Labs for the patch) --- pycuda/driver.py | 2 +- src/cpp/cuda.hpp | 303 ++++++++++++++++++++++++++++++++++- src/wrapper/wrap_cudadrv.cpp | 34 +++- test/test_driver.py | 80 +++++++++ 4 files changed, 414 insertions(+), 5 deletions(-) diff --git a/pycuda/driver.py b/pycuda/driver.py index fd042a75..916d0980 100644 --- a/pycuda/driver.py +++ b/pycuda/driver.py @@ -188,7 +188,7 @@ def _add_functionality(): if isinstance(arg, np.number): arg_data.append(arg) format += arg.dtype.char - elif isinstance(arg, (DeviceAllocation, PooledDeviceAllocation)): + elif isinstance(arg, (DeviceAllocation, PooledDeviceAllocation, TextureObject, SurfaceObject)): arg_data.append(int(arg)) format += "P" elif isinstance(arg, ArgumentHandler): diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp index da60bd57..e7bcdbf5 100644 --- a/src/cpp/cuda.hpp +++ b/src/cpp/cuda.hpp @@ -37,6 +37,7 @@ #include #include #include +#include #if (BOOST_VERSION/100) < 1035 #warning ***************************************************************** @@ -1190,6 +1191,305 @@ namespace pycuda // }}} + // {{{ bindless textures + +#if CUDAPP_CUDA_VERSION >= 6000 + + template + void tuple_to_array(T *dst, py::tuple src_py, unsigned n) + { + pycuda_size_t src_length = py::len(src_py); + if (src_length != n) + throw pycuda::error( + "setter:", CUDA_ERROR_INVALID_VALUE, + "incorrect number of elements in tuple"); + + for (unsigned i = 0; i < src_length; ++i) + { + R value = py::extract(src_py[i]); + dst[i] = (T)(value); + } + } + + template + py::tuple array_to_tuple_3(T *src) + { + return py::make_tuple((R)src[0], (R)src[1], (R)src[2]); + } + + template + py::tuple array_to_tuple_4(T *src) + { + return py::make_tuple((R)src[0], (R)src[1], (R)src[2], (R)src[3]); + } + + // {{{ texture descriptor + + struct WRAP_CUDA_TEXTURE_DESC : public CUDA_TEXTURE_DESC + { + WRAP_CUDA_TEXTURE_DESC() + { + // This class is a standard layout type. CUDA_TEXTURE_DESC + // needs to be memset to 0. When I made it a member + // of this class, wrapping of members such as .fields + // failed. So unorthodox as this might be, deriving + // a class and memset(this) works and is permissible because + // of standard layout. + // It can be verified that this class is standard layout by: + // #include + //printf("traits: %d\n", std::is_standard_layout::value); + // Do not add *any* member variables or virtual functions. + // That will make it not standard layout. + memset(this, 0, sizeof(CUDA_TEXTURE_DESC)); + } + + py::tuple get_address_mode() + { + return array_to_tuple_3(addressMode); + } + + void set_address_mode(py::tuple src_py) + { + tuple_to_array(addressMode, src_py, 3); + } + + py::tuple get_border_color() + { + return array_to_tuple_4(borderColor); + } + + void set_border_color(py::tuple src_py) + { + tuple_to_array(borderColor, src_py, 4); + } + }; + + // }}} + + // {{{ surface object + + class surface_object : public boost::noncopyable + { + private: + PyObject* m_pyobj; + CUsurfObject m_cusurf; + + public: + surface_object(py::object ary) + : m_cusurf(0), m_pyobj(NULL) + { + + m_pyobj = ary.ptr(); + + CUDA_RESOURCE_DESC resdesc; + memset(&resdesc, 0, sizeof(resdesc)); + + py::extract extract_array(ary); + if (!extract_array.check()) + throw pycuda::error("surface_object", CUDA_ERROR_INVALID_VALUE, + "ary argument is not an instance of Array"); + + + const array & pycuda_array = extract_array(); + resdesc.resType = CU_RESOURCE_TYPE_ARRAY; + resdesc.res.array.hArray = pycuda_array.handle(); + CUDAPP_CALL_GUARDED(cuSurfObjectCreate, (&m_cusurf, &resdesc)); + Py_INCREF(m_pyobj); // Prevent backing array from being garbage collected + } + + CUsurfObject to_int() + { + // CUsurfObject is a typedef for unsigned long long + return m_cusurf; + } + + ~surface_object() + { + cuSurfObjectDestroy(m_cusurf); + Py_XDECREF(m_pyobj); + } + + }; + + // }}} + + // {{{ texture object + + class texture_object : public boost::noncopyable + { + private: + PyObject* m_pyobj; + CUtexObject m_cutex; + + public: + texture_object(py::object ary, const CUDA_TEXTURE_DESC & texdesc) + : m_cutex(0), m_pyobj(NULL) + { + // Tried two different constructors earlier. One had signature + // texture_object(const array &, const cudaTextureDesc &texdesc) + // However, with that signature it was not distinguishable from + // this constructor. C++ treated both differently, but boost + // routed calls to this constructor. So we just use one constructor + // and check argument type. + + m_pyobj = ary.ptr(); + + CUDA_RESOURCE_DESC resdesc; + memset(&resdesc, 0, sizeof(resdesc)); + + py::extract extract_array(ary); + if (extract_array.check()) + { + const array & pycuda_array = extract_array(); + resdesc.resType = CU_RESOURCE_TYPE_ARRAY; + resdesc.res.array.hArray = pycuda_array.handle(); + CUDAPP_CALL_GUARDED(cuTexObjectCreate, (&m_cutex, &resdesc, &texdesc, NULL)); + Py_INCREF(m_pyobj); // Prevent backing array from being garbage collected + return; + } + + if (!PyArray_Check(ary.ptr())) + throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE, + "ary argument is not a numpy array"); + + + + PyArrayObject* np_a = (PyArrayObject*)(m_pyobj); + void * data = (float*)PyArray_DATA(np_a); + unsigned memtype; + CUDAPP_CALL_GUARDED(cuPointerGetAttribute, (&memtype, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)data)); + if(memtype != CU_MEMORYTYPE_DEVICE) + throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE, + "ary is not device memory"); + + int ndim = PyArray_NDIM(np_a); + int item_type = PyArray_TYPE(np_a); + int item_size = PyArray_ITEMSIZE(np_a); + npy_intp* shape_v = PyArray_SHAPE(np_a); + npy_intp* stride_v = PyArray_STRIDES(np_a); + + // This used to be a check for !PyArray_ISCONTIGUOUS(ary.ptr()) + // However, we use views into padded arrays to create the equivalent + // of pitch2d memory from managed memory. All views count as non-contiguous. + // So we do a different check here just to see if the last dimension stride is + // greater than the itemsize. Also as of Jan 2019, textures do not allow + // double precision. + if ((stride_v[ndim-1] != item_size) || (item_size > sizeof(float))) + throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE, + "ary argument is not contiguous/texture incompatible"); + + int nchan, ndim_effective; + if (ndim == 1) + { + nchan = 1; + ndim_effective = 1; + } + else + { + nchan = shape_v[ndim - 1]; + ndim_effective = ndim - 1; + } + + if ((nchan < 1) || (nchan > 4)) + throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE, + "number of texture channels should be in {1,2,3,4}"); + if (!((ndim_effective == 1) || (ndim_effective == 2))) + throw pycuda::error( + "texture_object", CUDA_ERROR_INVALID_VALUE, + "textures are limited to 1 or 2 dimensions of texels"); + + + if (ndim_effective == 1) + { + resdesc.resType = CU_RESOURCE_TYPE_LINEAR; + resdesc.res.linear.format = get_cuarray_format(item_type); + resdesc.res.linear.devPtr = (CUdeviceptr)data; + resdesc.res.linear.numChannels = nchan; + resdesc.res.linear.sizeInBytes = PyArray_NBYTES(np_a); + } + else + { + resdesc.resType = CU_RESOURCE_TYPE_PITCH2D; + resdesc.res.pitch2D.format = get_cuarray_format(item_type); + resdesc.res.pitch2D.devPtr = (CUdeviceptr)data; + resdesc.res.pitch2D.height = shape_v[0]; + if(nchan == 3) + { + // CUDA does not support 3 channel images even though RGB/BGR + // is a common case. For convolutional kernels we can still use + // the 3channel image without copying, but we need to consider + // it a 3x wide image of 1 channel. + resdesc.res.pitch2D.numChannels = 1; + resdesc.res.pitch2D.width = shape_v[1] * 3; + } + else + { + resdesc.res.pitch2D.numChannels = nchan; + resdesc.res.pitch2D.width = shape_v[1]; + } + resdesc.res.pitch2D.pitchInBytes = stride_v[0]; + } + + CUDAPP_CALL_GUARDED(cuTexObjectCreate, (&m_cutex, &resdesc, &texdesc, NULL)); + Py_INCREF(m_pyobj); // Prevent backing array from being garbage collected + } + + + CUarray_format get_cuarray_format(int type) + { + switch (type) + { + case NPY_INT8: + return CU_AD_FORMAT_SIGNED_INT8; + + case NPY_UINT8: + return CU_AD_FORMAT_UNSIGNED_INT8; + + case NPY_INT16: + return CU_AD_FORMAT_SIGNED_INT16; + + case NPY_UINT16: + return CU_AD_FORMAT_UNSIGNED_INT16; + + case NPY_INT32: + return CU_AD_FORMAT_SIGNED_INT32; + + case NPY_UINT32: + return CU_AD_FORMAT_UNSIGNED_INT32; + + case NPY_FLOAT16: + return CU_AD_FORMAT_HALF; + + case NPY_FLOAT32: + return CU_AD_FORMAT_FLOAT; + + default: + throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE, + "Unsupported numpy type"); + break; + } + } + + CUtexObject to_int() + { + // CUtexObject is a typedef for unsigned long long + return m_cutex; + } + + ~texture_object() + { + cuTexObjectDestroy(m_cutex); + Py_XDECREF(m_pyobj); + } + + }; + + // }}} + +#endif // CUDAPP_CUDA_VERSION >= 6000 + + // }}} + // {{{ surface reference #if CUDAPP_CUDA_VERSION >= 3010 class module; @@ -2194,9 +2494,8 @@ namespace pycuda } #endif // }}} -} - +} #endif diff --git a/src/wrapper/wrap_cudadrv.cpp b/src/wrapper/wrap_cudadrv.cpp index dfa3d1cd..07c9b0b5 100644 --- a/src/wrapper/wrap_cudadrv.cpp +++ b/src/wrapper/wrap_cudadrv.cpp @@ -260,7 +260,7 @@ namespace py_buffer_wrapper buf_wrapper; buf_wrapper.get(dest.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); - CUDAPP_CALL_GUARDED_THREADED(cuMemcpyAtoH, + CUDAPP_CALL_GUARDED_THREADED(cuMemcpyAtoH, (buf_wrapper.m_buf.buf, ary.handle(), index, buf_wrapper.m_buf.len)); } @@ -1348,7 +1348,7 @@ BOOST_PYTHON_MODULE(_driver) wrp .DEF_SIMPLE_METHOD(get_device_pointer) - .def("attach", &cl::attach, + .def("attach", &cl::attach, (py::arg("mem_flags"), py::arg("stream")=py::object())) ; } @@ -1633,6 +1633,35 @@ BOOST_PYTHON_MODULE(_driver) } // }}} +#if CUDAPP_CUDA_VERSION >= 6000 + // {{{ texture descriptor + { + typedef WRAP_CUDA_TEXTURE_DESC cl; + py::class_("TextureDescriptor") + .add_property("address_mode", &cl::get_address_mode, &cl::set_address_mode) + .def_readwrite("filter_mode", &cl::filterMode) + .def_readwrite("flags", &cl::flags) + .add_property("border_color", &cl::get_border_color, &cl::set_border_color); + } + // }}} + + // {{{ texture object + { + typedef texture_object cl; + py::class_("TextureObject", py::init()) + .def("__int__", &cl::to_int); + } + // }}} + + // {{{ surface object + { + typedef surface_object cl; + py::class_("SurfaceObject", py::init()) + .def("__int__", &cl::to_int); + } + // }}} +#endif + // {{{ surface reference #if CUDAPP_CUDA_VERSION >= 3010 { @@ -1669,6 +1698,7 @@ BOOST_PYTHON_MODULE(_driver) #ifdef HAVE_CURAND pycuda_expose_curand(); #endif + } // vim: foldmethod=marker diff --git a/test/test_driver.py b/test/test_driver.py index b440eff0..8425e7e3 100644 --- a/test/test_driver.py +++ b/test/test_driver.py @@ -947,6 +947,86 @@ class TestDriver: test_kernel = mod.get_function('test_kernel') test_kernel(grid=(2, 1), block=(1, 1, 1)) + @mark_cuda_test + def test_bindless_textures(self): + if drv.get_version() < (6, 0, 0): + pytest.skip("bindless textures not supported on CUDA < 6") + + mod = SourceModule(""" + __global__ void gpu_add(float *a, float *b, float *res) + { + int idx = threadIdx.x + threadIdx.y*16; + res[idx] = a[idx] + b[idx]; + } + + // texture object is a kernel argument + __global__ void tex_kernel1( + cudaTextureObject_t tex, cudaSurfaceObject_t surf) + { + int i = blockIdx.x *blockDim.x + threadIdx.x; + float x = tex1Dfetch(tex, i); + // do some work using x ... + } + """) + + print('TextureObject exists. Members:', dir(drv.TextureObject)) + + tdesc = drv.TextureDescriptor() + print('TextureDescriptor exists. Members:', dir(tdesc)) + + print('Setting address mode') + print('before', tdesc.address_mode) + tdesc.address_mode = (1, 2, 3) + print('after', tdesc.address_mode) + + adesc = drv.ArrayDescriptor() + adesc.width = 1024 + adesc.height = 768 + adesc.format = drv.array_format.UNSIGNED_INT8 + adesc.num_channels = 4 + sarray = drv.Array(adesc) + print('sarray made') + surf1 = drv.SurfaceObject(sarray) + print('tex1 made') + + a_um_a = drv.managed_empty( + (16, 16, 4), np.float32, mem_flags=drv.mem_attach_flags.GLOBAL) + a_um_a[:] = np.random.randn(*a_um_a.shape) + + import sys + print('pre ref_count', sys.getrefcount(a_um_a)) + tex2 = drv.TextureObject(a_um_a, tdesc) + print('post ref_count', sys.getrefcount(a_um_a)) + print('tex2 made') + + print('post ref_count', sys.getrefcount(a_um_a)) + print('{0:016x}'.format(id(tex2)), '{0:016x}'.format(int(tex2))) + + func = mod.get_function("tex_kernel1") + func(a_um_a, tex2, surf1, block=(16, 16, 1)) + drv.Context.synchronize() + + # Uniform memory version of hello world. + a_um_a = drv.managed_empty( + (16, 16), np.float32, mem_flags=drv.mem_attach_flags.GLOBAL) + a_um_a[:] = np.random.randn(*a_um_a.shape) + + b_um_a = drv.managed_empty( + (16, 16), np.float32, mem_flags=drv.mem_attach_flags.GLOBAL) + b_um_a[:] = np.random.randn(*b_um_a.shape) + + res_um_a = drv.managed_empty( + (16, 16), np.float32, mem_flags=drv.mem_attach_flags.GLOBAL) + + func = mod.get_function("gpu_add") + func(a_um_a, b_um_a, res_um_a, block=(16, 16, 1)) + drv.Context.synchronize() + + res_um_a = res_um_a + print(res_um_a) + + assert(np.allclose(res_um_a, a_um_a+b_um_a)) + def test_import_pyopencl_before_pycuda(): try: -- GitLab From ac18f220b7097fb294c84ec286ab0d80f3f3c5ad Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 12 Aug 2019 11:43:54 -0500 Subject: [PATCH 2/2] Improve bindless texture code --- src/cpp/cuda.hpp | 137 ++++++++++++++--------------------- src/wrapper/wrap_cudadrv.cpp | 12 ++- 2 files changed, 64 insertions(+), 85 deletions(-) diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp index e7bcdbf5..d04111c7 100644 --- a/src/cpp/cuda.hpp +++ b/src/cpp/cuda.hpp @@ -1271,30 +1271,19 @@ namespace pycuda class surface_object : public boost::noncopyable { private: - PyObject* m_pyobj; + boost::shared_ptr m_array; CUsurfObject m_cusurf; public: - surface_object(py::object ary) - : m_cusurf(0), m_pyobj(NULL) + surface_object(boost::shared_ptr ary) + : m_array(ary), m_cusurf(0) { - - m_pyobj = ary.ptr(); - CUDA_RESOURCE_DESC resdesc; memset(&resdesc, 0, sizeof(resdesc)); - py::extract extract_array(ary); - if (!extract_array.check()) - throw pycuda::error("surface_object", CUDA_ERROR_INVALID_VALUE, - "ary argument is not an instance of Array"); - - - const array & pycuda_array = extract_array(); resdesc.resType = CU_RESOURCE_TYPE_ARRAY; - resdesc.res.array.hArray = pycuda_array.handle(); + resdesc.res.array.hArray = m_array->handle(); CUDAPP_CALL_GUARDED(cuSurfObjectCreate, (&m_cusurf, &resdesc)); - Py_INCREF(m_pyobj); // Prevent backing array from being garbage collected } CUsurfObject to_int() @@ -1305,8 +1294,7 @@ namespace pycuda ~surface_object() { - cuSurfObjectDestroy(m_cusurf); - Py_XDECREF(m_pyobj); + CUDAPP_CALL_GUARDED_CLEANUP(cuSurfObjectDestroy, (m_cusurf)); } }; @@ -1318,49 +1306,38 @@ namespace pycuda class texture_object : public boost::noncopyable { private: - PyObject* m_pyobj; + // either a numpy array or a CUDA array + py::object m_array; CUtexObject m_cutex; public: texture_object(py::object ary, const CUDA_TEXTURE_DESC & texdesc) - : m_cutex(0), m_pyobj(NULL) + : m_array(ary), m_cutex(0) { - // Tried two different constructors earlier. One had signature - // texture_object(const array &, const cudaTextureDesc &texdesc) - // However, with that signature it was not distinguishable from - // this constructor. C++ treated both differently, but boost - // routed calls to this constructor. So we just use one constructor - // and check argument type. - - m_pyobj = ary.ptr(); - CUDA_RESOURCE_DESC resdesc; memset(&resdesc, 0, sizeof(resdesc)); py::extract extract_array(ary); if (extract_array.check()) - { - const array & pycuda_array = extract_array(); - resdesc.resType = CU_RESOURCE_TYPE_ARRAY; - resdesc.res.array.hArray = pycuda_array.handle(); - CUDAPP_CALL_GUARDED(cuTexObjectCreate, (&m_cutex, &resdesc, &texdesc, NULL)); - Py_INCREF(m_pyobj); // Prevent backing array from being garbage collected - return; - } + { + const array & pycuda_array = extract_array(); + resdesc.resType = CU_RESOURCE_TYPE_ARRAY; + resdesc.res.array.hArray = pycuda_array.handle(); + CUDAPP_CALL_GUARDED(cuTexObjectCreate, (&m_cutex, &resdesc, &texdesc, NULL)); + return; + } - if (!PyArray_Check(ary.ptr())) + if (!PyArray_Check(m_array.ptr())) throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE, - "ary argument is not a numpy array"); + "ary argument is not a numpy array"); - - - PyArrayObject* np_a = (PyArrayObject*)(m_pyobj); + PyArrayObject* np_a = (PyArrayObject*)(m_array.ptr()); void * data = (float*)PyArray_DATA(np_a); unsigned memtype; CUDAPP_CALL_GUARDED(cuPointerGetAttribute, (&memtype, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)data)); if(memtype != CU_MEMORYTYPE_DEVICE) throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE, - "ary is not device memory"); + "ary is not device memory"); int ndim = PyArray_NDIM(np_a); int item_type = PyArray_TYPE(np_a); @@ -1376,23 +1353,23 @@ namespace pycuda // double precision. if ((stride_v[ndim-1] != item_size) || (item_size > sizeof(float))) throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE, - "ary argument is not contiguous/texture incompatible"); + "ary argument is not contiguous/texture incompatible"); int nchan, ndim_effective; if (ndim == 1) - { - nchan = 1; - ndim_effective = 1; - } + { + nchan = 1; + ndim_effective = 1; + } else - { - nchan = shape_v[ndim - 1]; - ndim_effective = ndim - 1; - } + { + nchan = shape_v[ndim - 1]; + ndim_effective = ndim - 1; + } if ((nchan < 1) || (nchan > 4)) throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE, - "number of texture channels should be in {1,2,3,4}"); + "number of texture channels should be in {1,2,3,4}"); if (!((ndim_effective == 1) || (ndim_effective == 2))) throw pycuda::error( "texture_object", CUDA_ERROR_INVALID_VALUE, @@ -1400,38 +1377,37 @@ namespace pycuda if (ndim_effective == 1) + { + resdesc.resType = CU_RESOURCE_TYPE_LINEAR; + resdesc.res.linear.format = get_cuarray_format(item_type); + resdesc.res.linear.devPtr = (CUdeviceptr)data; + resdesc.res.linear.numChannels = nchan; + resdesc.res.linear.sizeInBytes = PyArray_NBYTES(np_a); + } + else + { + resdesc.resType = CU_RESOURCE_TYPE_PITCH2D; + resdesc.res.pitch2D.format = get_cuarray_format(item_type); + resdesc.res.pitch2D.devPtr = (CUdeviceptr)data; + resdesc.res.pitch2D.height = shape_v[0]; + if(nchan == 3) { - resdesc.resType = CU_RESOURCE_TYPE_LINEAR; - resdesc.res.linear.format = get_cuarray_format(item_type); - resdesc.res.linear.devPtr = (CUdeviceptr)data; - resdesc.res.linear.numChannels = nchan; - resdesc.res.linear.sizeInBytes = PyArray_NBYTES(np_a); + // CUDA does not support 3 channel images even though RGB/BGR + // is a common case. For convolutional kernels we can still use + // the 3channel image without copying, but we need to consider + // it a 3x wide image of 1 channel. + resdesc.res.pitch2D.numChannels = 1; + resdesc.res.pitch2D.width = shape_v[1] * 3; } - else + else { - resdesc.resType = CU_RESOURCE_TYPE_PITCH2D; - resdesc.res.pitch2D.format = get_cuarray_format(item_type); - resdesc.res.pitch2D.devPtr = (CUdeviceptr)data; - resdesc.res.pitch2D.height = shape_v[0]; - if(nchan == 3) - { - // CUDA does not support 3 channel images even though RGB/BGR - // is a common case. For convolutional kernels we can still use - // the 3channel image without copying, but we need to consider - // it a 3x wide image of 1 channel. - resdesc.res.pitch2D.numChannels = 1; - resdesc.res.pitch2D.width = shape_v[1] * 3; - } - else - { - resdesc.res.pitch2D.numChannels = nchan; - resdesc.res.pitch2D.width = shape_v[1]; - } - resdesc.res.pitch2D.pitchInBytes = stride_v[0]; + resdesc.res.pitch2D.numChannels = nchan; + resdesc.res.pitch2D.width = shape_v[1]; } + resdesc.res.pitch2D.pitchInBytes = stride_v[0]; + } CUDAPP_CALL_GUARDED(cuTexObjectCreate, (&m_cutex, &resdesc, &texdesc, NULL)); - Py_INCREF(m_pyobj); // Prevent backing array from being garbage collected } @@ -1472,14 +1448,12 @@ namespace pycuda CUtexObject to_int() { - // CUtexObject is a typedef for unsigned long long return m_cutex; } ~texture_object() { - cuTexObjectDestroy(m_cutex); - Py_XDECREF(m_pyobj); + CUDAPP_CALL_GUARDED_CLEANUP(cuTexObjectDestroy, (m_cutex)); } }; @@ -1499,7 +1473,6 @@ namespace pycuda private: CUsurfref m_surfref; - // life support for array and module boost::shared_ptr m_array; boost::shared_ptr m_module; diff --git a/src/wrapper/wrap_cudadrv.cpp b/src/wrapper/wrap_cudadrv.cpp index 07c9b0b5..39467d94 100644 --- a/src/wrapper/wrap_cudadrv.cpp +++ b/src/wrapper/wrap_cudadrv.cpp @@ -1635,6 +1635,7 @@ BOOST_PYTHON_MODULE(_driver) #if CUDAPP_CUDA_VERSION >= 6000 // {{{ texture descriptor + { typedef WRAP_CUDA_TEXTURE_DESC cl; py::class_("TextureDescriptor") @@ -1643,23 +1644,28 @@ BOOST_PYTHON_MODULE(_driver) .def_readwrite("flags", &cl::flags) .add_property("border_color", &cl::get_border_color, &cl::set_border_color); } + // }}} // {{{ texture object + { typedef texture_object cl; py::class_("TextureObject", py::init()) .def("__int__", &cl::to_int); } + // }}} - // {{{ surface object + // {{{ surface object + { typedef surface_object cl; - py::class_("SurfaceObject", py::init()) + py::class_("SurfaceObject", py::init >()) .def("__int__", &cl::to_int); } - // }}} + + // }}} #endif // {{{ surface reference -- GitLab