#if CUDAPP_CUDA_VERSION >= 2000
CUDA_ARRAY3D_DESCRIPTOR get_descriptor_3d()
{
CUDA_ARRAY3D_DESCRIPTOR result;
CUDAPP_CALL_GUARDED(cuArray3DGetDescriptor, (&result, m_array));
return result;
}
#endif
CUarray handle() const
{ return m_array; }
};
class module;
class texture_reference : public boost::noncopyable
{
private:
CUtexref m_texref;
bool m_managed;
// life support for array and module
boost::shared_ptr<array> m_array;
boost::shared_ptr<module> m_module;
public:
texture_reference()
: m_managed(true)
{ CUDAPP_CALL_GUARDED(cuTexRefCreate, (&m_texref)); }
texture_reference(CUtexref tr, bool managed)
: m_texref(tr), m_managed(managed)
{ }
~texture_reference()
{
if (m_managed)
{
CUDAPP_CALL_GUARDED_CLEANUP(cuTexRefDestroy, (m_texref));
}
}
void set_module(boost::shared_ptr<module> mod)
{ m_module = mod; }
CUtexref handle() const
{ return m_texref; }
void set_array(boost::shared_ptr<array> ary)
{
CUDAPP_CALL_GUARDED(cuTexRefSetArray, (m_texref,
ary->handle(), CU_TRSA_OVERRIDE_FORMAT));
m_array = ary;
}
pycuda_size_t set_address(CUdeviceptr dptr, unsigned int bytes, bool allow_offset=false)
{
pycuda_size_t byte_offset;
CUDAPP_CALL_GUARDED(cuTexRefSetAddress, (&byte_offset,
m_texref, dptr, bytes));
if (!allow_offset && byte_offset != 0)
throw pycuda::error("texture_reference::set_address", CUDA_ERROR_INVALID_VALUE,
"texture binding resulted in offset, but allow_offset was false");
m_array.reset();
return byte_offset;
}
#if CUDAPP_CUDA_VERSION >= 2020
void set_address_2d(CUdeviceptr dptr,
const CUDA_ARRAY_DESCRIPTOR &descr, unsigned int pitch)
{
CUDAPP_CALL_GUARDED(cuTexRefSetAddress2D, (m_texref, &descr, dptr, pitch));
}
#endif
void set_format(CUarray_format fmt, int num_packed_components)
{ CUDAPP_CALL_GUARDED(cuTexRefSetFormat, (m_texref, fmt, num_packed_components)); }
void set_address_mode(int dim, CUaddress_mode am)
{ CUDAPP_CALL_GUARDED(cuTexRefSetAddressMode, (m_texref, dim, am)); }
void set_filter_mode(CUfilter_mode fm)
{ CUDAPP_CALL_GUARDED(cuTexRefSetFilterMode, (m_texref, fm)); }
void set_flags(unsigned int flags)
{ CUDAPP_CALL_GUARDED(cuTexRefSetFlags, (m_texref, flags)); }
CUdeviceptr get_address()
{
CUdeviceptr result;
CUDAPP_CALL_GUARDED(cuTexRefGetAddress, (&result, m_texref));
return result;
}
array *get_array()
{
CUarray result;
CUDAPP_CALL_GUARDED(cuTexRefGetArray, (&result, m_texref));
return new array(result, false);
}
CUaddress_mode get_address_mode(int dim)
{
CUaddress_mode result;
CUDAPP_CALL_GUARDED(cuTexRefGetAddressMode, (&result, m_texref, dim));
return result;
}
CUfilter_mode get_filter_mode()
{
CUfilter_mode result;
CUDAPP_CALL_GUARDED(cuTexRefGetFilterMode, (&result, m_texref));
return result;
}
#if CUDAPP_CUDA_VERSION >= 2000
py::tuple get_format()
{
CUarray_format fmt;
int num_channels;
CUDAPP_CALL_GUARDED(cuTexRefGetFormat, (&fmt, &num_channels, m_texref));
return py::make_tuple(fmt, num_channels);
}
#endif
unsigned int get_flags()
{
unsigned int result;
CUDAPP_CALL_GUARDED(cuTexRefGetFlags, (&result, m_texref));
return result;
}
};
#if CUDAPP_CUDA_VERSION >= 3010
class module;
class surface_reference : public boost::noncopyable
{
private:
CUsurfref m_surfref;
// life support for array and module
boost::shared_ptr<array> m_array;
boost::shared_ptr<module> m_module;
public:
surface_reference(CUsurfref sr)
: m_surfref(sr)
{ }
void set_module(boost::shared_ptr<module> mod)
{ m_module = mod; }
CUsurfref handle() const
{ return m_surfref; }
void set_array(boost::shared_ptr<array> ary, unsigned int flags)
{
CUDAPP_CALL_GUARDED(cuSurfRefSetArray, (m_surfref, ary->handle(), flags));
m_array = ary;
}
array *get_array()
{
CUarray result;
CUDAPP_CALL_GUARDED(cuSurfRefGetArray, (&result, m_surfref));
return new array(result, false);
}
};
#endif
class function;
class module : public boost::noncopyable, public context_dependent
{
private:
CUmodule m_module;
public:
module(CUmodule mod)
: m_module(mod)
{ }
~module()
{
try
{
scoped_context_activation ca(get_context());
CUDAPP_CALL_GUARDED_CLEANUP(cuModuleUnload, (m_module));
}
CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(module);
}
CUmodule handle() const
{ return m_module; }
function get_function(const char *name);
py::tuple get_global(const char *name)
{
CUdeviceptr devptr;
pycuda_size_t bytes;
CUDAPP_CALL_GUARDED(cuModuleGetGlobal, (&devptr, &bytes, m_module, name));
return py::make_tuple(devptr, bytes);
}
};
inline module *module_from_file(const char *filename)
{
CUmodule mod;
CUDAPP_CALL_GUARDED(cuModuleLoad, (&mod, filename));
return new module(mod);
}
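// Illustration only (not part of the original header), assuming an
// active context and a hypothetical compiled module "my_kernel.cubin":
//
//   boost::shared_ptr<module> mod(module_from_file("my_kernel.cubin"));
//   function f = mod->get_function("my_kernel");
//   f.set_block_shape(128, 1, 1);
//   f.launch_grid(grid_width, 1);   // grid_width assumed defined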
inline texture_reference *module_get_texref(
boost::shared_ptr<module> mod, const char *name)
{
CUtexref tr;
CUDAPP_CALL_GUARDED(cuModuleGetTexRef, (&tr, mod->handle(), name));
std::auto_ptr<texture_reference> result(
new texture_reference(tr, false));
result->set_module(mod);
return result.release();
}
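// Illustration only: binding a module's texture reference to an array,
// assuming "mod" (boost::shared_ptr<module>) and "ary"
// (boost::shared_ptr<array>) already exist:
//
//   std::auto_ptr<texture_reference> texref(module_get_texref(mod, "my_tex"));
//   texref->set_array(ary);
//   texref->set_format(CU_AD_FORMAT_FLOAT, 1);
//   texref->set_filter_mode(CU_TR_FILTER_MODE_LINEAR);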
#if CUDAPP_CUDA_VERSION >= 3010
inline
surface_reference *module_get_surfref(
boost::shared_ptr<module> mod, const char *name)
{
CUsurfref sr;
CUDAPP_CALL_GUARDED(cuModuleGetSurfRef, (&sr, mod->handle(), name));
std::auto_ptr<surface_reference> result(
new surface_reference(sr));
result->set_module(mod);
return result.release();
}
#endif
class function
{
private:
CUfunction m_function;
std::string m_symbol;
public:
function(CUfunction func, std::string const &sym)
: m_function(func), m_symbol(sym)
{ }
void set_block_shape(int x, int y, int z)
{
CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(
cuFuncSetBlockShape, (m_function, x, y, z), m_symbol);
}
void set_shared_size(unsigned int bytes)
{
CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(
cuFuncSetSharedSize, (m_function, bytes), m_symbol);
}
void param_set_size(unsigned int bytes)
{
CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(
cuParamSetSize, (m_function, bytes), m_symbol);
}
void param_set(int offset, unsigned int value)
{
CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(
cuParamSeti, (m_function, offset, value), m_symbol);
}
void param_set(int offset, float value)
{
CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(
cuParamSetf, (m_function, offset, value), m_symbol);
}
void param_setv(int offset, void *buf, size_t len)
{
// maybe the unsigned int will change, it does not seem right
CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(
cuParamSetv, (m_function, offset, buf, (unsigned int) len), m_symbol);
}
void param_set_texref(const texture_reference &tr)
{
CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(cuParamSetTexRef, (m_function,
CU_PARAM_TR_DEFAULT, tr.handle()), m_symbol);
}
void launch()
{
CUDAPP_CALL_GUARDED_THREADED_WITH_TRACE_INFO(
cuLaunch, (m_function), m_symbol);
}
void launch_grid(int grid_width, int grid_height)
{
CUDAPP_CALL_GUARDED_THREADED_WITH_TRACE_INFO(
cuLaunchGrid, (m_function, grid_width, grid_height), m_symbol);
}
void launch_grid_async(int grid_width, int grid_height, const stream &s)
{
CUDAPP_CALL_GUARDED_THREADED_WITH_TRACE_INFO(
cuLaunchGridAsync, (m_function, grid_width, grid_height, s.handle()),
m_symbol);
}
#if CUDAPP_CUDA_VERSION >= 2020
int get_attribute(CUfunction_attribute attr) const
{
int result;
CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(
cuFuncGetAttribute, (&result, attr, m_function), m_symbol);
return result;
}
#endif
#if CUDAPP_CUDA_VERSION >= 3000 && defined(CUDAPP_POST_30_BETA)
void set_cache_config(CUfunc_cache fc)
{
CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(
cuFuncSetCacheConfig, (m_function, fc), m_symbol);
}
#endif
#if CUDAPP_CUDA_VERSION >= 4000
void launch_kernel(py::tuple grid_dim_py, py::tuple block_dim_py,
py::object parameter_buffer,
unsigned shared_mem_bytes, py::object stream_py)
{
const unsigned axis_count = 3;
unsigned grid_dim[axis_count];
unsigned block_dim[axis_count];
for (unsigned i = 0; i < axis_count; ++i)
{
grid_dim[i] = 1;
block_dim[i] = 1;
}
pycuda_size_t gd_length = py::len(grid_dim_py);
if (gd_length > axis_count)
throw pycuda::error("function::launch_kernel", CUDA_ERROR_INVALID_HANDLE,
"too many grid dimensions in kernel launch");
for (unsigned i = 0; i < gd_length; ++i)
grid_dim[i] = py::extract<unsigned>(grid_dim_py[i]);
pycuda_size_t bd_length = py::len(block_dim_py);
if (bd_length > axis_count)
throw pycuda::error("function::launch_kernel", CUDA_ERROR_INVALID_HANDLE,
"too many block dimensions in kernel launch");
for (unsigned i = 0; i < bd_length; ++i)
block_dim[i] = py::extract<unsigned>(block_dim_py[i]);
PYCUDA_PARSE_STREAM_PY;
const void *par_buf;
PYCUDA_BUFFER_SIZE_T py_par_len;
if (PyObject_AsReadBuffer(parameter_buffer.ptr(), &par_buf, &py_par_len))
throw py::error_already_set();
size_t par_len = py_par_len;
void *config[] = {
CU_LAUNCH_PARAM_BUFFER_POINTER, const_cast<void *>(par_buf),
CU_LAUNCH_PARAM_BUFFER_SIZE, &par_len,
CU_LAUNCH_PARAM_END
};
CUDAPP_CALL_GUARDED(
cuLaunchKernel, (m_function,
grid_dim[0], grid_dim[1], grid_dim[2],
block_dim[0], block_dim[1], block_dim[2],
shared_mem_bytes, s_handle, 0, config
));
}
#endif
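// Note (added commentary): the config[] array above uses the driver
// API's "extra" launch-options protocol. CU_LAUNCH_PARAM_BUFFER_POINTER
// and CU_LAUNCH_PARAM_BUFFER_SIZE hand cuLaunchKernel a single packed,
// properly aligned blob of kernel arguments (the Python-side
// parameter_buffer) instead of a per-argument kernelParams array.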
#if CUDAPP_CUDA_VERSION >= 4020
void set_shared_config(CUsharedconfig config)
{
CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(
cuFuncSetSharedMemConfig, (m_function, config), m_symbol);
}
#endif
};
inline function module::get_function(const char *name)
{
CUfunction func;
CUDAPP_CALL_GUARDED(cuModuleGetFunction, (&func, m_module, name));
return function(func, name);
}
inline py::tuple mem_get_info()
{
pycuda_size_t free, total;
CUDAPP_CALL_GUARDED(cuMemGetInfo, (&free, &total));
return py::make_tuple(free, total);
}
inline CUdeviceptr mem_alloc(size_t bytes)
{
CUdeviceptr devptr;
CUDAPP_CALL_GUARDED(cuMemAlloc, (&devptr, bytes));
return devptr;
}
inline void mem_free(CUdeviceptr devptr)
{ CUDAPP_CALL_GUARDED_CLEANUP(cuMemFree, (devptr)); }
// A class the user can override to make device_allocation-
// workalikes. (A commented sketch follows the class below.)
class pointer_holder_base
{
public:
virtual ~pointer_holder_base() { }
virtual CUdeviceptr get_pointer() = 0;
operator CUdeviceptr()
{ return get_pointer(); }
py::object as_buffer(size_t size, size_t offset)
{
return py::object(
py::handle<>(
PyBuffer_FromMemory((void *) (get_pointer() + offset), size)));
}
};
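// Illustration only (not from the original source): a minimal
// pointer_holder_base override. Anything that can produce a CUdeviceptr
// becomes usable where a device_allocation-like object is expected;
// "m_ptr" below stands in for a hypothetical externally managed pointer.
//
//   class borrowed_device_pointer : public pointer_holder_base
//   {
//     CUdeviceptr m_ptr;
//     public:
//       borrowed_device_pointer(CUdeviceptr ptr) : m_ptr(ptr) { }
//       virtual CUdeviceptr get_pointer() { return m_ptr; }
//   };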
class device_allocation : public boost::noncopyable, public context_dependent
{
private:
bool m_valid;
protected:
CUdeviceptr m_devptr;
public:
device_allocation(CUdeviceptr devptr)
: m_valid(true), m_devptr(devptr)
{ }
void free()
{
if (m_valid)
{
try
{
scoped_context_activation ca(get_context());
mem_free(m_devptr);
}
CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(device_allocation);
release_context();
m_valid = false;
}
else
throw pycuda::error("device_allocation::free", CUDA_ERROR_INVALID_HANDLE);
}
~device_allocation()
{
if (m_valid)
free();
}
operator CUdeviceptr() const
{ return m_devptr; }
py::object as_buffer(size_t size, size_t offset)
{
return py::object(
py::handle<>(
PyBuffer_FromMemory((void *) (m_devptr + offset), size)));
}
};
inline Py_ssize_t mem_alloc_pitch(
std::auto_ptr<device_allocation> &da,
unsigned int width, unsigned int height, unsigned int access_size)
{
CUdeviceptr devptr;
pycuda_size_t pitch;
CUDAPP_CALL_GUARDED(cuMemAllocPitch, (&devptr, &pitch, width, height, access_size));
da = std::auto_ptr<device_allocation>(new device_allocation(devptr));
return pitch;
}
inline py::tuple mem_get_address_range(CUdeviceptr ptr)
{
CUdeviceptr base;
pycuda_size_t size;
CUDAPP_CALL_GUARDED(cuMemGetAddressRange, (&base, &size, ptr));
return py::make_tuple(base, size);
}
void memcpy_dtoa(array const &ary, unsigned int index, CUdeviceptr src, unsigned int len)
{ CUDAPP_CALL_GUARDED_THREADED(cuMemcpyDtoA, (ary.handle(), index, src, len)); }
void memcpy_atod(CUdeviceptr dst, array const &ary, unsigned int index, unsigned int len)
{ CUDAPP_CALL_GUARDED_THREADED(cuMemcpyAtoD, (dst, ary.handle(), index, len)); }
void memcpy_atoa(
array const &dst, unsigned int dst_index,
array const &src, unsigned int src_index,
unsigned int len)
{ CUDAPP_CALL_GUARDED_THREADED(cuMemcpyAtoA, (dst.handle(), dst_index, src.handle(), src_index, len)); }
// {{{ ipc_mem_handle
#if CUDAPP_CUDA_VERSION >= 4010 && PY_VERSION_HEX >= 0x02060000
class ipc_mem_handle : public boost::noncopyable, public context_dependent
{
private:
bool m_valid;
protected:
CUdeviceptr m_devptr;
public:
ipc_mem_handle(py::object obj, CUipcMem_flags flags=CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS)
: m_valid(true)
{
if (!PyByteArray_Check(obj.ptr()))
throw pycuda::error("ipc_mem_handle", CUDA_ERROR_INVALID_VALUE,
"argument is not a bytes array");
CUipcMemHandle handle;
if (PyByteArray_GET_SIZE(obj.ptr()) != sizeof(handle))
throw pycuda::error("ipc_mem_handle", CUDA_ERROR_INVALID_VALUE,
"handle has the wrong size");
memcpy(&handle, PyByteArray_AS_STRING(obj.ptr()), sizeof(handle));
CUDAPP_CALL_GUARDED(cuIpcOpenMemHandle, (&m_devptr, handle, flags));
}
void close()
{
if (m_valid)
{
try
{
scoped_context_activation ca(get_context());
CUDAPP_CALL_GUARDED_CLEANUP(cuIpcCloseMemHandle, (m_devptr));
}
CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(ipc_mem_handle);
release_context();
m_valid = false;
}
else
throw pycuda::error("ipc_mem_handle::close", CUDA_ERROR_INVALID_HANDLE);
}
~ipc_mem_handle()
{
if (m_valid)
close();
}
operator CUdeviceptr() const
{ return m_devptr; }
};
inline
py::object mem_get_ipc_handle(CUdeviceptr devptr)
{
CUipcMemHandle handle;
CUDAPP_CALL_GUARDED(cuIpcGetMemHandle, (&handle, devptr));
return py::object(py::handle<>(PyByteArray_FromStringAndSize(
reinterpret_cast<const char *>(&handle),
sizeof(handle))));
}
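// Added commentary: mem_get_ipc_handle and ipc_mem_handle form the two
// halves of the CUDA IPC flow. The exporting process turns a device
// pointer into an opaque CUipcMemHandle, returned here as a Python
// bytearray; another process passes those bytes to the ipc_mem_handle
// constructor above, which maps the allocation into its own address
// space via cuIpcOpenMemHandle.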
#endif
// }}}
#define MEMCPY_SETTERS \
void set_src_host(py::object buf_py) \
{ \
srcMemoryType = CU_MEMORYTYPE_HOST; \
PYCUDA_BUFFER_SIZE_T len; \
if (PyObject_AsReadBuffer(buf_py.ptr(), &srcHost, &len)) \
throw py::error_already_set(); \
} \
\
void set_src_array(array const &ary) \
{ \
srcMemoryType = CU_MEMORYTYPE_ARRAY; \
srcArray = ary.handle(); \
} \
\
void set_src_device(CUdeviceptr devptr) \
{ \
srcMemoryType = CU_MEMORYTYPE_DEVICE; \
srcDevice = devptr; \
} \
\
void set_dst_host(py::object buf_py) \
{ \
dstMemoryType = CU_MEMORYTYPE_HOST; \
PYCUDA_BUFFER_SIZE_T len; \
if (PyObject_AsWriteBuffer(buf_py.ptr(), &dstHost, &len)) \
throw py::error_already_set(); \
} \
\
void set_dst_array(array const &ary) \
{ \
dstMemoryType = CU_MEMORYTYPE_ARRAY; \
dstArray = ary.handle(); \
} \
\
void set_dst_device(CUdeviceptr devptr) \
{ \
dstMemoryType = CU_MEMORYTYPE_DEVICE; \
dstDevice = devptr; \
}
#if CUDAPP_CUDA_VERSION >= 4000
#define MEMCPY_SETTERS_UNIFIED \
void set_src_unified(py::object buf_py) \
{ \
srcMemoryType = CU_MEMORYTYPE_UNIFIED; \
PYCUDA_BUFFER_SIZE_T len; \
if (PyObject_AsReadBuffer(buf_py.ptr(), &srcHost, &len)) \
throw py::error_already_set(); \
} \
\
void set_dst_unified(py::object buf_py) \
{ \
dstMemoryType = CU_MEMORYTYPE_UNIFIED; \
PYCUDA_BUFFER_SIZE_T len; \
if (PyObject_AsWriteBuffer(buf_py.ptr(), &dstHost, &len)) \
throw py::error_already_set(); \
}
#else
#define MEMCPY_SETTERS_UNIFIED /* empty */
#endif
struct memcpy_2d : public CUDA_MEMCPY2D
{
memcpy_2d()
{
srcXInBytes = 0;
srcY = 0;
dstXInBytes = 0;
dstY = 0;
}
MEMCPY_SETTERS;
MEMCPY_SETTERS_UNIFIED;
void execute(bool aligned=false) const
{
if (aligned)
{ CUDAPP_CALL_GUARDED_THREADED(cuMemcpy2D, (this)); }
else
{ CUDAPP_CALL_GUARDED_THREADED(cuMemcpy2DUnaligned, (this)); }
}
void execute_async(const stream &s) const
{ CUDAPP_CALL_GUARDED_THREADED(cuMemcpy2DAsync, (this, s.handle())); }
};
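// Illustration only: a host-to-device 2D copy through this wrapper.
// "host_buf" (a py::object exporting a buffer), "dev" (a CUdeviceptr),
// "row_bytes" and "rows" are assumed to exist; WidthInBytes, Height and
// the pitches are plain CUDA_MEMCPY2D fields inherited by memcpy_2d.
//
//   memcpy_2d cpy;
//   cpy.set_src_host(host_buf);
//   cpy.srcPitch = row_bytes;
//   cpy.set_dst_device(dev);
//   cpy.dstPitch = row_bytes;
//   cpy.WidthInBytes = row_bytes;
//   cpy.Height = rows;
//   cpy.execute();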
#if CUDAPP_CUDA_VERSION >= 2000
struct memcpy_3d : public CUDA_MEMCPY3D
{
memcpy_3d()
{
reserved0 = 0;
reserved1 = 0;
srcXInBytes = 0;
srcY = 0;
srcZ = 0;
srcLOD = 0;
dstXInBytes = 0;
dstY = 0;
dstZ = 0;
dstLOD = 0;
}
MEMCPY_SETTERS;
MEMCPY_SETTERS_UNIFIED;
void execute() const
{
CUDAPP_CALL_GUARDED_THREADED(cuMemcpy3D, (this));
}
void execute_async(const stream &s) const
{ CUDAPP_CALL_GUARDED_THREADED(cuMemcpy3DAsync, (this, s.handle())); }
};
#endif
#if CUDAPP_CUDA_VERSION >= 4000
struct memcpy_3d_peer : public CUDA_MEMCPY3D_PEER
{
memcpy_3d_peer()
{
srcXInBytes = 0;
srcY = 0;
srcZ = 0;
srcLOD = 0;
dstXInBytes = 0;
dstY = 0;
dstZ = 0;
dstLOD = 0;
}
MEMCPY_SETTERS;
MEMCPY_SETTERS_UNIFIED;
void set_src_context(context const &ctx)
{
srcContext = ctx.handle();
}
void set_dst_context(context const &ctx)
{
dstContext = ctx.handle();
}
void execute() const
{
CUDAPP_CALL_GUARDED_THREADED(cuMemcpy3DPeer, (this));
}
void execute_async(const stream &s) const
{ CUDAPP_CALL_GUARDED_THREADED(cuMemcpy3DPeerAsync, (this, s.handle())); }
};
#endif
inline void *mem_host_alloc(size_t size, unsigned flags=0)
{
void *m_data;
#if CUDAPP_CUDA_VERSION >= 2020
CUDAPP_CALL_GUARDED(cuMemHostAlloc, (&m_data, size, flags));
#else
if (flags != 0)
throw pycuda::error("mem_host_alloc", CUDA_ERROR_INVALID_VALUE,
"nonzero flags in mem_host_alloc not allowed in CUDA 2.1 and older");
CUDAPP_CALL_GUARDED(cuMemAllocHost, (&m_data, size));
#endif
return m_data;
}
inline void mem_host_free(void *ptr)
{
CUDAPP_CALL_GUARDED_CLEANUP(cuMemFreeHost, (ptr));
}
#if CUDAPP_CUDA_VERSION >= 4000
inline void *mem_host_register(void *ptr, size_t bytes, unsigned int flags=0)
{
CUDAPP_CALL_GUARDED(cuMemHostRegister, (ptr, bytes, flags));
return ptr;
}
inline void mem_host_unregister(void *ptr)
{
CUDAPP_CALL_GUARDED_CLEANUP(cuMemHostUnregister, (ptr));
}
#endif
inline void *aligned_malloc(size_t size, size_t alignment, void **original_pointer)
{
// alignment must be a power of two.
if ((alignment & (alignment - 1)) != 0)
throw pycuda::error("aligned_malloc", CUDA_ERROR_INVALID_VALUE,
"alignment must be a power of two");
if (alignment == 0)
throw pycuda::error("aligned_malloc", CUDA_ERROR_INVALID_VALUE,
"alignment must non-zero");
void *p = malloc(size + (alignment - 1));
if (!p)
throw pycuda::error("aligned_malloc", CUDA_ERROR_OUT_OF_MEMORY,
"aligned malloc failed");
*original_pointer = p;
p = (void *)((((ptrdiff_t)(p)) + (alignment-1)) & -alignment);
return p;
}
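// Worked example (added commentary): with alignment = 16 and malloc
// returning p = 0x1003, adding alignment-1 gives 0x1012, and masking
// with -alignment (i.e. ~0xF) yields 0x1010, the first 16-byte boundary
// at or above the original pointer. The unrounded pointer is stored in
// *original_pointer so it can later be handed back to ::free().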
struct host_pointer : public boost::noncopyable, public context_dependent
{
protected:
bool m_valid;
void *m_data;
public:
host_pointer()
: m_valid(false)
{ }
host_pointer(void *ptr)
: m_valid(true), m_data(ptr)
{ }
virtual ~host_pointer()
{ }
void *data()
{ return m_data; }
#if CUDAPP_CUDA_VERSION >= 2020
CUdeviceptr get_device_pointer()
{
CUdeviceptr result;
CUDAPP_CALL_GUARDED(cuMemHostGetDevicePointer, (&result, m_data, 0));
return result;
}
#endif
};
struct pagelocked_host_allocation : public host_pointer
{
public:
pagelocked_host_allocation(size_t bytesize, unsigned flags=0)
: host_pointer(mem_host_alloc(bytesize, flags))
{ }
/* Don't try to be clever and coalesce these in the base class.
* Won't work: Destructors may not call virtual functions.
*/
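/* Added commentary: once a base-class destructor is running, the derived
 * part of the object has already been destroyed, so a virtual free()
 * called from host_pointer's destructor would dispatch to the base
 * version (or trip a pure-virtual call), never to the cleanup below.
 * Hence each subclass keeps its own destructor.
 */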
~pagelocked_host_allocation()
{
if (m_valid)
free();
}
void free()
{
if (m_valid)
{
try
{
scoped_context_activation ca(get_context());
mem_host_free(m_data);
}
CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(pagelocked_host_allocation);
release_context();
m_valid = false;
}
else
throw pycuda::error("pagelocked_host_allocation::free", CUDA_ERROR_INVALID_HANDLE);
}
#if CUDAPP_CUDA_VERSION >= 3020
unsigned int get_flags()
{
unsigned int flags;
CUDAPP_CALL_GUARDED(cuMemHostGetFlags, (&flags, m_data));
return flags;
}
#endif
};
struct aligned_host_allocation : public host_pointer
{
void *m_original_pointer;
public:
aligned_host_allocation(size_t size, size_t alignment)
: host_pointer(aligned_malloc(size, alignment, &m_original_pointer))
{ }
/* Don't try to be clever and coalesce these in the base class.
* Won't work: Destructors may not call virtual functions.
*/
~aligned_host_allocation()
{
if (m_valid)
free();
}
void free()
{
if (m_valid)
{
::free(m_original_pointer);
m_valid = false;
}
else
throw pycuda::error("aligned_host_allocation::free", CUDA_ERROR_INVALID_HANDLE);
}
};
#if CUDAPP_CUDA_VERSION >= 4000
struct registered_host_memory : public host_pointer
{
private:
py::object m_base;
public:
registered_host_memory(void *p, size_t bytes, unsigned int flags=0,
py::object base=py::object())
: host_pointer(mem_host_register(p, bytes, flags)), m_base(base)
{
}
/* Don't try to be clever and coalesce these in the base class.
* Won't work: Destructors may not call virtual functions.
*/
~registered_host_memory()
{
if (m_valid)
free();
}
void free()
{
if (m_valid)
{
try
{
scoped_context_activation ca(get_context());
mem_host_unregister(m_data);
}
CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(registered_host_memory);
release_context();
m_valid = false;
}
else
throw pycuda::error("registered_host_memory::free", CUDA_ERROR_INVALID_HANDLE);
}
py::object base() const
{
return m_base;
}
};
#endif
class event : public boost::noncopyable, public context_dependent
{
private:
CUevent m_event;
public:
event(unsigned int flags=0)
{ CUDAPP_CALL_GUARDED(cuEventCreate, (&m_event, flags)); }