Newer
Older
srcArray = ary.handle(); \
} \
\
/* Use the linear device address devptr as the source of the copy. */ \
void set_src_device(CUdeviceptr devptr) \
{ \
  srcDevice = devptr; \
  srcMemoryType = CU_MEMORYTYPE_DEVICE; \
} \
\
/* Use the writable buffer exposed by the Python object buf_py as the */ \
/* destination of the copy. Throws py::error_already_set when the */ \
/* buffer request reports failure. */ \
void set_dst_host(py::object buf_py) \
{ \
  dstMemoryType = CU_MEMORYTYPE_HOST; \
  /* The length is required by the call below but is not used here. */ \
  PYCUDA_BUFFER_SIZE_T buf_len; \
  const int status = PyObject_AsWriteBuffer(buf_py.ptr(), &dstHost, &buf_len); \
  if (status != 0) \
    throw py::error_already_set(); \
} \
\
/* Use the CUDA array wrapped by ary as the destination of the copy. */ \
void set_dst_array(array const &ary) \
{ \
  dstMemoryType = CU_MEMORYTYPE_ARRAY; \
  dstArray = ary.handle(); \
} \
\
/* Use the linear device address devptr as the destination of the copy. */ \
void set_dst_device(CUdeviceptr devptr) \
{ \
  dstDevice = devptr; \
  dstMemoryType = CU_MEMORYTYPE_DEVICE; \
}
// Descriptor for a 2D copy: the driver's CUDA_MEMCPY2D structure plus the
// source/destination setters from MEMCPY_SETTERS and helpers that submit it.
struct memcpy_2d : public CUDA_MEMCPY2D
{
  // Zeroes the source and destination offsets. The remaining inherited
  // fields are not initialized here and must be filled in by the caller.
  memcpy_2d()
  {
    srcXInBytes = 0;
    srcY = 0;

    dstXInBytes = 0;
    dstY = 0;
  }

  MEMCPY_SETTERS;

  // Runs the copy synchronously.
  //   aligned == true  -> cuMemcpy2D
  //   aligned == false -> cuMemcpy2DUnaligned
  // Exactly one of the two driver calls is issued per invocation.
  void execute(bool aligned) const
  {
    if (aligned)
    { CUDAPP_CALL_GUARDED_THREADED(cuMemcpy2D, (this)); }
    else
    { CUDAPP_CALL_GUARDED_THREADED(cuMemcpy2DUnaligned, (this)); }
  }

  // Submits the copy through cuMemcpy2DAsync on stream s.
  void execute_async(const stream &s) const
  { CUDAPP_CALL_GUARDED_THREADED(cuMemcpy2DAsync, (this, s.handle())); }
};
#if CUDA_VERSION >= 2000
// Descriptor for a 3D copy: the driver's CUDA_MEMCPY3D structure plus the
// source/destination setters from MEMCPY_SETTERS and helpers that submit it.
struct memcpy_3d : public CUDA_MEMCPY3D
{
  // Clears the reserved fields and every source/destination offset and LOD.
  // All other inherited fields are left for the caller to fill in.
  memcpy_3d()
  {
    // source side
    srcXInBytes = 0;
    srcY = 0;
    srcZ = 0;
    srcLOD = 0;

    // destination side
    dstXInBytes = 0;
    dstY = 0;
    dstZ = 0;
    dstLOD = 0;

    // reserved members
    reserved0 = 0;
    reserved1 = 0;
  }

  MEMCPY_SETTERS;

  // Runs the copy through cuMemcpy3D.
  void execute() const
  { CUDAPP_CALL_GUARDED_THREADED(cuMemcpy3D, (this)); }

  // Submits the copy through cuMemcpy3DAsync on stream s.
  void execute_async(const stream &s) const
  {
    CUDAPP_CALL_GUARDED_THREADED(cuMemcpy3DAsync, (this, s.handle()));
  }
};
#endif
// host memory --------------------------------------------------------------
// Allocates `size` bytes of host memory through the CUDA driver and returns
// the resulting pointer.
//
// When CUDA_VERSION >= 2020, `flags` is forwarded to cuMemHostAlloc.
// On older toolkits only flags == 0 is accepted (anything else throws
// cuda::error with CUDA_ERROR_INVALID_VALUE) and cuMemAllocHost is used.
// Driver failures are reported by CUDAPP_CALL_GUARDED (defined elsewhere).
inline void *mem_alloc_host(unsigned int size, unsigned flags=0)
{
  void *m_data;
#if CUDA_VERSION >= 2020
  CUDAPP_CALL_GUARDED(cuMemHostAlloc, (&m_data, size, flags));
#else
  if (flags != 0)
    throw cuda::error("mem_alloc_host", CUDA_ERROR_INVALID_VALUE,
        "nonzero flags in mem_alloc_host not allowed in CUDA 2.1 and older");
  CUDAPP_CALL_GUARDED(cuMemAllocHost, (&m_data, size));
#endif
  // The return must sit outside the version switch so both branches reach it.
  return m_data;
}
// Hands the host allocation at `ptr` back to the driver via cuMemFreeHost.
inline void mem_free_host(void *ptr)
{ CUDAPP_CALL_GUARDED(cuMemFreeHost, (ptr)); }
// Owns one block of host memory obtained from mem_alloc_host. The block is
// released either explicitly through free() or by the destructor.
struct host_allocation : public boost::noncopyable, public context_dependent
{
  private:
    // Declared in the same order as the constructor's initializer list.
    bool m_valid;   // true while m_data still has to be released
    void *m_data;   // pointer returned by mem_alloc_host

  public:
    // Allocates `bytesize` bytes, passing `flags` on to mem_alloc_host.
    host_allocation(unsigned bytesize, unsigned flags=0)
      : m_valid(true), m_data(mem_alloc_host(bytesize, flags))
    { }

    // Releases the memory unless free() has already done so.
    ~host_allocation()
    {
      if (m_valid)
        free();
    }

    // Releases the memory with the owning context activated, then drops the
    // context reference and marks the allocation invalid. Exceptions raised
    // while freeing are handled by CUDAPP_CATCH_WARN_OOT_LEAK.
    // Throws cuda::error(CUDA_ERROR_INVALID_HANDLE) when the allocation has
    // already been freed.
    void free()
    {
      if (m_valid)
      {
        try
        {
          scoped_context_activation ca(get_context());
          mem_free_host(m_data);
        }
        CUDAPP_CATCH_WARN_OOT_LEAK(host_allocation);

        release_context();
        m_valid = false;
      }
      else
        throw cuda::error("host_allocation::free", CUDA_ERROR_INVALID_HANDLE);
    }

    // Returns the host pointer of the allocation.
    void *data()
    { return m_data; }

#if CUDA_VERSION >= 2020
    // Returns the result of cuMemHostGetDevicePointer for this allocation
    // (flags argument 0).
    CUdeviceptr get_device_pointer()
    {
      CUdeviceptr result;
      CUDAPP_CALL_GUARDED(cuMemHostGetDevicePointer, (&result, m_data, 0));
      return result;
    }
#endif
};
// events -------------------------------------------------------------------
// Wrapper around a CUevent handle. The event is created in the constructor
// and destroyed in the destructor with the owning context activated.
class event : public boost::noncopyable, public context_dependent
{
  private:
    CUevent m_event;

  public:
    // Creates the event with cuEventCreate, passing `flags` through.
    event(unsigned int flags=0)
    { CUDAPP_CALL_GUARDED(cuEventCreate, (&m_event, flags)); }

    // Destroys the event. The try block is closed by
    // CUDAPP_CATCH_WARN_OOT_LEAK, the same handler macro host_allocation
    // uses, so a failure during teardown does not leave the destructor.
    ~event()
    {
      try
      {
        scoped_context_activation ca(get_context());
        CUDAPP_CALL_GUARDED(cuEventDestroy, (m_event));
      }
      CUDAPP_CATCH_WARN_OOT_LEAK(event);
    }

    // Records the event with stream argument 0.
    void record()
    { CUDAPP_CALL_GUARDED(cuEventRecord, (m_event, 0)); }

    // Records the event in the stream `str`.
    void record_in_stream(stream const &str)
    { CUDAPP_CALL_GUARDED(cuEventRecord, (m_event, str.handle())); }

    // Calls cuEventSynchronize on the event.
    void synchronize()
    { CUDAPP_CALL_GUARDED_THREADED(cuEventSynchronize, (m_event)); }

    // Polls the event with cuEventQuery.
    // Returns true on CUDA_SUCCESS and false on CUDA_ERROR_NOT_READY;
    // any other result is thrown as an error.
    bool query() const
    {
#ifdef TRACE_CUDA
      std::cerr << "cuEventQuery" << std::endl;
#endif
      CUresult result = cuEventQuery(m_event);
      switch (result)
      {
        case CUDA_SUCCESS:
          return true;
        case CUDA_ERROR_NOT_READY:
          return false;
        default:
          throw error("cuEventQuery", result);
      }
    }

    // Returns cuEventElapsedTime measured from `start` to this event.
    float time_since(event const &start)
    {
      float result;
      CUDAPP_CALL_GUARDED(cuEventElapsedTime, (&result, start.m_event, m_event));
      return result;
    }

    // Returns cuEventElapsedTime measured from this event to `end`.
    float time_till(event const &end)
    {
      float result;
      CUDAPP_CALL_GUARDED(cuEventElapsedTime, (&result, m_event, end.m_event));
      return result;
    }
};
}
#endif