Newer
Older
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
\
void set_dst_device(CUdeviceptr devptr) \
{ \
dstMemoryType = CU_MEMORYTYPE_DEVICE; \
dstDevice = devptr; \
}
struct memcpy_2d : public CUDA_MEMCPY2D
{
memcpy_2d()
{
srcXInBytes = 0;
srcY = 0;
dstXInBytes = 0;
dstY = 0;
}
MEMCPY_SETTERS;
void execute(bool aligned) const
{
if (aligned)
{ CUDAPP_CALL_GUARDED_THREADED(cuMemcpy2D, (this)); }
{ CUDAPP_CALL_GUARDED_THREADED(cuMemcpy2DUnaligned, (this)); }
}
void execute_async(const stream &s) const
{ CUDAPP_CALL_GUARDED_THREADED(cuMemcpy2DAsync, (this, s.handle())); }
};
#if CUDA_VERSION >= 2000
struct memcpy_3d : public CUDA_MEMCPY3D
{
memcpy_3d()
{
reserved0 = 0;
reserved1 = 0;
srcXInBytes = 0;
srcY = 0;
srcZ = 0;
srcLOD = 0;
dstXInBytes = 0;
dstY = 0;
dstZ = 0;
dstLOD = 0;
}
MEMCPY_SETTERS;
void execute() const
{
CUDAPP_CALL_GUARDED_THREADED(cuMemcpy3D, (this));
}
void execute_async(const stream &s) const
{ CUDAPP_CALL_GUARDED_THREADED(cuMemcpy3DAsync, (this, s.handle())); }
};
#endif
// host memory --------------------------------------------------------------
inline void *mem_alloc_host(unsigned int size)
{
void *m_data;
CUDAPP_CALL_GUARDED(cuMemAllocHost, (&m_data, size));
return m_data;
}
inline void mem_free_host(void *ptr)
{
CUDAPP_CALL_GUARDED(cuMemFreeHost, (ptr));
}
struct host_allocation : public boost::noncopyable
{
private:
void *m_data;
public:
host_allocation(unsigned bytesize)
: m_data(mem_alloc_host(bytesize))
{ }
~host_allocation()
{ free(); }
void free()
{
if (m_data)
{
mem_free_host(m_data);
m_data = 0;
}
}
void *data()
{ return m_data; }
};
// events -------------------------------------------------------------------
class event : public boost::noncopyable, public context_dependent
{
private:
CUevent m_event;
public:
event(unsigned int flags=0)
{ CUDAPP_CALL_GUARDED(cuEventCreate, (&m_event, flags)); }
~event()
try
{
scoped_context_activation ca(get_context());
CUDAPP_CALL_GUARDED(cuEventDestroy, (m_event));
}
CUDA_CATCH_WARN_OOT_LEAK(event);
void record()
{ CUDAPP_CALL_GUARDED(cuEventRecord, (m_event, 0)); }
void record_in_stream(stream const &str)
{ CUDAPP_CALL_GUARDED(cuEventRecord, (m_event, str.handle())); }
void synchronize()
{ CUDAPP_CALL_GUARDED_THREADED(cuEventSynchronize, (m_event)); }
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
bool query() const
{
#ifdef TRACE_CUDA
std::cerr << "cuEventQuery" << std::endl;
#endif
CUresult result = cuEventQuery(m_event);
switch (result)
{
case CUDA_SUCCESS:
return true;
case CUDA_ERROR_NOT_READY:
return false;
default:
throw error("cuEventQuery", result);
}
}
float time_since(event const &start)
{
float result;
CUDAPP_CALL_GUARDED(cuEventElapsedTime, (&result, start.m_event, m_event));
return result;
}
float time_till(event const &end)
{
float result;
CUDAPP_CALL_GUARDED(cuEventElapsedTime, (&result, m_event, end.m_event));
return result;
}
};
}
#endif