Newer
Older
{
PYCUDA_PARSE_STREAM_PY;
CUDAPP_CALL_GUARDED(cuStreamAttachMemAsync, (s_handle, m_devptr, 0, flags));
}
Stan Seibert
committed
};
#endif
#if CUDAPP_CUDA_VERSION >= 4000
struct registered_host_memory : public host_pointer
{
private:
py::object m_base;
public:
// Passes p, bytes and flags to mem_host_register() and initializes the
// host_pointer base with the value it returns; `base` is stored in m_base.
// NOTE(review): m_base presumably keeps the Python object that owns `p`
// alive for as long as the registration exists -- confirm against callers.
registered_host_memory(void *p, size_t bytes, unsigned int flags=0,
py::object base=py::object())
: host_pointer(mem_host_register(p, bytes, flags)), m_base(base)
{
}
Andreas Klöckner
committed
/* The cleanup below is deliberately repeated in this class instead of
 * being coalesced into the base class. That would not work: destructors
 * may not call virtual functions.
 */
~registered_host_memory()
{
  // free() throws on an object that is no longer valid, so it is only
  // invoked while m_valid is still set.
  if (!m_valid)
    return;

  free();
}
// Unregisters m_data via mem_host_unregister() with this object's context
// activated, then calls release_context() and marks the object invalid.
// Throws pycuda::error with CUDA_ERROR_INVALID_HANDLE if the object is
// already invalid (e.g. free() was called before).
void free()
{
  if (!m_valid)
    throw pycuda::error("registered_host_memory::free", CUDA_ERROR_INVALID_HANDLE);

  try
  {
    scoped_context_activation ca(get_context());
    mem_host_unregister(m_data);
  }
  CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(host_allocation);

  // Runs whether or not the unregister step above was cut short by one of
  // the exceptions the cleanup macro catches.
  release_context();
  m_valid = false;
}
// Returns the Python object that was handed to the constructor as `base`
// (a default-constructed py::object if none was supplied).
py::object base() const
{
return m_base;
}
class event : public boost::noncopyable, public context_dependent
{
private:
CUevent m_event;
public:
// Creates the underlying CUevent with cuEventCreate, forwarding `flags`
// unchanged; the handle is stored in m_event.
// NOTE(review): CUDAPP_CALL_GUARDED presumably throws on a non-success
// CUresult -- the macro is defined outside this view.
event(unsigned int flags=0)
{ CUDAPP_CALL_GUARDED(cuEventCreate, (&m_event, flags)); }
try
{
scoped_context_activation ca(get_context());
CUDAPP_CALL_GUARDED_CLEANUP(cuEventDestroy, (m_event));
}
CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(event);
// Records this event with cuEventRecord on the stream described by
// stream_py and returns `this` so calls can be chained.
// NOTE(review): s_handle is presumably declared by PYCUDA_PARSE_STREAM_PY
// from stream_py -- the macro is defined outside this view.
event *record(py::object stream_py)
{
  PYCUDA_PARSE_STREAM_PY;

  CUDAPP_CALL_GUARDED(cuEventRecord, (m_event, s_handle));

  // The function is declared to return event *; without this statement
  // control would run off the end of a non-void function.
  return this;
}
// Returns the raw CUevent held by this object (no ownership is transferred;
// the handle is returned by value and the object keeps its own copy).
CUevent handle() const
{ return m_event; }
{
CUDAPP_CALL_GUARDED_THREADED(cuEventSynchronize, (m_event));
bool query() const
CUDAPP_PRINT_CALL_TRACE("cuEventQuery");
CUresult result = cuEventQuery(m_event);
switch (result)
{
return false;
default:
CUDAPP_PRINT_ERROR_TRACE("cuEventQuery", result);
throw error("cuEventQuery", result);
}
}
// Returns the value cuEventElapsedTime reports for the interval that begins
// at `start` and ends at this event (milliseconds per the CUDA driver API).
float time_since(event const &start)
{
  float elapsed_ms;
  CUDAPP_CALL_GUARDED(cuEventElapsedTime,
      (&elapsed_ms, start.m_event, m_event));
  return elapsed_ms;
}
// Returns the value cuEventElapsedTime reports for the interval that begins
// at this event and ends at `end` (milliseconds per the CUDA driver API).
float time_till(event const &end)
{
  float elapsed_ms;
  CUDAPP_CALL_GUARDED(cuEventElapsedTime,
      (&elapsed_ms, m_event, end.m_event));
  return elapsed_ms;
}
#if CUDAPP_CUDA_VERSION >= 4010 && PY_VERSION_HEX >= 0x02060000
// Obtains a CUipcEventHandle for this event via cuIpcGetEventHandle and
// returns its raw bytes as a Python bytearray of sizeof(CUipcEventHandle)
// bytes.
py::object ipc_handle()
{
  CUipcEventHandle exported;
  CUDAPP_CALL_GUARDED(cuIpcGetEventHandle, (&exported, m_event));

  const char *raw_bytes = reinterpret_cast<const char *>(&exported);
  PyObject *byte_array =
    PyByteArray_FromStringAndSize(raw_bytes, sizeof(exported));

  // py::handle<> takes over the new reference returned above.
  return py::object(py::handle<>(byte_array));
}
#endif
#if CUDAPP_CUDA_VERSION >= 3020
inline void stream::wait_for_event(const event &evt)
{
CUDAPP_CALL_GUARDED(cuStreamWaitEvent, (m_stream, evt.handle(), 0));
}
#endif
#if CUDAPP_CUDA_VERSION >= 4010 && PY_VERSION_HEX >= 0x02060000
// Builds an event from a Python bytearray holding the raw bytes of a
// CUipcEventHandle (the format produced by event::ipc_handle()).
// Throws pycuda::error with CUDA_ERROR_INVALID_VALUE when `obj` is not a
// bytearray or its length differs from sizeof(CUipcEventHandle).
// The returned event is allocated with new; the caller takes ownership.
inline
event *event_from_ipc_handle(py::object obj)
{
  PyObject *raw = obj.ptr();

  if (!PyByteArray_Check(raw))
    throw pycuda::error("event_from_ipc_handle", CUDA_ERROR_INVALID_VALUE,
        "argument is not a bytes array");

  CUipcEventHandle ipc;
  if (PyByteArray_GET_SIZE(raw) != sizeof(ipc))
    throw pycuda::error("event_from_ipc_handle", CUDA_ERROR_INVALID_VALUE,
        "handle has the wrong size");

  // The size was validated above, so the copy fills `ipc` exactly.
  memcpy(&ipc, PyByteArray_AS_STRING(raw), sizeof(ipc));

  CUevent opened;
  CUDAPP_CALL_GUARDED(cuIpcOpenEventHandle, (&opened, ipc));

  return new event(opened);
}
#endif
inline void initialize_profiler(
const char *config_file,
const char *output_file,
{
CUDAPP_CALL_GUARDED(cuProfilerInitialize, (config_file, output_file, output_mode));
}
// Calls cuProfilerStart (no arguments) through CUDAPP_CALL_GUARDED.
// NOTE(review): the macro presumably turns a failing CUresult into an
// exception -- it is defined outside this view.
inline void start_profiler()
{
CUDAPP_CALL_GUARDED(cuProfilerStart, ());
}
// Calls cuProfilerStop (no arguments) through CUDAPP_CALL_GUARDED.
// NOTE(review): the macro presumably turns a failing CUresult into an
// exception -- it is defined outside this view.
inline void stop_profiler()
{
CUDAPP_CALL_GUARDED(cuProfilerStop, ());
}
#endif