Newer
Older
// A C++ wrapper for CUDA
#ifndef _AFJDFJSDFSD_PYCUDA_HEADER_SEEN_CUDA_HPP
#define _AFJDFJSDFSD_PYCUDA_HEADER_SEEN_CUDA_HPP
// {{{ includes, configuration
#include <cuda.h>
#ifdef CUDAPP_PRETEND_CUDA_VERSION
#define CUDAPP_CUDA_VERSION CUDAPP_PRETEND_CUDA_VERSION
#else
#define CUDAPP_CUDA_VERSION CUDA_VERSION
#endif
#if CUDAPP_CUDA_VERSION >= 4000
#include <cudaProfiler.h>
#endif
#include <stdint.h>
#include <stdexcept>
#include <boost/shared_ptr.hpp>
#include <boost/foreach.hpp>
#include <utility>
#include <stack>
#include <iostream>
#include <vector>
#include <boost/python.hpp>
#include <boost/thread/thread.hpp>
#include <boost/thread/tss.hpp>
#include <boost/version.hpp>
#if (BOOST_VERSION/100) < 1035
#warning *****************************************************************
#warning **** Your version of Boost C++ is likely too old for PyCUDA. ****
#warning *****************************************************************
// MAYBE? cuMemcpy, cuPointerGetAttribute
// TODO: cuCtxSetCurrent, cuCtxGetCurrent
// (use once the old, deprecated functions have been removed from CUDA)
// #define CUDAPP_TRACE_CUDA
#define CUDAPP_POST_30_BETA
Andreas Klöckner
committed
#ifdef CUDAPP_PRETEND_CUDA_VERSION
#define CUDAPP_CUDA_VERSION CUDAPP_PRETEND_CUDA_VERSION
#else
#define CUDAPP_CUDA_VERSION CUDA_VERSION
#endif
#if (PY_VERSION_HEX < 0x02060000)
#error PyCUDA does not support Python 2 versions earlier than 2.6.
#endif
#if (PY_VERSION_HEX >= 0x03000000) && (PY_VERSION_HEX < 0x03030000)
#error PyCUDA does not support Python 3 versions earlier than 3.3.
#endif
typedef Py_ssize_t PYCUDA_BUFFER_SIZE_T;
// Expands to a declaration of a local `CUstream s_handle` initialized from a
// variable named `stream_py` that must be in scope at the expansion site:
// when stream_py wraps Python's None, s_handle is 0; otherwise stream_py is
// extracted as a `const stream &` and s_handle receives its handle().
#define PYCUDA_PARSE_STREAM_PY \
CUstream s_handle; \
if (stream_py.ptr() != Py_None) \
{ \
const stream &s = py::extract<const stream &>(stream_py); \
s_handle = s.handle(); \
} \
else \
s_handle = 0;
// Tracing hooks used by the CUDAPP_CALL_GUARDED* macros.
// With CUDAPP_TRACE_CUDA defined, each hook writes a line to std::cerr;
// otherwise every hook expands to nothing.
#ifdef CUDAPP_TRACE_CUDA
  // Logs the name of the routine about to be called.
  #define CUDAPP_PRINT_CALL_TRACE(NAME) \
    std::cerr << NAME << std::endl;
  // Logs the routine name followed by caller-supplied detail in parentheses.
  #define CUDAPP_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) \
    std::cerr << NAME << " (" << EXTRA_INFO << ')' << std::endl;
  // Logs the routine name and status code, but only for a failing status.
  #define CUDAPP_PRINT_ERROR_TRACE(NAME, CODE) \
    if (CODE != CUDA_SUCCESS) \
      std::cerr << NAME << " failed with code " << CODE << std::endl;
#else
  #define CUDAPP_PRINT_CALL_TRACE(NAME) /*nothing*/
  #define CUDAPP_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) /*nothing*/
  #define CUDAPP_PRINT_ERROR_TRACE(NAME, CODE) /*nothing*/
#endif
// Calls the driver routine NAME with the parenthesized argument list ARGLIST
// between Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS, and throws
// pycuda::error when the returned status is not CUDA_SUCCESS.
// TRACE_INFO is passed to the call-trace hook. A failing status is also
// passed to the error-trace hook, as in the other guarded-call macros.
#define CUDAPP_CALL_GUARDED_THREADED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \
  { \
    CUDAPP_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \
    CUresult cu_status_code; \
    Py_BEGIN_ALLOW_THREADS \
      cu_status_code = NAME ARGLIST; \
    Py_END_ALLOW_THREADS \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
      throw pycuda::error(#NAME, cu_status_code);\
  }
// Calls the driver routine NAME with the parenthesized argument list ARGLIST
// and throws pycuda::error when the returned status is not CUDA_SUCCESS.
// TRACE_INFO is passed to the call-trace hook.
// The expansion is a complete brace-enclosed block, so the status variable
// stays local to each use.
#define CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \
  { \
    CUDAPP_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \
    CUresult cu_status_code; \
    cu_status_code = NAME ARGLIST; \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
      throw pycuda::error(#NAME, cu_status_code);\
  }
// Calls the driver routine NAME with the parenthesized argument list ARGLIST
// between Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS, and throws
// pycuda::error when the returned status is not CUDA_SUCCESS.
// The expansion is a complete brace-enclosed block, so the status variable
// stays local and the macro can be used more than once per scope.
#define CUDAPP_CALL_GUARDED_THREADED(NAME, ARGLIST) \
  { \
    CUDAPP_PRINT_CALL_TRACE(#NAME); \
    CUresult cu_status_code; \
    Py_BEGIN_ALLOW_THREADS \
      cu_status_code = NAME ARGLIST; \
    Py_END_ALLOW_THREADS \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
      throw pycuda::error(#NAME, cu_status_code);\
  }
// Calls the driver routine NAME with the parenthesized argument list ARGLIST
// and throws pycuda::error when the returned status is not CUDA_SUCCESS.
// The expansion is a complete brace-enclosed block, so the status variable
// stays local and the macro can be used more than once per scope.
#define CUDAPP_CALL_GUARDED(NAME, ARGLIST) \
  { \
    CUDAPP_PRINT_CALL_TRACE(#NAME); \
    CUresult cu_status_code; \
    cu_status_code = NAME ARGLIST; \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
      throw pycuda::error(#NAME, cu_status_code);\
  }
// Variant of the guarded call for clean-up paths: calls the driver routine
// NAME with the parenthesized argument list ARGLIST, and on a status other
// than CUDA_SUCCESS writes a warning plus the formatted error message to
// std::cerr instead of throwing.
#define CUDAPP_CALL_GUARDED_CLEANUP(NAME, ARGLIST) \
{ \
CUDAPP_PRINT_CALL_TRACE(#NAME); \
CUresult cu_status_code; \
cu_status_code = NAME ARGLIST; \
CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
if (cu_status_code != CUDA_SUCCESS) \
std::cerr \
<< "PyCUDA WARNING: a clean-up operation failed (dead context maybe?)" \
<< std::endl \
<< pycuda::error::make_message(#NAME, cu_status_code) \
<< std::endl; \
}
// Handler clauses to place directly after a `try` block that releases a
// resource (TYPE names the resource kind). Both context-activation failures
// are swallowed on purpose:
// In all likelihood, this TYPE's managing thread has exited, and
// therefore its context has already been deleted. No need to harp
// on the fact that we still thought there was cleanup to do.
#define CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(TYPE) \
  catch (pycuda::cannot_activate_out_of_thread_context) \
  { } \
  catch (pycuda::cannot_activate_dead_context) \
  { \
    /* PyErr_Warn( \
        PyExc_UserWarning, #TYPE " in dead context was implicitly cleaned up");*/ \
  }
{
namespace py = boost::python;
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3020
unsigned int
typedef
#if defined(_WIN32) && defined(_WIN64)
long long
#else
long
#endif
hash_type;
class error : public std::runtime_error
{
private:
const char *m_routine;
CUresult m_code;
public:
static std::string make_message(const char *rout, CUresult c, const char *msg=0)
{
std::string result = rout;
result += " failed: ";
result += curesult_to_str(c);
if (msg)
{
result += " - ";
result += msg;
}
return result;
}
error(const char *rout, CUresult c, const char *msg=0)
: std::runtime_error(make_message(rout, c, msg)),
m_routine(rout), m_code(c)
{ }
const char *routine() const
{
return m_routine;
}
CUresult code() const
{
return m_code;
}
bool is_out_of_memory() const
{
return code() == CUDA_ERROR_OUT_OF_MEMORY;
}
static const char *curesult_to_str(CUresult e)
{
switch (e)
{
case CUDA_SUCCESS: return "success";
case CUDA_ERROR_INVALID_VALUE: return "invalid value";
case CUDA_ERROR_OUT_OF_MEMORY: return "out of memory";
case CUDA_ERROR_NOT_INITIALIZED: return "not initialized";
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 2000
case CUDA_ERROR_DEINITIALIZED: return "deinitialized";
#endif
#if CUDAPP_CUDA_VERSION >= 4000
case CUDA_ERROR_PROFILER_DISABLED: return "profiler disabled";
case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "profiler not initialized";
case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "profiler already started";
case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "profiler already stopped";
#endif
case CUDA_ERROR_NO_DEVICE: return "no device";
case CUDA_ERROR_INVALID_DEVICE: return "invalid device";
case CUDA_ERROR_INVALID_IMAGE: return "invalid image";
case CUDA_ERROR_INVALID_CONTEXT: return "invalid context";
case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: return "context already current";
case CUDA_ERROR_MAP_FAILED: return "map failed";
case CUDA_ERROR_UNMAP_FAILED: return "unmap failed";
case CUDA_ERROR_ARRAY_IS_MAPPED: return "array is mapped";
case CUDA_ERROR_ALREADY_MAPPED: return "already mapped";
case CUDA_ERROR_NO_BINARY_FOR_GPU: return "no binary for gpu";
case CUDA_ERROR_ALREADY_ACQUIRED: return "already acquired";
case CUDA_ERROR_NOT_MAPPED: return "not mapped";
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3000
case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "not mapped as array";
case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "not mapped as pointer";
#ifdef CUDAPP_POST_30_BETA
case CUDA_ERROR_ECC_UNCORRECTABLE: return "ECC uncorrectable";
#endif
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3010
case CUDA_ERROR_UNSUPPORTED_LIMIT: return "unsupported limit";
#if CUDAPP_CUDA_VERSION >= 4000
case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "context already in use";
#endif
case CUDA_ERROR_INVALID_SOURCE: return "invalid source";
case CUDA_ERROR_FILE_NOT_FOUND: return "file not found";
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3010
case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
return "shared object symbol not found";
case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
return "shared object init failed";
#endif
case CUDA_ERROR_INVALID_HANDLE: return "invalid handle";
case CUDA_ERROR_NOT_FOUND: return "not found";
case CUDA_ERROR_NOT_READY: return "not ready";
case CUDA_ERROR_LAUNCH_FAILED: return "launch failed";
case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "launch out of resources";
case CUDA_ERROR_LAUNCH_TIMEOUT: return "launch timeout";
case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "launch incompatible texturing";
#if CUDAPP_CUDA_VERSION >= 4000
case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "peer access already enabled";
case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "peer access not enabled";
case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "primary context active";
case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "context is destroyed";
#endif
Andreas Klöckner
committed
#if (CUDAPP_CUDA_VERSION >= 3000) && (CUDAPP_CUDA_VERSION < 3020)
return "attempted to retrieve 64-bit pointer via 32-bit api function";
return "attempted to retrieve 64-bit size via 32-bit api function";
#if CUDAPP_CUDA_VERSION >= 4010
case CUDA_ERROR_ASSERT:
return "device-side assert triggered";
case CUDA_ERROR_TOO_MANY_PEERS:
return "too many peers";
case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
return "host memory already registered";
case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
return "host memory not registered";
#endif
Stan Seibert
committed
#if CUDAPP_CUDA_VERSION >= 5000
case CUDA_ERROR_NOT_SUPPORTED:
return "operation not supported on current system or device";
#endif
case CUDA_ERROR_UNKNOWN: return "unknown";
}
}
};
// Logic error thrown when a context is to be activated from a thread other
// than the one recorded for it; w becomes the what() text.
struct cannot_activate_out_of_thread_context : public std::logic_error
{
  cannot_activate_out_of_thread_context(std::string const &w)
    : std::logic_error(w)
  { }
};
// Logic error thrown when a context that is no longer valid is to be
// activated; w becomes the what() text.
struct cannot_activate_dead_context : public std::logic_error
{
  cannot_activate_dead_context(std::string const &w)
    : std::logic_error(w)
  { }
};
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
// {{{ buffer interface helper
class py_buffer_wrapper : public boost::noncopyable
{
private:
bool m_initialized;
public:
Py_buffer m_buf;
py_buffer_wrapper()
: m_initialized(false)
{}
void get(PyObject *obj, int flags)
{
if (PyObject_GetBuffer(obj, &m_buf, flags))
throw py::error_already_set();
m_initialized = true;
}
virtual ~py_buffer_wrapper()
{
if (m_initialized)
PyBuffer_Release(&m_buf);
}
};
// }}}
// {{{ version query ------------------------------------------------------------
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 2020
inline int get_driver_version()
{
int result;
CUDAPP_CALL_GUARDED(cuDriverGetVersion, (&result));
return result;
}
#endif
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
class context;
class device
{
private:
CUdevice m_device;
public:
// Wraps the given driver device handle; no driver call is made here.
device(CUdevice dev)
: m_device(dev)
{ }
static int count()
{
int result;
CUDAPP_CALL_GUARDED(cuDeviceGetCount, (&result));
return result;
}
// Returns the device name written by cuDeviceGetName into a 1024-byte
// scratch buffer.
std::string name()
{
  char name_buf[1024];
  CUDAPP_CALL_GUARDED(cuDeviceGetName, (name_buf, sizeof(name_buf), m_device));
  return std::string(name_buf);
}
#if CUDAPP_CUDA_VERSION >= 4010
// Returns the PCI bus id string written by cuDeviceGetPCIBusId into a
// 1024-byte scratch buffer.
std::string pci_bus_id()
{
  char bus_id_buf[1024];
  CUDAPP_CALL_GUARDED(cuDeviceGetPCIBusId, (bus_id_buf, sizeof(bus_id_buf), m_device));
  return std::string(bus_id_buf);
}
#endif
// Returns a Python tuple of the two values written by
// cuDeviceComputeCapability, in (major, minor) order.
py::tuple compute_capability()
{
  int cc_major;
  int cc_minor;
  CUDAPP_CALL_GUARDED(cuDeviceComputeCapability, (&cc_major, &cc_minor, m_device));
  return py::make_tuple(cc_major, cc_minor);
}
CUDAPP_CALL_GUARDED(cuDeviceTotalMem, (&bytes, m_device));
return bytes;
}
{
int result;
CUDAPP_CALL_GUARDED(cuDeviceGetAttribute, (&result, attr, m_device));
return result;
}
// Two device wrappers are equal when they hold the same driver handle.
bool operator==(const device &other) const
{ return other.m_device == m_device; }
// Negation of the handle comparison used for equality.
bool operator!=(const device &other) const
{ return !(m_device == other.m_device); }
{
return m_device;
}
boost::shared_ptr<context> make_context(unsigned int flags);
// Raw driver handle wrapped by this object.
CUdevice handle() const
{
  return m_device;
}
#if CUDAPP_CUDA_VERSION >= 4000
// Reports the answer of cuDeviceCanAccessPeer for (this device, other):
// true for any non-zero result.
bool can_access_peer(device const &other)
{
  int peer_ok;
  CUDAPP_CALL_GUARDED(cuDeviceCanAccessPeer, (&peer_ok, handle(), other.handle()));
  return peer_ok != 0;
}
#endif
// Calls cuInit(flags); a failing call is turned into a pycuda::error by the
// guard macro. Marked inline because it is defined in a header.
inline
void init(unsigned int flags)
{
  CUDAPP_CALL_GUARDED(cuInit, (flags));
}
// Looks up the device with the given ordinal via cuDeviceGet and returns a
// heap-allocated wrapper for it; the caller receives ownership of the
// pointer. Marked inline because it is defined in a header.
inline
device *make_device(int ordinal)
{
  CUdevice result;
  CUDAPP_CALL_GUARDED(cuDeviceGet, (&result, ordinal));
  return new device(result);
}
#if CUDAPP_CUDA_VERSION >= 4010
// Looks up a device by PCI bus id string via cuDeviceGetByPCIBusId and
// returns a heap-allocated wrapper for it; the caller receives ownership of
// the pointer.
inline
device *make_device_from_pci_bus_id(std::string const pci_bus_id)
{
  // The const is cast away only to match the parameter type used in the
  // original call.
  char *bus_id_chars = const_cast<char *>(pci_bus_id.c_str());

  CUdevice found;
  CUDAPP_CALL_GUARDED(cuDeviceGetByPCIBusId, (&found, bus_id_chars));
  return new device(found);
}
#endif
/* A word on context management: We don't let CUDA's context stack get more
* than one deep. CUDA only supports pushing floating contexts. We may wish
* to push contexts that are already active at a deeper stack level, so we
* maintain all contexts floating other than the top one.
*/
namespace gl {
boost::shared_ptr<context>
make_gl_context(device const &dev, unsigned int flags);
}
Andreas Klöckner
committed
class context_stack;
extern boost::thread_specific_ptr<context_stack> context_stack_ptr;
Andreas Klöckner
committed
class context_stack
{
Andreas Klöckner
committed
/* This wrapper is necessary because we need to pop the contents
* off the stack before we destroy each of the contexts. This, in turn,
* is because the contexts need to be able to access the stack in order
* to be destroyed.
*/
private:
typedef std::stack<boost::shared_ptr<context> > stack_t;
typedef stack_t::value_type value_type;;
stack_t m_stack;
Andreas Klöckner
committed
public:
~context_stack();
bool empty() const
{ return m_stack.empty(); }
value_type &top()
{ return m_stack.top(); }
void pop()
{ m_stack.pop(); }
void push(value_type v)
{ m_stack.push(v); }
static context_stack &get()
{
if (context_stack_ptr.get() == 0)
context_stack_ptr.reset(new context_stack);
return *context_stack_ptr;
}
};
class context : boost::noncopyable
{
private:
CUcontext m_context;
bool m_valid;
unsigned m_use_count;
boost::thread::id m_thread;
context(CUcontext ctx)
: m_context(ctx), m_valid(true), m_use_count(1),
m_thread(boost::this_thread::get_id())
{ }
~context()
Andreas Klöckner
committed
/* It's possible that we get here with a non-zero m_use_count. Since the context
* stack holds shared_ptrs, this must mean that the context stack itself is getting
* destroyed, which means it's ok for this context to sign off, too.
*/
CUcontext handle() const
{ return m_context; }
// Two context wrappers are equal when they hold the same driver handle.
bool operator==(const context &other) const
{ return other.m_context == m_context; }
// Negation of the handle comparison used for equality.
bool operator!=(const context &other) const
{ return !(m_context == other.m_context); }
return hash_type(m_context) ^ hash_type(this);
boost::thread::id thread_id() const
{ return m_thread; }
bool is_valid() const
{
return m_valid;
}
// Obtains a context handle through cuCtxAttach, wraps it, pushes the wrapper
// onto the calling thread's context stack and returns it.
static boost::shared_ptr<context> attach(unsigned int flags)
{
  CUcontext current;
  // The address-of expression had been corrupted into a stray currency
  // sign; cuCtxAttach needs a pointer to the handle it fills in.
  CUDAPP_CALL_GUARDED(cuCtxAttach, (&current, flags));
  boost::shared_ptr<context> result(new context(current));
  context_stack::get().push(result);
  return result;
}
// Detaches from the wrapped driver context and marks this wrapper invalid.
// Throws pycuda::error (CUDA_ERROR_INVALID_CONTEXT) when the wrapper is
// already invalid. Failures of the driver calls made during the detach
// itself are only reported as warnings (clean-up guard).
void detach()
{
  if (m_valid)
  {
    bool active_before_destruction = current_context().get() == this;
    if (active_before_destruction)
    {
      CUDAPP_CALL_GUARDED_CLEANUP(cuCtxDetach, (m_context));
    }
    else
    {
      // Not current: the context has to be pushed before it can be
      // detached, which is only attempted on the thread recorded for it.
      if (m_thread == boost::this_thread::get_id())
      {
        CUDAPP_CALL_GUARDED_CLEANUP(cuCtxPushCurrent, (m_context));
        CUDAPP_CALL_GUARDED_CLEANUP(cuCtxDetach, (m_context));
        /* pop is implicit in detach */
      }
      else
      {
        // In all likelihood, this context's managing thread has exited, and
        // therefore this context has already been deleted. No need to harp
        // on the fact that we still thought there was cleanup to do.

        // std::cerr << "PyCUDA WARNING: leaked out-of-thread context " << std::endl;
      }
    }

    m_valid = false;

    if (active_before_destruction)
    {
      // Re-activate whichever valid context is next on the stack, if any.
      // Having none left is not an error.
      boost::shared_ptr<context> new_active = current_context(this);
      if (new_active.get())
      {
        CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (new_active->m_context));
      }
    }
  }
  else
    throw error("context::detach", CUDA_ERROR_INVALID_CONTEXT,
        "cannot detach from invalid context");
}
// Returns a wrapper for the device handle written by cuCtxGetDevice.
static device get_device()
{
  CUdevice dev;
  CUDAPP_CALL_GUARDED(cuCtxGetDevice, (&dev));
  return device(dev);
}
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 2000
static void prepare_context_switch()
{
Andreas Klöckner
committed
if (!context_stack::get().empty())
{
CUcontext popped;
CUDAPP_CALL_GUARDED(cuCtxPopCurrent, (&popped));
prepare_context_switch();
Andreas Klöckner
committed
context_stack &ctx_stack = context_stack::get();
Andreas Klöckner
committed
if (ctx_stack.empty())
{
throw error("context::pop", CUDA_ERROR_INVALID_CONTEXT,
"cannot pop non-current context");
}
boost::shared_ptr<context> current = current_context();
if (current)
--current->m_use_count;
current = current_context();
CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (current_context()->m_context));
}
#else
static void prepare_context_switch() { }
#endif
// Calls cuCtxSynchronize through the threaded guard macro.
static void synchronize()
{
  CUDAPP_CALL_GUARDED_THREADED(cuCtxSynchronize, ());
}
// Returns the topmost entry of this thread's context stack that is valid and
// is not `except`, discarding every entry above it. Returns an empty pointer
// when no such entry exists.
static boost::shared_ptr<context> current_context(context *except=0)
{
  while (true)
  {
    if (context_stack::get().empty())
      return boost::shared_ptr<context>();

    boost::shared_ptr<context> result(context_stack::get().top());
    if (result.get() != except
        && result->is_valid())
    {
      // good, weak pointer didn't expire
      return result;
    }

    // context invalid, pop it and try again.
    context_stack::get().pop();
  }
}
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3010
static void set_limit(CUlimit limit, size_t value)
{
CUDAPP_CALL_GUARDED(cuCtxSetLimit, (limit, value));
}
// Returns the value written by cuCtxGetLimit for the given limit.
static size_t get_limit(CUlimit limit)
{
  size_t limit_value;
  CUDAPP_CALL_GUARDED(cuCtxGetLimit, (&limit_value, limit));
  return limit_value;
}
#endif
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3020
// Returns the cache configuration written by cuCtxGetCacheConfig.
static CUfunc_cache get_cache_config()
{
  CUfunc_cache cache_cfg;
  CUDAPP_CALL_GUARDED(cuCtxGetCacheConfig, (&cache_cfg));
  return cache_cfg;
}
static void set_cache_config(CUfunc_cache cc)
{
CUDAPP_CALL_GUARDED(cuCtxSetCacheConfig, (cc));
}
// Returns the API version written by cuCtxGetApiVersion for this context.
unsigned int get_api_version()
{
  unsigned int api_version;
  CUDAPP_CALL_GUARDED(cuCtxGetApiVersion, (m_context, &api_version));
  return api_version;
}
#endif
#if CUDAPP_CUDA_VERSION >= 4000
static void enable_peer_access(context const &peer, unsigned int flags)
{
CUDAPP_CALL_GUARDED(cuCtxEnablePeerAccess, (peer.handle(), flags));
}
static void disable_peer_access(context const &peer)
{
CUDAPP_CALL_GUARDED(cuCtxDisablePeerAccess, (peer.handle()));
}
#endif
#if CUDAPP_CUDA_VERSION >= 4020
// Returns the shared-memory configuration written by cuCtxGetSharedMemConfig.
static CUsharedconfig get_shared_config()
{
  CUsharedconfig shared_cfg;
  CUDAPP_CALL_GUARDED(cuCtxGetSharedMemConfig, (&shared_cfg));
  return shared_cfg;
}
static void set_shared_config(CUsharedconfig config)
{
CUDAPP_CALL_GUARDED(cuCtxSetSharedMemConfig, (config));
}
#endif
friend class device;
friend void context_push(boost::shared_ptr<context> ctx);
gl::make_gl_context(device const &dev, unsigned int flags);
};
// Creates a driver context on this device via cuCtxCreate, wraps it, pushes
// the wrapper onto the calling thread's context stack and returns it.
// Any context that is current beforehand is popped first. Marked inline
// because it is defined in a header.
inline
boost::shared_ptr<context> device::make_context(unsigned int flags)
{
  context::prepare_context_switch();

  CUcontext ctx;
  CUDAPP_CALL_GUARDED(cuCtxCreate, (&ctx, flags, m_device));
  boost::shared_ptr<context> result(new context(ctx));
  context_stack::get().push(result);
  return result;
}
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 2000
// Makes ctx the driver's current context (after popping whatever was
// current), pushes it onto the calling thread's context stack and increments
// its use count. Marked inline because it is defined in a header.
inline
void context_push(boost::shared_ptr<context> ctx)
{
  context::prepare_context_switch();

  CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (ctx->m_context));
  context_stack::get().push(ctx);
  ++ctx->m_use_count;
}
#endif
Andreas Klöckner
committed
// Destroying a non-empty stack is treated as unrecoverable: a diagnostic is
// written to std::cerr and the process is aborted.
inline context_stack::~context_stack()
{
  if (!m_stack.empty())
  {
    std::cerr
      << "-------------------------------------------------------------------" << std::endl
      << "PyCUDA ERROR: The context stack was not empty upon module cleanup." << std::endl
      << "-------------------------------------------------------------------" << std::endl
      << "A context was still active when the context stack was being" << std::endl
      << "cleaned up. At this point in our execution, CUDA may already" << std::endl
      << "have been deinitialized, so there is no way we can finish" << std::endl
      << "cleanly. The program will be aborted now." << std::endl
      << "Use Context.pop() to avoid this problem." << std::endl
      << "-------------------------------------------------------------------" << std::endl;
    abort();
  }
}
// Base for objects that keep a shared pointer to a context. The pointer is
// set only by an explicit acquire_context() call.
class explicit_context_dependent
{
  private:
    boost::shared_ptr<context> m_ward_context;

  public:
    // Stores the calling thread's current context.
    // Throws pycuda::error (CUDA_ERROR_INVALID_CONTEXT) when there is none.
    void acquire_context()
    {
      m_ward_context = context::current_context();
      if (m_ward_context.get() == 0)
        throw error("explicit_context_dependent",
            CUDA_ERROR_INVALID_CONTEXT,
            "no currently active context?");
    }

    // Drops the stored context pointer.
    void release_context()
    {
      m_ward_context.reset();
    }

    // Returns the stored pointer; empty before acquire_context() or after
    // release_context().
    boost::shared_ptr<context> get_context()
    {
      return m_ward_context;
    }
};
// Context-dependent base that acquires the calling thread's current context
// at construction. The constructor throws whatever acquire_context() throws
// when no context is current.
// The context pointer lives in explicit_context_dependent. A second private
// member of the same name here was never read or written, so it is dropped.
class context_dependent : public explicit_context_dependent
{
  public:
    context_dependent()
    { acquire_context(); }
};
// Scope guard that makes a given context current for its lifetime. If the
// context was not already current, the constructor pushes it and the
// destructor pops it again.
class scoped_context_activation
{
  private:
    boost::shared_ptr<context> m_context;
    // True when the constructor pushed m_context and the destructor
    // therefore has to pop it.
    bool m_did_switch;

  public:
    // Throws cannot_activate_dead_context when ctx is no longer valid, and
    // cannot_activate_out_of_thread_context when a switch is needed but ctx
    // is recorded for a different thread.
    scoped_context_activation(boost::shared_ptr<context> ctx)
      : m_context(ctx)
    {
      if (!m_context->is_valid())
        throw pycuda::cannot_activate_dead_context(
            "cannot activate dead context");

      m_did_switch = context::current_context() != m_context;
      if (m_did_switch)
      {
        if (boost::this_thread::get_id() != m_context->thread_id())
          throw pycuda::cannot_activate_out_of_thread_context(
              "cannot activate out-of-thread context");
#if CUDAPP_CUDA_VERSION >= 2000
        context_push(m_context);
#else
        throw pycuda::error("scoped_context_activation", CUDA_ERROR_INVALID_CONTEXT,
            "not available in CUDA < 2.0");
#endif
      }
    }

    ~scoped_context_activation()
    {
#if CUDAPP_CUDA_VERSION >= 2000
      if (m_did_switch)
        m_context->pop();
#endif
    }
};
// wait_for_event() below only names the type, so a declaration suffices.
class event;

// Owns a CUstream created in the context that is current at construction
// (captured by the context_dependent base) and destroys it on destruction.
class stream : public boost::noncopyable, public context_dependent
{
  private:
    CUstream m_stream;

  public:
    stream(unsigned int flags=0)
    { CUDAPP_CALL_GUARDED(cuStreamCreate, (&m_stream, flags)); }

    // The try block is nested inside the destructor body: a handler of a
    // destructor's function-try-block rethrows on exit, which would defeat
    // the purpose of the catch macro.
    // NOTE(review): the context activation line was missing in this file; it
    // is the only statement that can throw what the macro catches. Confirm
    // against the project history.
    ~stream()
    {
      try
      {
        scoped_context_activation ca(get_context());
        CUDAPP_CALL_GUARDED_CLEANUP(cuStreamDestroy, (m_stream));
      }
      CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(stream);
    }

    void synchronize()
    { CUDAPP_CALL_GUARDED_THREADED(cuStreamSynchronize, (m_stream)); }

    CUstream handle() const
    { return m_stream; }

    // The stream handle converted to an integer.
    intptr_t handle_int() const
    { return (intptr_t) m_stream; }

#if CUDAPP_CUDA_VERSION >= 3020
    void wait_for_event(const event &evt);
#endif

    // Polls the stream with cuStreamQuery: true on CUDA_SUCCESS, false on
    // CUDA_ERROR_NOT_READY, and a pycuda::error for any other status.
    bool is_done() const
    {
      CUDAPP_PRINT_CALL_TRACE("cuStreamQuery");
      CUresult result = cuStreamQuery(m_stream);
      switch (result)
      {
        case CUDA_SUCCESS:
          return true;
        case CUDA_ERROR_NOT_READY:
          return false;
        default:
          CUDAPP_PRINT_ERROR_TRACE("cuStreamQuery", result);
          throw error("cuStreamQuery", result);
      }
    }
};
class array : public boost::noncopyable, public context_dependent