// A C++ wrapper for CUDA
#ifndef _AFJDFJSDFSD_PYCUDA_HEADER_SEEN_CUDA_HPP
#define _AFJDFJSDFSD_PYCUDA_HEADER_SEEN_CUDA_HPP
#include <cuda.h>
#ifdef CUDAPP_PRETEND_CUDA_VERSION
#define CUDAPP_CUDA_VERSION CUDAPP_PRETEND_CUDA_VERSION
#else
#define CUDAPP_CUDA_VERSION CUDA_VERSION
#endif
#if CUDAPP_CUDA_VERSION >= 4000
#include <cudaProfiler.h>
#endif
#include <stdint.h>
#include <stdexcept>
#include <boost/shared_ptr.hpp>
#include <boost/foreach.hpp>
#include <utility>
#include <stack>
#include <iostream>
#include <vector>
#include <boost/python.hpp>
#include <boost/thread/thread.hpp>
#include <boost/thread/tss.hpp>
#include <boost/version.hpp>
// Warn (without failing the build) when Boost is older than 1.35.
#if (BOOST_VERSION/100) < 1035
#warning *****************************************************************
#warning **** Your version of Boost C++ is likely too old for PyCUDA. ****
#warning *****************************************************************
#endif

// MAYBE? cuMemcpy, cuPointerGetAttribute

// TODO: cuCtxSetCurrent, cuCtxGetCurrent
// (use once the old, deprecated functions have been removed from CUDA)

// Define CUDAPP_TRACE_CUDA to make the CUDAPP_PRINT_* macros below write
// every wrapped call (and failure) to std::cerr.
// #define CUDAPP_TRACE_CUDA

// Gates the CUDA_ERROR_ECC_UNCORRECTABLE entry in error::curesult_to_str().
#define CUDAPP_POST_30_BETA
#ifdef CUDAPP_PRETEND_CUDA_VERSION
#define CUDAPP_CUDA_VERSION CUDAPP_PRETEND_CUDA_VERSION
#else
#define CUDAPP_CUDA_VERSION CUDA_VERSION
#endif
#if PY_VERSION_HEX >= 0x02050000
typedef Py_ssize_t PYCUDA_BUFFER_SIZE_T;
#else
typedef int PYCUDA_BUFFER_SIZE_T;
#endif
/* Declares `CUstream s_handle` in the enclosing scope. Expects an object named
 * `stream_py` (exposing `.ptr()`) to be in scope: when it is Python's None,
 * s_handle is 0; otherwise it is the handle of the `stream` extracted from it.
 */
#define PYCUDA_PARSE_STREAM_PY \
    CUstream s_handle = 0; \
    if (stream_py.ptr() != Py_None) \
    { \
      const stream &parsed_stream = py::extract<const stream &>(stream_py); \
      s_handle = parsed_stream.handle(); \
    }
/* Call tracing helpers.
 *
 * With CUDAPP_TRACE_CUDA defined:
 *   CUDAPP_PRINT_CALL_TRACE(NAME)            writes NAME to std::cerr,
 *   CUDAPP_PRINT_CALL_TRACE_INFO(NAME, INFO) writes "NAME (INFO)",
 *   CUDAPP_PRINT_ERROR_TRACE(NAME, CODE)     writes a failure line when
 *                                            CODE != CUDA_SUCCESS.
 * Without it, all three expand to nothing.
 */
#ifdef CUDAPP_TRACE_CUDA
#define CUDAPP_PRINT_CALL_TRACE(NAME) \
    std::cerr << NAME << std::endl;
#define CUDAPP_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) \
    std::cerr << NAME << " (" << EXTRA_INFO << ')' << std::endl;
/* Braced and parenthesised so the `if` cannot capture a caller's `else` and
 * CODE may be an arbitrary expression. */
#define CUDAPP_PRINT_ERROR_TRACE(NAME, CODE) \
    { \
      if ((CODE) != CUDA_SUCCESS) \
        std::cerr << NAME << " failed with code " << (CODE) << std::endl; \
    }
#else
#define CUDAPP_PRINT_CALL_TRACE(NAME) /*nothing*/
#define CUDAPP_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) /*nothing*/
#define CUDAPP_PRINT_ERROR_TRACE(NAME, CODE) /*nothing*/
#endif
/* Calls driver function NAME with the parenthesised argument list ARGLIST
 * between Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS, and throws
 * pycuda::error if the result is not CUDA_SUCCESS. TRACE_INFO is passed to
 * CUDAPP_PRINT_CALL_TRACE_INFO before the call.
 */
#define CUDAPP_CALL_GUARDED_THREADED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \
  { \
    CUDAPP_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \
    CUresult cu_status_code; \
    Py_BEGIN_ALLOW_THREADS \
      cu_status_code = NAME ARGLIST; \
    Py_END_ALLOW_THREADS \
    /* report failures to the trace like the other guarded-call macros do */ \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
      throw pycuda::error(#NAME, cu_status_code);\
  }
/* Calls driver function NAME with the parenthesised argument list ARGLIST
 * and throws pycuda::error if the result is not CUDA_SUCCESS. TRACE_INFO is
 * passed to CUDAPP_PRINT_CALL_TRACE_INFO before the call.
 */
#define CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \
  { \
    CUDAPP_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \
    CUresult cu_status_code; \
    cu_status_code = NAME ARGLIST; \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
      throw pycuda::error(#NAME, cu_status_code);\
  }
/* Calls driver function NAME with the parenthesised argument list ARGLIST
 * between Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS, and throws
 * pycuda::error if the result is not CUDA_SUCCESS.
 */
#define CUDAPP_CALL_GUARDED_THREADED(NAME, ARGLIST) \
  { \
    CUDAPP_PRINT_CALL_TRACE(#NAME); \
    CUresult cu_status_code; \
    Py_BEGIN_ALLOW_THREADS \
      cu_status_code = NAME ARGLIST; \
    Py_END_ALLOW_THREADS \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
      throw pycuda::error(#NAME, cu_status_code);\
  }
/* Calls driver function NAME with the parenthesised argument list ARGLIST
 * and throws pycuda::error if the result is not CUDA_SUCCESS.
 */
#define CUDAPP_CALL_GUARDED(NAME, ARGLIST) \
  { \
    CUDAPP_PRINT_CALL_TRACE(#NAME); \
    CUresult cu_status_code; \
    cu_status_code = NAME ARGLIST; \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
      throw pycuda::error(#NAME, cu_status_code);\
  }
/* Non-throwing variant of the guarded call: invokes driver function NAME with
 * the parenthesised argument list ARGLIST and, if the result is not
 * CUDA_SUCCESS, prints a warning plus the formatted error message to
 * std::cerr instead of raising an exception.
 */
#define CUDAPP_CALL_GUARDED_CLEANUP(NAME, ARGLIST) \
  { \
    CUDAPP_PRINT_CALL_TRACE(#NAME); \
    CUresult cu_status_code = NAME ARGLIST; \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
    { \
      std::cerr \
        << "PyCUDA WARNING: a clean-up operation failed (dead context maybe?)" \
        << std::endl \
        << pycuda::error::make_message(#NAME, cu_status_code) \
        << std::endl; \
    } \
  }
/* Handler clauses to append to a `try` block that performs cleanup: both
 * pycuda::cannot_activate_out_of_thread_context and
 * pycuda::cannot_activate_dead_context are swallowed silently. TYPE is only
 * used by the (disabled) warning text.
 */
#define CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(TYPE) \
  catch (const pycuda::cannot_activate_out_of_thread_context &) \
  { } \
  catch (const pycuda::cannot_activate_dead_context &) \
  { \
    /* PyErr_Warn( \
        PyExc_UserWarning, #TYPE " in dead context was implicitly cleaned up");*/ \
  }
// In all likelihood, this TYPE's managing thread has exited, and
// therefore its context has already been deleted. No need to harp
// on the fact that we still thought there was cleanup to do.
{
namespace py = boost::python;
// Size type: size_t when building against CUDA 3.2 or newer, unsigned int
// otherwise.
typedef
#if CUDAPP_CUDA_VERSION >= 3020
      size_t
#else
      unsigned int
#endif
      pycuda_size_t;
// Integer type returned by the hash() members: `long long` when both _WIN32
// and _WIN64 are defined, `long` everywhere else.
#if defined(_WIN32) && defined(_WIN64)
typedef long long hash_type;
#else
typedef long hash_type;
#endif
class error : public std::runtime_error
{
private:
const char *m_routine;
CUresult m_code;
public:
static std::string make_message(const char *rout, CUresult c, const char *msg=0)
{
std::string result = rout;
result += " failed: ";
result += curesult_to_str(c);
if (msg)
{
result += " - ";
result += msg;
}
return result;
}
error(const char *rout, CUresult c, const char *msg=0)
: std::runtime_error(make_message(rout, c, msg)),
m_routine(rout), m_code(c)
{ }
const char *routine() const
{
return m_routine;
}
CUresult code() const
{
return m_code;
}
bool is_out_of_memory() const
{
return code() == CUDA_ERROR_OUT_OF_MEMORY;
}
static const char *curesult_to_str(CUresult e)
{
switch (e)
{
case CUDA_SUCCESS: return "success";
case CUDA_ERROR_INVALID_VALUE: return "invalid value";
case CUDA_ERROR_OUT_OF_MEMORY: return "out of memory";
case CUDA_ERROR_NOT_INITIALIZED: return "not initialized";
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 2000
case CUDA_ERROR_DEINITIALIZED: return "deinitialized";
#endif
#if CUDAPP_CUDA_VERSION >= 4000
case CUDA_ERROR_PROFILER_DISABLED: return "profiler disabled";
case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "profiler not initialized";
case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "profiler already started";
case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "profiler already stopped";
#endif
case CUDA_ERROR_NO_DEVICE: return "no device";
case CUDA_ERROR_INVALID_DEVICE: return "invalid device";
case CUDA_ERROR_INVALID_IMAGE: return "invalid image";
case CUDA_ERROR_INVALID_CONTEXT: return "invalid context";
case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: return "context already current";
case CUDA_ERROR_MAP_FAILED: return "map failed";
case CUDA_ERROR_UNMAP_FAILED: return "unmap failed";
case CUDA_ERROR_ARRAY_IS_MAPPED: return "array is mapped";
case CUDA_ERROR_ALREADY_MAPPED: return "already mapped";
case CUDA_ERROR_NO_BINARY_FOR_GPU: return "no binary for gpu";
case CUDA_ERROR_ALREADY_ACQUIRED: return "already acquired";
case CUDA_ERROR_NOT_MAPPED: return "not mapped";
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3000
case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "not mapped as array";
case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "not mapped as pointer";
#ifdef CUDAPP_POST_30_BETA
case CUDA_ERROR_ECC_UNCORRECTABLE: return "ECC uncorrectable";
#endif
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3010
case CUDA_ERROR_UNSUPPORTED_LIMIT: return "unsupported limit";
#if CUDAPP_CUDA_VERSION >= 4000
case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "context already in use";
#endif
case CUDA_ERROR_INVALID_SOURCE: return "invalid source";
case CUDA_ERROR_FILE_NOT_FOUND: return "file not found";
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3010
case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
return "shared object symbol not found";
case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
return "shared object init failed";
#endif
case CUDA_ERROR_INVALID_HANDLE: return "invalid handle";
case CUDA_ERROR_NOT_FOUND: return "not found";
case CUDA_ERROR_NOT_READY: return "not ready";
case CUDA_ERROR_LAUNCH_FAILED: return "launch failed";
case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "launch out of resources";
case CUDA_ERROR_LAUNCH_TIMEOUT: return "launch timeout";
case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "launch incompatible texturing";
#if CUDAPP_CUDA_VERSION >= 4000
case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "peer access already enabled";
case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "peer access not enabled";
case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "primary context active";
case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "context is destroyed";
#endif
Andreas Klöckner
committed
#if (CUDAPP_CUDA_VERSION >= 3000) && (CUDAPP_CUDA_VERSION < 3020)
return "attempted to retrieve 64-bit pointer via 32-bit api function";
return "attempted to retrieve 64-bit size via 32-bit api function";
#if CUDAPP_CUDA_VERSION >= 4010
case CUDA_ERROR_ASSERT:
return "device-side assert triggered";
case CUDA_ERROR_TOO_MANY_PEERS:
return "too many peers";
case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
return "host memory already registered";
case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
return "host memory not registered";
#endif
case CUDA_ERROR_UNKNOWN: return "unknown";
}
}
};
// Thrown when a context is asked to become active on a thread other than the
// one recorded as its owner. `w` becomes the what() message.
struct cannot_activate_out_of_thread_context : public std::logic_error
{
  cannot_activate_out_of_thread_context(std::string const &w)
    : std::logic_error(w)
  { }
};
// Thrown when a context that is no longer valid is asked to become active.
// `w` becomes the what() message.
struct cannot_activate_dead_context : public std::logic_error
{
  cannot_activate_dead_context(std::string const &w)
    : std::logic_error(w)
  { }
};
// {{{ version query ------------------------------------------------------------
#if CUDAPP_CUDA_VERSION >= 2020
/* Returns the value reported by cuDriverGetVersion.
 * Throws pycuda::error if the driver call fails.
 */
inline int get_driver_version()
{
  int result;
  CUDAPP_CALL_GUARDED(cuDriverGetVersion, (&result));
  return result;
}
#endif
class context;
class device
{
private:
CUdevice m_device;
public:
device(CUdevice dev)
: m_device(dev)
{ }
static int count()
{
int result;
CUDAPP_CALL_GUARDED(cuDeviceGetCount, (&result));
return result;
}
std::string name()
{
char buffer[1024];
CUDAPP_CALL_GUARDED(cuDeviceGetName, (buffer, sizeof(buffer), m_device));
return buffer;
}
#if CUDAPP_CUDA_VERSION >= 4010
std::string pci_bus_id()
{
char buffer[1024];
CUDAPP_CALL_GUARDED(cuDeviceGetPCIBusId, (buffer, sizeof(buffer), m_device));
return buffer;
}
#endif
py::tuple compute_capability()
{
int major, minor;
CUDAPP_CALL_GUARDED(cuDeviceComputeCapability, (&major, &minor, m_device));
return py::make_tuple(major, minor);
}
CUDAPP_CALL_GUARDED(cuDeviceTotalMem, (&bytes, m_device));
return bytes;
}
{
int result;
CUDAPP_CALL_GUARDED(cuDeviceGetAttribute, (&result, attr, m_device));
return result;
}
bool operator==(const device &other) const
{
return m_device == other.m_device;
}
bool operator!=(const device &other) const
{
return m_device != other.m_device;
}
{
return m_device;
}
boost::shared_ptr<context> make_context(unsigned int flags);
CUdevice handle() const
{ return m_device; }
#if CUDAPP_CUDA_VERSION >= 4000
bool can_access_peer(device const &other)
{
int result;
CUDAPP_CALL_GUARDED(cuDeviceCanAccessPeer, (&result, handle(), other.handle()));
return result;
}
#endif
/* Forwards `flags` to cuInit; throws pycuda::error on failure.
 * Declared inline because it is defined in a header.
 */
inline
void init(unsigned int flags)
{
  CUDAPP_CALL_GUARDED(cuInit, (flags));
}
/* Looks up device number `ordinal` via cuDeviceGet and returns a
 * heap-allocated device wrapper; the caller takes ownership.
 * Throws pycuda::error if the lookup fails.
 * Declared inline because it is defined in a header.
 */
inline
device *make_device(int ordinal)
{
  CUdevice result;
  CUDAPP_CALL_GUARDED(cuDeviceGet, (&result, ordinal));
  return new device(result);
}
#if CUDAPP_CUDA_VERSION >= 4010
/* Looks up a device by PCI bus id string via cuDeviceGetByPCIBusId and
 * returns a heap-allocated device wrapper; the caller takes ownership.
 * Throws pycuda::error if the lookup fails.
 */
inline
device *make_device_from_pci_bus_id(std::string const pci_bus_id)
{
  // constness is cast away for the driver call -- presumably its parameter
  // is a plain char * in the targeted CUDA versions
  char *bus_id_chars = const_cast<char *>(pci_bus_id.c_str());

  CUdevice dev_handle;
  CUDAPP_CALL_GUARDED(cuDeviceGetByPCIBusId, (&dev_handle, bus_id_chars));
  return new device(dev_handle);
}
#endif
/* A word on context management: We don't let CUDA's context stack get more
* than one deep. CUDA only supports pushing floating contexts. We may wish
* to push contexts that are already active at a deeper stack level, so we
* maintain all contexts floating other than the top one.
*/
// Forward declaration only; class context below names this function as a
// friend. The definition is not in this part of the file.
namespace gl
{
  boost::shared_ptr<context> make_gl_context(device const &dev, unsigned int flags);
}
class context_stack;

// Thread-specific pointer to the calling thread's context_stack; only
// declared here, the object itself is defined elsewhere.
extern boost::thread_specific_ptr<context_stack> context_stack_ptr;
class context_stack
{
Andreas Klöckner
committed
/* This wrapper is necessary because we need to pop the contents
* off the stack before we destroy each of the contexts. This, in turn,
* is because the contexts need to be able to access the stack in order
* to be destroyed.
*/
private:
typedef std::stack<boost::shared_ptr<context> > stack_t;
typedef stack_t::value_type value_type;;
stack_t m_stack;
Andreas Klöckner
committed
public:
~context_stack();
bool empty() const
{ return m_stack.empty(); }
value_type &top()
{ return m_stack.top(); }
void pop()
{ m_stack.pop(); }
void push(value_type v)
{ m_stack.push(v); }
static context_stack &get()
{
if (context_stack_ptr.get() == 0)
context_stack_ptr.reset(new context_stack);
return *context_stack_ptr;
}
};
class context : boost::noncopyable
{
private:
CUcontext m_context;
bool m_valid;
unsigned m_use_count;
boost::thread::id m_thread;
context(CUcontext ctx)
: m_context(ctx), m_valid(true), m_use_count(1),
m_thread(boost::this_thread::get_id())
{ }
~context()
Andreas Klöckner
committed
/* It's possible that we get here with a non-zero m_use_count. Since the context
* stack holds shared_ptrs, this must mean that the context stack itself is getting
* destroyed, which means it's ok for this context to sign off, too.
*/
CUcontext handle() const
{ return m_context; }
bool operator==(const context &other) const
{
return m_context == other.m_context;
}
bool operator!=(const context &other) const
{
return m_context != other.m_context;
}
return hash_type(m_context) ^ hash_type(this);
boost::thread::id thread_id() const
{ return m_thread; }
bool is_valid() const
{
return m_valid;
}
void detach()
{
if (m_valid)
{
bool active_before_destruction = current_context().get() == this;
if (active_before_destruction)
CUDAPP_CALL_GUARDED_CLEANUP(cuCtxDetach, (m_context));
{
if (m_thread == boost::this_thread::get_id())
{
Andreas Klöckner
committed
CUDAPP_CALL_GUARDED_CLEANUP(cuCtxPushCurrent, (m_context));
CUDAPP_CALL_GUARDED_CLEANUP(cuCtxDetach, (m_context));
Andreas Klöckner
committed
/* pop is implicit in detach */
}
else
{
// In all likelihood, this context's managing thread has exited, and
// therefore this context has already been deleted. No need to harp
// on the fact that we still thought there was cleanup to do.
// std::cerr << "PyCUDA WARNING: leaked out-of-thread context " << std::endl;
}
}
m_valid = false;
if (active_before_destruction)
{
boost::shared_ptr<context> new_active = current_context(this);
if (new_active.get())
{
CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (new_active->m_context));
}
else
throw error("context::detach", CUDA_ERROR_INVALID_CONTEXT,
"cannot detach from invalid context");
}
static device get_device()
CUDAPP_CALL_GUARDED(cuCtxGetDevice, (&dev));
return device(dev);
}
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 2000
static void prepare_context_switch()
{
Andreas Klöckner
committed
if (!context_stack::get().empty())
{
CUcontext popped;
CUDAPP_CALL_GUARDED(cuCtxPopCurrent, (&popped));
prepare_context_switch();
Andreas Klöckner
committed
context_stack &ctx_stack = context_stack::get();
Andreas Klöckner
committed
if (ctx_stack.empty())
{
throw error("context::pop", CUDA_ERROR_INVALID_CONTEXT,
"cannot pop non-current context");
}
boost::shared_ptr<context> current = current_context();
if (current)
--current->m_use_count;
current = current_context();
CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (current_context()->m_context));
}
#else
static void prepare_context_switch() { }
#endif
static void synchronize()
{ CUDAPP_CALL_GUARDED_THREADED(cuCtxSynchronize, ()); }
static boost::shared_ptr<context> current_context(context *except=0)
while (true)
{
Andreas Klöckner
committed
if (context_stack::get().empty())
return boost::shared_ptr<context>();
Andreas Klöckner
committed
boost::shared_ptr<context> result(context_stack::get().top());
&& result->is_valid())
// good, weak pointer didn't expire
return result;
// context invalid, pop it and try again.
Andreas Klöckner
committed
context_stack::get().pop();
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3010
static void set_limit(CUlimit limit, size_t value)
{
CUDAPP_CALL_GUARDED(cuCtxSetLimit, (limit, value));
}
static size_t get_limit(CUlimit limit)
{
size_t value;
CUDAPP_CALL_GUARDED(cuCtxGetLimit, (&value, limit));
return value;
}
#endif
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3020
static CUfunc_cache get_cache_config()
{
CUfunc_cache value;
CUDAPP_CALL_GUARDED(cuCtxGetCacheConfig, (&value));
return value;
}
static void set_cache_config(CUfunc_cache cc)
{
CUDAPP_CALL_GUARDED(cuCtxSetCacheConfig, (cc));
}
unsigned int get_api_version()
{
unsigned int value;
CUDAPP_CALL_GUARDED(cuCtxGetApiVersion, (m_context, &value));
return value;
}
#endif
#if CUDAPP_CUDA_VERSION >= 4000
static void enable_peer_access(context const &peer, unsigned int flags)
{
CUDAPP_CALL_GUARDED(cuCtxEnablePeerAccess, (peer.handle(), flags));
}
static void disable_peer_access(context const &peer)
{
CUDAPP_CALL_GUARDED(cuCtxDisablePeerAccess, (peer.handle()));
}
#endif
friend class device;
friend void context_push(boost::shared_ptr<context> ctx);
gl::make_gl_context(device const &dev, unsigned int flags);
};
/* Creates a new CUDA context on this device with the given creation flags,
 * wraps it, pushes the wrapper onto the calling thread's context stack and
 * returns it. The driver's current context (if our stack is non-empty) is
 * popped first. Throws pycuda::error if a driver call fails.
 * Declared inline because it is defined in a header.
 */
inline
boost::shared_ptr<context> device::make_context(unsigned int flags)
{
  context::prepare_context_switch();

  CUcontext ctx;
  CUDAPP_CALL_GUARDED(cuCtxCreate, (&ctx, flags, m_device));
  boost::shared_ptr<context> result(new context(ctx));
  context_stack::get().push(result);
  return result;
}
#if CUDAPP_CUDA_VERSION >= 2000
/* Makes `ctx` the current context: pops the driver's current context (if our
 * stack is non-empty), pushes `ctx` in the driver, records it on the calling
 * thread's context stack and increments its use count.
 * Throws pycuda::error if a driver call fails.
 * Declared inline because it is defined in a header.
 */
inline
void context_push(boost::shared_ptr<context> ctx)
{
  context::prepare_context_switch();

  CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (ctx->m_context));
  context_stack::get().push(ctx);
  ++ctx->m_use_count;
}
#endif
Andreas Klöckner
committed
inline context_stack::~context_stack()
{
if (!m_stack.empty())
{
Andreas Klöckner
committed
<< "-------------------------------------------------------------------" << std::endl
<< "PyCUDA ERROR: The context stack was not empty upon module cleanup." << std::endl
<< "-------------------------------------------------------------------" << std::endl
<< "A context was still active when the context stack was being" << std::endl
<< "cleaned up. At this point in our execution, CUDA may already" << std::endl
<< "have been deinitialized, so there is no way we can finish" << std::endl
<< "cleanly. The program will be aborted now." << std::endl
<< "Use Context.pop() to avoid this problem." << std::endl
<< "-------------------------------------------------------------------" << std::endl;
abort();
}
}
/* Base for objects that belong to a particular context. The owning ("ward")
 * context is only recorded when acquire_context() is called explicitly.
 */
class explicit_context_dependent
{
  private:
    boost::shared_ptr<context> m_ward_context;

  public:
    /* Records the calling thread's current context as the ward context.
     * Throws pycuda::error if there is no current context. */
    void acquire_context()
    {
      m_ward_context = context::current_context();
      if (m_ward_context.get() == 0)
        throw error("explicit_context_dependent",
            CUDA_ERROR_INVALID_CONTEXT,
            "no currently active context?");
    }

    // Drops the reference to the ward context.
    void release_context()
    {
      m_ward_context.reset();
    }

    // May be null if no context was acquired or it was released.
    boost::shared_ptr<context> get_context()
    {
      return m_ward_context;
    }
};
/* Context-dependent base that records the current context at construction
 * time. Throws pycuda::error (from acquire_context) if no context is current.
 *
 * The ward context is stored in the explicit_context_dependent base; the
 * previously declared private member of the same name here was never read or
 * written and only shadowed the base member, so it has been removed.
 */
class context_dependent : public explicit_context_dependent
{
  public:
    context_dependent()
    { acquire_context(); }
};
/* Scope guard that makes a given context current for its lifetime.
 *
 * The constructor throws cannot_activate_dead_context if the context is no
 * longer valid. If the context is not already the calling thread's current
 * one, it is pushed -- or cannot_activate_out_of_thread_context is thrown
 * when the calling thread is not the context's owning thread. The destructor
 * pops the context again only if the constructor pushed it.
 */
class scoped_context_activation
{
  private:
    boost::shared_ptr<context> m_context;
    bool m_did_switch;

  public:
    scoped_context_activation(boost::shared_ptr<context> ctx)
      : m_context(ctx)
    {
      if (!m_context->is_valid())
        throw pycuda::cannot_activate_dead_context(
            "cannot activate dead context");

      m_did_switch = context::current_context() != m_context;
      if (m_did_switch)
      {
        if (boost::this_thread::get_id() != m_context->thread_id())
          throw pycuda::cannot_activate_out_of_thread_context(
              "cannot activate out-of-thread context");
#if CUDAPP_CUDA_VERSION >= 2000
        context_push(m_context);
#else
        throw pycuda::error("scoped_context_activation", CUDA_ERROR_INVALID_CONTEXT,
            "not available in CUDA < 2.0");
#endif
      }
    }

    ~scoped_context_activation()
    {
#if CUDAPP_CUDA_VERSION >= 2000
      if (m_did_switch)
        m_context->pop();
#endif
    }
};
// Needed by stream::wait_for_event; the class itself is defined elsewhere.
class event;

/* Owning wrapper around a CUstream. The stream is created in the context
 * that is current at construction time (recorded by context_dependent) and
 * destroyed in the destructor.
 *
 * NOTE(review): braces and the two non-default case labels in is_done() were
 * missing from the text and are restored here -- confirm against upstream.
 */
class stream : public boost::noncopyable, public context_dependent
{
  private:
    CUstream m_stream;

  public:
    stream(unsigned int flags=0)
    { CUDAPP_CALL_GUARDED(cuStreamCreate, (&m_stream, flags)); }

    ~stream()
    {
      try
      {
        // Activate the owning context first, as array::free() does. This is
        // also the only statement here that can throw the exceptions the
        // catch clauses below are written for.
        scoped_context_activation ca(get_context());
        CUDAPP_CALL_GUARDED_CLEANUP(cuStreamDestroy, (m_stream));
      }
      CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(stream);
    }

    // Blocks until cuStreamSynchronize returns.
    void synchronize()
    { CUDAPP_CALL_GUARDED_THREADED(cuStreamSynchronize, (m_stream)); }

    CUstream handle() const
    { return m_stream; }

    // The handle converted to an integer.
    intptr_t handle_int() const
    { return (intptr_t) m_stream; }

#if CUDAPP_CUDA_VERSION >= 3020
    void wait_for_event(const event &evt);
#endif

    /* Polls the stream with cuStreamQuery: true on CUDA_SUCCESS, false on
     * CUDA_ERROR_NOT_READY; any other result is thrown as pycuda::error. */
    bool is_done() const
    {
      CUDAPP_PRINT_CALL_TRACE("cuStreamQuery");
      CUresult result = cuStreamQuery(m_stream);
      switch (result)
      {
        case CUDA_SUCCESS:
          return true;
        case CUDA_ERROR_NOT_READY:
          return false;
        default:
          CUDAPP_PRINT_ERROR_TRACE("cuStreamQuery", result);
          throw error("cuStreamQuery", result);
      }
    }
};
/* Wrapper around a CUarray. When `m_managed` is true the wrapper owns the
 * array and destroys it in free() (also called by the destructor); otherwise
 * the handle is merely borrowed.
 *
 * NOTE(review): the member initialisers below were missing from the text;
 * without them free() would read an uninitialised m_managed. Arrays created
 * by this class are treated as managed -- confirm against upstream.
 */
class array : public boost::noncopyable, public context_dependent
{
  private:
    CUarray m_array;
    bool m_managed;

  public:
    array(const CUDA_ARRAY_DESCRIPTOR &descr)
      : m_managed(true)
    { CUDAPP_CALL_GUARDED(cuArrayCreate, (&m_array, &descr)); }

#if CUDAPP_CUDA_VERSION >= 2000
    array(const CUDA_ARRAY3D_DESCRIPTOR &descr)
      : m_managed(true)
    { CUDAPP_CALL_GUARDED(cuArray3DCreate, (&m_array, &descr)); }
#endif

    // Wraps an existing handle; `managed` says whether free() destroys it.
    array(CUarray ary, bool managed)
      : m_array(ary), m_managed(managed)
    { }

    ~array()
    { free(); }

    /* Destroys the array if it is managed, activating its owning context for
     * the duration of the call, then drops the context reference. Safe to
     * call more than once: later calls do nothing. */
    void free()
    {
      if (m_managed)
      {
        try
        {
          scoped_context_activation ca(get_context());
          CUDAPP_CALL_GUARDED_CLEANUP(cuArrayDestroy, (m_array));
        }
        CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(array);

        m_managed = false;
        release_context();
      }
    }

    CUDA_ARRAY_DESCRIPTOR get_descriptor()
    {
      CUDA_ARRAY_DESCRIPTOR result;
      CUDAPP_CALL_GUARDED(cuArrayGetDescriptor, (&result, m_array));
      return result;
    }

#if CUDAPP_CUDA_VERSION >= 2000
    CUDA_ARRAY3D_DESCRIPTOR get_descriptor_3d()
    {
      CUDA_ARRAY3D_DESCRIPTOR result;
      CUDAPP_CALL_GUARDED(cuArray3DGetDescriptor, (&result, m_array));
      return result;
    }
#endif

    CUarray handle() const
    { return m_array; }
};
class module;
class texture_reference : public boost::noncopyable
{
private:
CUtexref m_texref;
bool m_managed;
// life support for array and module
boost::shared_ptr<array> m_array;