Newer
Older
// A C++ wrapper for CUDA
#ifndef _AFJDFJSDFSD_PYCUDA_HEADER_SEEN_CUDA_HPP
#define _AFJDFJSDFSD_PYCUDA_HEADER_SEEN_CUDA_HPP
#include <cuda.h>
#ifdef CUDAPP_PRETEND_CUDA_VERSION
#define CUDAPP_CUDA_VERSION CUDAPP_PRETEND_CUDA_VERSION
#else
#define CUDAPP_CUDA_VERSION CUDA_VERSION
#endif
#if CUDAPP_CUDA_VERSION >= 4000
#include <cudaProfiler.h>
#endif
#include <stdint.h>
#include <stdexcept>
#include <boost/shared_ptr.hpp>
#include <boost/foreach.hpp>
#include <utility>
#include <stack>
#include <iostream>
#include <vector>
#include <boost/python.hpp>
#include <boost/thread/thread.hpp>
#include <boost/thread/tss.hpp>
#include <boost/version.hpp>
// Warn at compile time when the Boost headers predate 1.35.
#if (BOOST_VERSION/100) < 1035
#warning *****************************************************************
#warning **** Your version of Boost C++ is likely too old for PyCUDA. ****
#warning *****************************************************************
#endif
// TODO: cuMemcpy, cuMemcpyPeer, cuMemcpyPeerAsync
// TODO: in structured memcpy: set_{src,dest}_unified()
//
// TODO: cuCtxSetCurrent, cuCtxGetCurrent
// (use once the old, deprecated functions have been removed from CUDA)
// #define CUDAPP_TRACE_CUDA
#define CUDAPP_POST_30_BETA
Andreas Klöckner
committed
#ifdef CUDAPP_PRETEND_CUDA_VERSION
#define CUDAPP_CUDA_VERSION CUDAPP_PRETEND_CUDA_VERSION
#else
#define CUDAPP_CUDA_VERSION CUDA_VERSION
#endif
// Integer type used for buffer sizes: Py_ssize_t when PY_VERSION_HEX is at
// least 0x02050000, plain int otherwise.
typedef
#if PY_VERSION_HEX >= 0x02050000
  Py_ssize_t
#else
  int
#endif
  PYCUDA_BUFFER_SIZE_T;
// Call tracing helpers. With CUDAPP_TRACE_CUDA defined, each macro writes to
// std::cerr; otherwise all three expand to nothing.
#ifdef CUDAPP_TRACE_CUDA
  // Prints the name of the call being made.
  #define CUDAPP_PRINT_CALL_TRACE(NAME) \
    std::cerr << NAME << std::endl;
  // Prints the name of the call followed by EXTRA_INFO in parentheses.
  #define CUDAPP_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) \
    std::cerr << NAME << " (" << EXTRA_INFO << ')' << std::endl;
  // Prints a failure line when CODE is not CUDA_SUCCESS.
  #define CUDAPP_PRINT_ERROR_TRACE(NAME, CODE) \
    if (CODE != CUDA_SUCCESS) \
      std::cerr << NAME << " failed with code " << CODE << std::endl;
#else
  #define CUDAPP_PRINT_CALL_TRACE(NAME) /*nothing*/
  #define CUDAPP_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) /*nothing*/
  #define CUDAPP_PRINT_ERROR_TRACE(NAME, CODE) /*nothing*/
#endif
// Evaluates NAME ARGLIST between Py_BEGIN_ALLOW_THREADS and
// Py_END_ALLOW_THREADS, tracing the call name together with TRACE_INFO.
// Throws pycuda::error if the result is not CUDA_SUCCESS.
// The error trace is emitted here as in the other guarded-call macros.
#define CUDAPP_CALL_GUARDED_THREADED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \
  { \
    CUDAPP_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \
    CUresult cu_status_code; \
    Py_BEGIN_ALLOW_THREADS \
      cu_status_code = NAME ARGLIST; \
    Py_END_ALLOW_THREADS \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
      throw pycuda::error(#NAME, cu_status_code);\
  }
// Evaluates NAME ARGLIST, tracing the call name together with TRACE_INFO.
// Throws pycuda::error if the result is not CUDA_SUCCESS.
// The closing brace terminates the macro so that its last line no longer
// continues into the following #define.
#define CUDAPP_CALL_GUARDED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \
  { \
    CUDAPP_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \
    CUresult cu_status_code; \
    cu_status_code = NAME ARGLIST; \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
      throw pycuda::error(#NAME, cu_status_code);\
  }
// Evaluates NAME ARGLIST between Py_BEGIN_ALLOW_THREADS and
// Py_END_ALLOW_THREADS. Throws pycuda::error if the result is not
// CUDA_SUCCESS. The enclosing braces give cu_status_code its own scope, so
// the macro can be used more than once in the same function.
#define CUDAPP_CALL_GUARDED_THREADED(NAME, ARGLIST) \
  { \
    CUDAPP_PRINT_CALL_TRACE(#NAME); \
    CUresult cu_status_code; \
    Py_BEGIN_ALLOW_THREADS \
      cu_status_code = NAME ARGLIST; \
    Py_END_ALLOW_THREADS \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
      throw pycuda::error(#NAME, cu_status_code);\
  }
// Evaluates NAME ARGLIST and throws pycuda::error if the result is not
// CUDA_SUCCESS. The closing brace terminates the macro so that its last line
// no longer continues into the following #define.
#define CUDAPP_CALL_GUARDED(NAME, ARGLIST) \
  { \
    CUDAPP_PRINT_CALL_TRACE(#NAME); \
    CUresult cu_status_code; \
    cu_status_code = NAME ARGLIST; \
    CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
    if (cu_status_code != CUDA_SUCCESS) \
      throw pycuda::error(#NAME, cu_status_code);\
  }
// Variant of the guarded call for clean-up paths: evaluates NAME ARGLIST and,
// instead of throwing when the result is not CUDA_SUCCESS, writes a warning
// plus the formatted error message to std::cerr and carries on.
#define CUDAPP_CALL_GUARDED_CLEANUP(NAME, ARGLIST) \
{ \
CUDAPP_PRINT_CALL_TRACE(#NAME); \
CUresult cu_status_code; \
cu_status_code = NAME ARGLIST; \
CUDAPP_PRINT_ERROR_TRACE(#NAME, cu_status_code); \
if (cu_status_code != CUDA_SUCCESS) \
std::cerr \
<< "PyCUDA WARNING: a clean-up operation failed (dead context maybe?)" \
<< std::endl \
<< pycuda::error::make_message(#NAME, cu_status_code) \
<< std::endl; \
}
// Handler pair to place after a try block in clean-up code: silently swallows
// both context-activation failures. Each catch clause needs its own handler
// body; the first one is intentionally empty.
#define CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(TYPE) \
  catch (pycuda::cannot_activate_out_of_thread_context) \
  { } \
  catch (pycuda::cannot_activate_dead_context) \
  { \
    /* PyErr_Warn( \
        PyExc_UserWarning, #TYPE " in dead context was implicitly cleaned up");*/ \
  }
// In all likelihood, this TYPE's managing thread has exited, and
// therefore its context has already been deleted. No need to harp
// on the fact that we still thought there was cleanup to do.
{
namespace py = boost::python;
Andreas Klöckner
committed
// Size type used for driver API byte counts: size_t when
// CUDAPP_CUDA_VERSION is at least 3020, unsigned int otherwise.
// The leading `typedef` is required; without it this declares a variable
// named pycuda_size_t instead of a type.
typedef
#if CUDAPP_CUDA_VERSION >= 3020
  size_t
#else
  unsigned int
#endif
  pycuda_size_t;
// Integer type returned by hash functions: long long when both _WIN32 and
// _WIN64 are defined, long everywhere else.
#if defined(_WIN32) && defined(_WIN64)
typedef long long hash_type;
#else
typedef long hash_type;
#endif
class error : public std::runtime_error
{
private:
const char *m_routine;
CUresult m_code;
public:
static std::string make_message(const char *rout, CUresult c, const char *msg=0)
{
std::string result = rout;
result += " failed: ";
result += curesult_to_str(c);
if (msg)
{
result += " - ";
result += msg;
}
return result;
}
error(const char *rout, CUresult c, const char *msg=0)
: std::runtime_error(make_message(rout, c, msg)),
m_routine(rout), m_code(c)
{ }
const char *routine() const
{
return m_routine;
}
CUresult code() const
{
return m_code;
}
bool is_out_of_memory() const
{
return code() == CUDA_ERROR_OUT_OF_MEMORY;
}
static const char *curesult_to_str(CUresult e)
{
switch (e)
{
case CUDA_SUCCESS: return "success";
case CUDA_ERROR_INVALID_VALUE: return "invalid value";
case CUDA_ERROR_OUT_OF_MEMORY: return "out of memory";
case CUDA_ERROR_NOT_INITIALIZED: return "not initialized";
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 2000
case CUDA_ERROR_DEINITIALIZED: return "deinitialized";
#endif
#if CUDAPP_CUDA_VERSION >= 4000
case CUDA_ERROR_PROFILER_DISABLED: return "profiler disabled";
case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "profiler not initialized";
case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "profiler already started";
case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "profiler already stopped";
#endif
case CUDA_ERROR_NO_DEVICE: return "no device";
case CUDA_ERROR_INVALID_DEVICE: return "invalid device";
case CUDA_ERROR_INVALID_IMAGE: return "invalid image";
case CUDA_ERROR_INVALID_CONTEXT: return "invalid context";
case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: return "context already current";
case CUDA_ERROR_MAP_FAILED: return "map failed";
case CUDA_ERROR_UNMAP_FAILED: return "unmap failed";
case CUDA_ERROR_ARRAY_IS_MAPPED: return "array is mapped";
case CUDA_ERROR_ALREADY_MAPPED: return "already mapped";
case CUDA_ERROR_NO_BINARY_FOR_GPU: return "no binary for gpu";
case CUDA_ERROR_ALREADY_ACQUIRED: return "already acquired";
case CUDA_ERROR_NOT_MAPPED: return "not mapped";
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3000
case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "not mapped as array";
case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "not mapped as pointer";
#ifdef CUDAPP_POST_30_BETA
case CUDA_ERROR_ECC_UNCORRECTABLE: return "ECC uncorrectable";
#endif
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3010
case CUDA_ERROR_UNSUPPORTED_LIMIT: return "unsupported limit";
#if CUDAPP_CUDA_VERSION >= 4000
case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "context already in use";
#endif
case CUDA_ERROR_INVALID_SOURCE: return "invalid source";
case CUDA_ERROR_FILE_NOT_FOUND: return "file not found";
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3010
case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
return "shared object symbol not found";
case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
return "shared object init failed";
#endif
case CUDA_ERROR_INVALID_HANDLE: return "invalid handle";
case CUDA_ERROR_NOT_FOUND: return "not found";
case CUDA_ERROR_NOT_READY: return "not ready";
case CUDA_ERROR_LAUNCH_FAILED: return "launch failed";
case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "launch out of resources";
case CUDA_ERROR_LAUNCH_TIMEOUT: return "launch timeout";
case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "launch incompatible texturing";
#if CUDAPP_CUDA_VERSION >= 4000
case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "peer access already enabled";
case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "peer access not enabled";
case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "primary context active";
case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "context is destroyed";
#endif
Andreas Klöckner
committed
#if (CUDAPP_CUDA_VERSION >= 3000) && (CUDAPP_CUDA_VERSION < 3020)
return "attempted to retrieve 64-bit pointer via 32-bit api function";
return "attempted to retrieve 64-bit size via 32-bit api function";
case CUDA_ERROR_UNKNOWN: return "unknown";
default: return "invalid error code";
}
}
};
// Thrown when a context is asked to become active on a thread other than the
// one recorded as its owner.
struct cannot_activate_out_of_thread_context : public std::logic_error
{
  cannot_activate_out_of_thread_context(std::string const &w)
    : std::logic_error(w)
  { }
};
// Thrown when a context that is no longer valid is asked to become active.
struct cannot_activate_dead_context : public std::logic_error
{
  cannot_activate_dead_context(std::string const &w)
    : std::logic_error(w)
  { }
};
// {{{ version query ------------------------------------------------------------
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 2020
// Queries cuDriverGetVersion and returns the value it reports.
// Throws pycuda::error if the call fails.
inline int get_driver_version()
{
  int version;
  CUDAPP_CALL_GUARDED(cuDriverGetVersion, (&version));
  return version;
}
#endif
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
class context;
class device
{
private:
CUdevice m_device;
public:
device(CUdevice dev)
: m_device(dev)
{ }
static int count()
{
int result;
CUDAPP_CALL_GUARDED(cuDeviceGetCount, (&result));
return result;
}
// Returns the device name reported by cuDeviceGetName, read through a
// 1024-byte stack buffer. Throws pycuda::error if the call fails.
std::string name()
{
  char name_buf[1024];
  CUDAPP_CALL_GUARDED(cuDeviceGetName, (name_buf, sizeof(name_buf), m_device));
  return std::string(name_buf);
}
// Returns the (major, minor) pair reported by cuDeviceComputeCapability as a
// Python tuple. Throws pycuda::error if the call fails.
py::tuple compute_capability()
{
  int cc_major;
  int cc_minor;
  CUDAPP_CALL_GUARDED(cuDeviceComputeCapability, (&cc_major, &cc_minor, m_device));
  return py::make_tuple(cc_major, cc_minor);
}
CUDAPP_CALL_GUARDED(cuDeviceTotalMem, (&bytes, m_device));
return bytes;
}
{
int result;
CUDAPP_CALL_GUARDED(cuDeviceGetAttribute, (&result, attr, m_device));
return result;
}
// Two device objects are equal when they wrap the same CUdevice.
bool operator==(const device &other) const
{ return other.m_device == m_device; }
// Negation of equality on the wrapped CUdevice.
bool operator!=(const device &other) const
{ return !(m_device == other.m_device); }
{
return m_device;
}
boost::shared_ptr<context> make_context(unsigned int flags);
// Exposes the wrapped CUdevice.
CUdevice handle() const
{
  return m_device;
}
#if CUDAPP_CUDA_VERSION >= 4000
// Asks cuDeviceCanAccessPeer about this device and `other`; returns true
// when the reported flag is non-zero. Throws pycuda::error on failure.
bool can_access_peer(device const &other)
{
  int peer_flag;
  CUDAPP_CALL_GUARDED(cuDeviceCanAccessPeer, (&peer_flag, handle(), other.handle()));
  return peer_flag != 0;
}
#endif
// Calls cuInit with the given flags; throws pycuda::error on failure.
// Declared inline because it is defined in a header.
inline void init(unsigned int flags)
{
  CUDAPP_CALL_GUARDED(cuInit, (flags));
}
// Looks up the device with the given ordinal via cuDeviceGet and returns a
// heap-allocated wrapper; the caller takes ownership of the pointer.
// Throws pycuda::error on failure. Declared inline because it is defined in
// a header.
inline device *make_device(int ordinal)
{
  CUdevice result;
  CUDAPP_CALL_GUARDED(cuDeviceGet, (&result, ordinal));
  return new device(result);
}
/* A word on context management: We don't let CUDA's context stack get more
* than one deep. CUDA only supports pushing floating contexts. We may wish
* to push contexts that are already active at a deeper stack level, so we
* maintain all contexts floating other than the top one.
*/
namespace gl {
boost::shared_ptr<context>
make_gl_context(device const &dev, unsigned int flags);
}
Andreas Klöckner
committed
class context_stack;
extern boost::thread_specific_ptr<context_stack> context_stack_ptr;
Andreas Klöckner
committed
class context_stack
{
Andreas Klöckner
committed
/* This wrapper is necessary because we need to pop the contents
* off the stack before we destroy each of the contexts. This, in turn,
* is because the contexts need to be able to access the stack in order
* to be destroyed.
*/
private:
typedef std::stack<boost::shared_ptr<context> > stack_t;
typedef stack_t::value_type value_type;;
stack_t m_stack;
Andreas Klöckner
committed
public:
~context_stack();
bool empty() const
{ return m_stack.empty(); }
value_type &top()
{ return m_stack.top(); }
void pop()
{ m_stack.pop(); }
void push(value_type v)
{ m_stack.push(v); }
static context_stack &get()
{
if (context_stack_ptr.get() == 0)
context_stack_ptr.reset(new context_stack);
return *context_stack_ptr;
}
};
class context : boost::noncopyable
{
private:
CUcontext m_context;
bool m_valid;
unsigned m_use_count;
boost::thread::id m_thread;
context(CUcontext ctx)
: m_context(ctx), m_valid(true), m_use_count(1),
m_thread(boost::this_thread::get_id())
{ }
~context()
Andreas Klöckner
committed
/* It's possible that we get here with a non-zero m_use_count. Since the context
* stack holds shared_ptrs, this must mean that the context stack itself is getting
* destroyed, which means it's ok for this context to sign off, too.
*/
// Exposes the wrapped CUcontext.
CUcontext handle() const
{
  return m_context;
}
// Two context objects are equal when they wrap the same CUcontext.
bool operator==(const context &other) const
{ return other.m_context == m_context; }
// Negation of equality on the wrapped CUcontext.
bool operator!=(const context &other) const
{ return !(m_context == other.m_context); }
return hash_type(m_context) ^ hash_type(this);
// Id of the thread recorded when this object was constructed.
boost::thread::id thread_id() const
{
  return m_thread;
}
// False once detach() has run on this object.
bool is_valid() const
{ return m_valid; }
// Detaches the underlying CUcontext and marks this object invalid.
//
// If this context is the current one, it is detached directly and the next
// valid context on the stack (if any) is pushed back afterwards. If it is
// not current but belongs to the calling thread, it is pushed first and then
// detached. If it belongs to another thread, nothing is done to the driver.
//
// Throws pycuda::error (CUDA_ERROR_INVALID_CONTEXT) when called on a context
// that is already invalid.
void detach()
{
  if (m_valid)
  {
    bool active_before_destruction = current_context().get() == this;
    if (active_before_destruction)
    {
      CUDAPP_CALL_GUARDED_CLEANUP(cuCtxDetach, (m_context));
    }
    else
    {
      if (m_thread == boost::this_thread::get_id())
      {
        CUDAPP_CALL_GUARDED_CLEANUP(cuCtxPushCurrent, (m_context));
        CUDAPP_CALL_GUARDED_CLEANUP(cuCtxDetach, (m_context));
        /* pop is implicit in detach */
      }
      else
      {
        // In all likelihood, this context's managing thread has exited, and
        // therefore this context has already been deleted. No need to harp
        // on the fact that we still thought there was cleanup to do.

        // std::cerr << "PyCUDA WARNING: leaked out-of-thread context " << std::endl;
      }
    }

    m_valid = false;

    if (active_before_destruction)
    {
      // Passing `this` as the exception skips this (now invalid) context.
      boost::shared_ptr<context> new_active = current_context(this);
      if (new_active.get())
      {
        CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (new_active->m_context));
      }
    }
  }
  else
    throw error("context::detach", CUDA_ERROR_INVALID_CONTEXT,
        "cannot detach from invalid context");
}
// Returns a device wrapper for the value reported by cuCtxGetDevice.
// Throws pycuda::error if the call fails.
static device get_device()
{
  CUdevice dev;
  CUDAPP_CALL_GUARDED(cuCtxGetDevice, (&dev));
  return device(dev);
}
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 2000
static void prepare_context_switch()
{
Andreas Klöckner
committed
if (!context_stack::get().empty())
{
CUcontext popped;
CUDAPP_CALL_GUARDED(cuCtxPopCurrent, (&popped));
prepare_context_switch();
Andreas Klöckner
committed
context_stack &ctx_stack = context_stack::get();
Andreas Klöckner
committed
if (ctx_stack.empty())
{
throw error("context::pop", CUDA_ERROR_INVALID_CONTEXT,
"cannot pop non-current context");
}
boost::shared_ptr<context> current = current_context();
if (current)
--current->m_use_count;
current = current_context();
CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (current_context()->m_context));
}
#else
static void prepare_context_switch() { }
#endif
// Calls cuCtxSynchronize through the threaded guard (the call runs between
// Py_BEGIN_ALLOW_THREADS and Py_END_ALLOW_THREADS). Throws pycuda::error on
// failure.
static void synchronize()
{
  CUDAPP_CALL_GUARDED_THREADED(cuCtxSynchronize, ());
}
// Returns the topmost context on this thread's stack that is still valid and
// is not `except`, popping every entry that fails that test on the way.
// Returns an empty shared_ptr once the stack is exhausted.
static boost::shared_ptr<context> current_context(context *except=0)
{
  while (true)
  {
    if (context_stack::get().empty())
      return boost::shared_ptr<context>();

    boost::shared_ptr<context> result(context_stack::get().top());
    if (result.get() != except
        && result->is_valid())
    {
      // good, weak pointer didn't expire
      return result;
    }

    // context invalid, pop it and try again.
    context_stack::get().pop();
  }
}
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3010
// Forwards to cuCtxSetLimit; throws pycuda::error on failure.
static void set_limit(CUlimit limit, size_t value)
{ CUDAPP_CALL_GUARDED(cuCtxSetLimit, (limit, value)); }
// Returns the value reported by cuCtxGetLimit for `limit`.
// Throws pycuda::error on failure.
static size_t get_limit(CUlimit limit)
{
  size_t current_value;
  CUDAPP_CALL_GUARDED(cuCtxGetLimit, (&current_value, limit));
  return current_value;
}
#endif
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 3020
// Returns the value reported by cuCtxGetCacheConfig.
// Throws pycuda::error on failure.
static CUfunc_cache get_cache_config()
{
  CUfunc_cache config;
  CUDAPP_CALL_GUARDED(cuCtxGetCacheConfig, (&config));
  return config;
}
// Forwards to cuCtxSetCacheConfig; throws pycuda::error on failure.
static void set_cache_config(CUfunc_cache cc)
{ CUDAPP_CALL_GUARDED(cuCtxSetCacheConfig, (cc)); }
// Returns the value cuCtxGetApiVersion reports for this context.
// Throws pycuda::error on failure.
unsigned int get_api_version()
{
  unsigned int api_version;
  CUDAPP_CALL_GUARDED(cuCtxGetApiVersion, (m_context, &api_version));
  return api_version;
}
#endif
#if CUDAPP_CUDA_VERSION >= 4000
// Forwards the peer's CUcontext and flags to cuCtxEnablePeerAccess.
// Throws pycuda::error on failure.
static void enable_peer_access(context const &peer, unsigned int flags)
{ CUDAPP_CALL_GUARDED(cuCtxEnablePeerAccess, (peer.handle(), flags)); }
// Forwards the peer's CUcontext to cuCtxDisablePeerAccess.
// Throws pycuda::error on failure.
static void disable_peer_access(context const &peer)
{ CUDAPP_CALL_GUARDED(cuCtxDisablePeerAccess, (peer.handle())); }
#endif
friend class device;
friend void context_push(boost::shared_ptr<context> ctx);
gl::make_gl_context(device const &dev, unsigned int flags);
};
// Creates a context on this device with cuCtxCreate, wraps it, pushes the
// wrapper onto the calling thread's context stack and returns it.
// Throws pycuda::error if context creation fails.
// Declared inline (like context_stack::~context_stack) because it is defined
// in a header and would otherwise be defined once per including source file.
inline
boost::shared_ptr<context> device::make_context(unsigned int flags)
{
  context::prepare_context_switch();

  CUcontext ctx;
  CUDAPP_CALL_GUARDED(cuCtxCreate, (&ctx, flags, m_device));
  boost::shared_ptr<context> result(new context(ctx));
  context_stack::get().push(result);
  return result;
}
Andreas Klöckner
committed
#if CUDAPP_CUDA_VERSION >= 2000
// Makes ctx current via cuCtxPushCurrent, records it on the calling thread's
// context stack and increments its use count.
// Throws pycuda::error if the push fails.
// Declared inline because it is defined in a header.
inline
void context_push(boost::shared_ptr<context> ctx)
{
  context::prepare_context_switch();

  CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (ctx->m_context));
  context_stack::get().push(ctx);
  ++ctx->m_use_count;
}
#endif
Andreas Klöckner
committed
// If any context is still on the stack at destruction time, prints an
// explanatory banner to std::cerr and aborts the process.
inline context_stack::~context_stack()
{
  if (!m_stack.empty())
  {
    std::cerr
      << "-------------------------------------------------------------------" << std::endl
      << "PyCUDA ERROR: The context stack was not empty upon module cleanup." << std::endl
      << "-------------------------------------------------------------------" << std::endl
      << "A context was still active when the context stack was being" << std::endl
      << "cleaned up. At this point in our execution, CUDA may already" << std::endl
      << "have been deinitialized, so there is no way we can finish" << std::endl
      << "cleanly. The program will be aborted now." << std::endl
      << "Use Context.pop() to avoid this problem." << std::endl
      << "-------------------------------------------------------------------" << std::endl;
    abort();
  }
}
// Base for objects that keep a reference to a context. The reference is
// taken and dropped explicitly through acquire_context()/release_context().
class explicit_context_dependent
{
  private:
    boost::shared_ptr<context> m_ward_context;

  public:
    // Captures the currently active context.
    // Throws pycuda::error (CUDA_ERROR_INVALID_CONTEXT) if there is none.
    void acquire_context()
    {
      m_ward_context = context::current_context();
      if (m_ward_context.get() == 0)
        throw error("explicit_context_dependent",
            CUDA_ERROR_INVALID_CONTEXT,
            "no currently active context?");
    }

    // Drops the held context reference.
    void release_context()
    {
      m_ward_context.reset();
    }

    // Returns the held context (empty if none has been acquired).
    boost::shared_ptr<context> get_context()
    {
      return m_ward_context;
    }
};
// explicit_context_dependent that acquires the current context on
// construction. The held context lives in the base class; a second private
// m_ward_context here was never read or written and only shadowed the base
// member, so it has been removed.
class context_dependent : public explicit_context_dependent
{
  public:
    context_dependent()
    { acquire_context(); }
};
// Scope guard that makes a given context current for its lifetime.
// If the context is not already current it is pushed on construction and
// popped again on destruction.
class scoped_context_activation
{
  private:
    boost::shared_ptr<context> m_context;
    bool m_did_switch;   // true when the constructor pushed m_context

  public:
    // Throws cannot_activate_dead_context if ctx is no longer valid, and
    // cannot_activate_out_of_thread_context if a switch is needed but ctx
    // was created on a different thread. With CUDA < 2.0 a needed switch
    // throws pycuda::error instead.
    scoped_context_activation(boost::shared_ptr<context> ctx)
      : m_context(ctx)
    {
      if (!m_context->is_valid())
        throw pycuda::cannot_activate_dead_context(
            "cannot activate dead context");

      m_did_switch = context::current_context() != m_context;
      if (m_did_switch)
      {
        if (boost::this_thread::get_id() != m_context->thread_id())
          throw pycuda::cannot_activate_out_of_thread_context(
              "cannot activate out-of-thread context");
#if CUDAPP_CUDA_VERSION >= 2000
        context_push(m_context);
#else
        throw pycuda::error("scoped_context_activation", CUDA_ERROR_INVALID_CONTEXT,
            "not available in CUDA < 2.0");
#endif
      }
    }

    ~scoped_context_activation()
    {
#if CUDAPP_CUDA_VERSION >= 2000
      if (m_did_switch)
        m_context->pop();
#endif
    }
};
class event;

// Owning wrapper around a CUstream. The stream is created in the context
// that is current at construction time (captured by context_dependent).
class stream : public boost::noncopyable, public context_dependent
{
  private:
    CUstream m_stream;

  public:
    // Throws pycuda::error if cuStreamCreate fails.
    stream(unsigned int flags=0)
    { CUDAPP_CALL_GUARDED(cuStreamCreate, (&m_stream, flags)); }

    // Destroys the stream with its owning context activated, as array::free
    // does; activation is what can raise the exceptions that
    // CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT swallows.
    ~stream()
    {
      try
      {
        scoped_context_activation ca(get_context());
        CUDAPP_CALL_GUARDED_CLEANUP(cuStreamDestroy, (m_stream));
      }
      CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(stream);
    }

    // Calls cuStreamSynchronize through the threaded guard.
    // Throws pycuda::error on failure.
    void synchronize()
    { CUDAPP_CALL_GUARDED_THREADED(cuStreamSynchronize, (m_stream)); }

    CUstream handle() const
    { return m_stream; }

    // The stream handle reinterpreted as an integer.
    intptr_t handle_int() const
    { return (intptr_t) m_stream; }

#if CUDAPP_CUDA_VERSION >= 3020
    void wait_for_event(const event &evt);
#endif

    // True when cuStreamQuery reports CUDA_SUCCESS, false when it reports
    // CUDA_ERROR_NOT_READY; any other result throws pycuda::error.
    bool is_done() const
    {
      CUDAPP_PRINT_CALL_TRACE("cuStreamQuery");
      CUresult result = cuStreamQuery(m_stream);
      switch (result)
      {
        case CUDA_SUCCESS:
          return true;
        case CUDA_ERROR_NOT_READY:
          return false;
        default:
          CUDAPP_PRINT_ERROR_TRACE("cuStreamQuery", result);
          throw error("cuStreamQuery", result);
      }
    }
};
// Wrapper around a CUarray. When m_managed is true the wrapper owns the
// array and destroys it in free(); otherwise the handle is only borrowed.
class array : public boost::noncopyable, public context_dependent
{
  private:
    CUarray m_array;
    bool m_managed;

  public:
    // Creates and owns a new array. m_managed must be set here: free() reads
    // it, and leaving it uninitialised would make destruction undefined.
    array(const CUDA_ARRAY_DESCRIPTOR &descr)
      : m_managed(true)
    { CUDAPP_CALL_GUARDED(cuArrayCreate, (&m_array, &descr)); }

#if CUDAPP_CUDA_VERSION >= 2000
    array(const CUDA_ARRAY3D_DESCRIPTOR &descr)
      : m_managed(true)
    { CUDAPP_CALL_GUARDED(cuArray3DCreate, (&m_array, &descr)); }
#endif

    // Wraps an existing handle; `managed` decides whether free() destroys it.
    array(CUarray ary, bool managed)
      : m_array(ary), m_managed(managed)
    { }

    ~array()
    { free(); }

    // Destroys the array if owned, with its context activated, then releases
    // the context reference. Safe to call repeatedly: m_managed is cleared
    // after the first call. Failures to activate the context are swallowed.
    void free()
    {
      if (m_managed)
      {
        try
        {
          scoped_context_activation ca(get_context());
          CUDAPP_CALL_GUARDED_CLEANUP(cuArrayDestroy, (m_array));
        }
        CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(array);

        m_managed = false;
        release_context();
      }
    }

    // Throws pycuda::error if cuArrayGetDescriptor fails.
    CUDA_ARRAY_DESCRIPTOR get_descriptor()
    {
      CUDA_ARRAY_DESCRIPTOR result;
      CUDAPP_CALL_GUARDED(cuArrayGetDescriptor, (&result, m_array));
      return result;
    }

#if CUDAPP_CUDA_VERSION >= 2000
    // Throws pycuda::error if cuArray3DGetDescriptor fails.
    CUDA_ARRAY3D_DESCRIPTOR get_descriptor_3d()
    {
      CUDA_ARRAY3D_DESCRIPTOR result;
      CUDAPP_CALL_GUARDED(cuArray3DGetDescriptor, (&result, m_array));
      return result;
    }
#endif

    CUarray handle() const
    { return m_array; }
};
class module;
class texture_reference : public boost::noncopyable
{
private:
CUtexref m_texref;
bool m_managed;
// life support for array and module
boost::shared_ptr<array> m_array;
boost::shared_ptr<module> m_module;
public:
texture_reference()
: m_managed(true)
{ CUDAPP_CALL_GUARDED(cuTexRefCreate, (&m_texref)); }
texture_reference(CUtexref tr, bool managed)
: m_texref(tr), m_managed(managed)
{ }
// Destroys the texture reference when this object owns it (m_managed).
// A failing cuTexRefDestroy is reported on std::cerr rather than thrown.
~texture_reference()
{
  if (m_managed)
  {
    CUDAPP_CALL_GUARDED_CLEANUP(cuTexRefDestroy, (m_texref));
  }
}
void set_module(boost::shared_ptr<module> mod)
{ m_module = mod; }
CUtexref handle() const
{ return m_texref; }
// Binds `ary` to this texture reference with CU_TRSA_OVERRIDE_FORMAT and,
// once the driver call has succeeded, keeps a shared reference to it so the
// array stays alive. Throws pycuda::error if cuTexRefSetArray fails.
void set_array(boost::shared_ptr<array> ary)
{
  CUDAPP_CALL_GUARDED(cuTexRefSetArray,
      (m_texref, ary->handle(), CU_TRSA_OVERRIDE_FORMAT));

  m_array = ary;
}
pycuda_size_t set_address(CUdeviceptr dptr, unsigned int bytes, bool allow_offset=false)
CUDAPP_CALL_GUARDED(cuTexRefSetAddress, (&byte_offset,
if (!allow_offset && byte_offset != 0)
throw pycuda::error("texture_reference::set_address", CUDA_ERROR_INVALID_VALUE,
"texture binding resulted in offset, but allow_offset was false");