Skip to content
cuda.hpp 56.4 KiB
Newer Older
  struct managed_allocation : public device_allocation
      managed_allocation(size_t bytesize, unsigned flags=0)
        : device_allocation(mem_managed_alloc(bytesize, flags))
      { }

      // The device pointer is also valid on the host
      void *data()
      void attach(unsigned flags, py::object stream_py)
      {
        PYCUDA_PARSE_STREAM_PY;

        CUDAPP_CALL_GUARDED(cuStreamAttachMemAsync, (s_handle, m_devptr, 0, flags));
      }

#if CUDAPP_CUDA_VERSION >= 4000
  struct registered_host_memory : public host_pointer
  {
    private:
      py::object m_base;

    public:
      registered_host_memory(void *p, size_t bytes, unsigned int flags=0,
          py::object base=py::object())
        : host_pointer(mem_host_register(p, bytes, flags)), m_base(base)
      {
      }

      /* Don't try to be clever and coalesce these in the base class.
       * Won't work: Destructors may not call virtual functions.
       */
      ~registered_host_memory()
      {
        if (m_valid)
          free();
      }

      void free()
      {
        if (m_valid)
        {
          try
          {
            scoped_context_activation ca(get_context());
            mem_host_unregister(m_data);
          }
          CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(host_allocation);
          release_context();
          m_valid = false;
        }
        else
          throw pycuda::error("registered_host_memory::free", CUDA_ERROR_INVALID_HANDLE);
      }

      py::object base() const
      {
        return m_base;
      }
  // }}}
  // {{{ event
Andreas Kloeckner's avatar
Andreas Kloeckner committed
  class event : public boost::noncopyable, public context_dependent
      event(unsigned int flags=0)
      { CUDAPP_CALL_GUARDED(cuEventCreate, (&m_event, flags)); }

Andreas Klöckner's avatar
Andreas Klöckner committed
      event(CUevent evt)
      : m_event(evt)
      { }

        try
        {
          scoped_context_activation ca(get_context());
          CUDAPP_CALL_GUARDED_CLEANUP(cuEventDestroy, (m_event));
        CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(event);
      event *record(py::object stream_py)
        PYCUDA_PARSE_STREAM_PY;
        CUDAPP_CALL_GUARDED(cuEventRecord, (m_event, s_handle));
      CUevent handle() const
      { return m_event; }

      event *synchronize()
      {
        CUDAPP_CALL_GUARDED_THREADED(cuEventSynchronize, (m_event));
        CUDAPP_PRINT_CALL_TRACE("cuEventQuery");

        CUresult result = cuEventQuery(m_event);
        switch (result)
        {
          case CUDA_SUCCESS:
          case CUDA_ERROR_NOT_READY:
            CUDAPP_PRINT_ERROR_TRACE("cuEventQuery", result);
            throw error("cuEventQuery", result);
        }
      }

      float time_since(event const &start)
      {
        float result;
        CUDAPP_CALL_GUARDED(cuEventElapsedTime, (&result, start.m_event, m_event));
        return result;
      }

      float time_till(event const &end)
      {
        float result;
        CUDAPP_CALL_GUARDED(cuEventElapsedTime, (&result, m_event, end.m_event));
        return result;
      }
Andreas Klöckner's avatar
Andreas Klöckner committed

#if CUDAPP_CUDA_VERSION >= 4010 && PY_VERSION_HEX >= 0x02060000
      py::object ipc_handle()
      {
        CUipcEventHandle handle;
        CUDAPP_CALL_GUARDED(cuIpcGetEventHandle, (&handle, m_event));
        return py::object(py::handle<>(PyByteArray_FromStringAndSize(
              reinterpret_cast<const char *>(&handle),
              sizeof(handle))));
      }
#endif
#if CUDAPP_CUDA_VERSION >= 3020
  inline void stream::wait_for_event(const event &evt)
  {
    CUDAPP_CALL_GUARDED(cuStreamWaitEvent, (m_stream, evt.handle(), 0));
  }
#endif

Andreas Klöckner's avatar
Andreas Klöckner committed
#if CUDAPP_CUDA_VERSION >= 4010 && PY_VERSION_HEX >= 0x02060000
  inline
  event *event_from_ipc_handle(py::object obj)
  {
    if (!PyByteArray_Check(obj.ptr()))
      throw pycuda::error("event_from_ipc_handle", CUDA_ERROR_INVALID_VALUE,
          "argument is not a bytes array");
    CUipcEventHandle handle;
    if (PyByteArray_GET_SIZE(obj.ptr()) != sizeof(handle))
      throw pycuda::error("event_from_ipc_handle", CUDA_ERROR_INVALID_VALUE,
          "handle has the wrong size");
    memcpy(&handle, PyByteArray_AS_STRING(obj.ptr()), sizeof(handle));

    CUevent evt;
    CUDAPP_CALL_GUARDED(cuIpcOpenEventHandle, (&evt, handle));

    return new event(evt);
  }
#endif

  // }}}
#if CUDAPP_CUDA_VERSION >= 4000
  inline void initialize_profiler(
      const char *config_file,
      const char *output_file,
      CUoutput_mode output_mode)
  {
    CUDAPP_CALL_GUARDED(cuProfilerInitialize, (config_file, output_file, output_mode));
  }

  inline void start_profiler()
  {
    CUDAPP_CALL_GUARDED(cuProfilerStart, ());
  }

  inline void stop_profiler()
  {
    CUDAPP_CALL_GUARDED(cuProfilerStop, ());
// vim: foldmethod=marker