Skip to content
cuda.hpp 29.9 KiB
Newer Older
    \
    void set_dst_device(CUdeviceptr devptr)  \
    { \
      dstMemoryType = CU_MEMORYTYPE_DEVICE; \
      dstDevice = devptr; \
    }





  struct memcpy_2d : public CUDA_MEMCPY2D
  {
    memcpy_2d()
    {
      srcXInBytes = 0;
      srcY = 0;

      dstXInBytes = 0;
      dstY = 0;
    }

    MEMCPY_SETTERS;

    void execute(bool aligned) const
    {
      if (aligned)
      { CUDAPP_CALL_GUARDED_THREADED(cuMemcpy2D, (this)); }
      { CUDAPP_CALL_GUARDED_THREADED(cuMemcpy2DUnaligned, (this)); }
    }

    void execute_async(const stream &s) const
    { CUDAPP_CALL_GUARDED_THREADED(cuMemcpy2DAsync, (this, s.handle())); }
  };

#if CUDA_VERSION >= 2000
  struct memcpy_3d : public CUDA_MEMCPY3D
  {
    memcpy_3d()
    {
      reserved0 = 0;
      reserved1 = 0;

      srcXInBytes = 0;
      srcY = 0;
      srcZ = 0;
      CUDAPP_CALL_GUARDED_THREADED(cuMemcpy3D, (this));
    }

    void execute_async(const stream &s) const
    { CUDAPP_CALL_GUARDED_THREADED(cuMemcpy3DAsync, (this, s.handle())); }
  };
#endif


  // host memory --------------------------------------------------------------
  inline void *mem_alloc_host(unsigned int size)
  {
    void *m_data;
    CUDAPP_CALL_GUARDED(cuMemAllocHost, (&m_data, size));
    return m_data;
  }

  inline void mem_free_host(void *ptr)
  {
    CUDAPP_CALL_GUARDED(cuMemFreeHost, (ptr));
  }




  struct host_allocation : public boost::noncopyable, public context_dependent

    public:
      host_allocation(unsigned bytesize)
        : m_valid(true), m_data(mem_alloc_host(bytesize))
          try
          {
            scoped_context_activation ca(get_context());
            mem_free_host(m_data); 
          }
          CUDA_CATCH_WARN_OOT_LEAK(host_allocation);

          release_context();
          m_valid = false;
        else
          throw cuda::error("host_allocation::free", CUDA_ERROR_INVALID_HANDLE);
      
      void *data()
      { return m_data; }
  };




  // events -------------------------------------------------------------------
Andreas Kloeckner's avatar
Andreas Kloeckner committed
  class event : public boost::noncopyable, public context_dependent
  {
    private:
      CUevent m_event;

    public:
      event(unsigned int flags=0)
      { CUDAPP_CALL_GUARDED(cuEventCreate, (&m_event, flags)); }

      ~event()
        try
        {
          scoped_context_activation ca(get_context());
          CUDAPP_CALL_GUARDED(cuEventDestroy, (m_event)); 
        }
        CUDA_CATCH_WARN_OOT_LEAK(event);

      void record()
      { CUDAPP_CALL_GUARDED(cuEventRecord, (m_event, 0)); }

      void record_in_stream(stream const &str)
      { CUDAPP_CALL_GUARDED(cuEventRecord, (m_event, str.handle())); }
      { CUDAPP_CALL_GUARDED_THREADED(cuEventSynchronize, (m_event)); }

      bool query() const
      { 
#ifdef TRACE_CUDA
        std::cerr << "cuEventQuery" << std::endl;
#endif
        CUresult result = cuEventQuery(m_event);
        switch (result)
        {
          case CUDA_SUCCESS: 
            return true;
          case CUDA_ERROR_NOT_READY: 
            return false;
          default:
            throw error("cuEventQuery", result);
        }
      }

      float time_since(event const &start)
      {
        float result;
        CUDAPP_CALL_GUARDED(cuEventElapsedTime, (&result, start.m_event, m_event));
        return result;
      }

      float time_till(event const &end)
      {
        float result;
        CUDAPP_CALL_GUARDED(cuEventElapsedTime, (&result, m_event, end.m_event));
        return result;
      }
  };
}




#endif