Skip to content
Snippets Groups Projects
wrap_cl.hpp 157 KiB
Newer Older
  • Learn to ignore specific revisions
  • Andreas Klöckner's avatar
    Andreas Klöckner committed
    
    
    #if PYOPENCL_CL_VERSION >= 0x2010
      // Enqueue a migration of a set of SVM regions via clEnqueueSVMMigrateMem.
      //
      // cq:          queue the command is enqueued on (cq.data() is passed on).
      // svms:        Python sequence; every item is cast to svm_pointer and
      //              contributes its svm_ptr() and size() to the call.
      // flags:       forwarded unchanged to clEnqueueSVMMigrateMem.
      // py_wait_for: consumed by PYOPENCL_PARSE_WAIT_FOR (defined elsewhere).
      //
      // The return value is produced by PYOPENCL_RETURN_NEW_EVENT(evt).
      inline
      event *enqueue_svm_migratemem(
          command_queue &cq,
          py::sequence svms,
          cl_mem_migration_flags flags,
          py::object py_wait_for
          )
      {
        PYOPENCL_PARSE_WAIT_FOR;

        std::vector<const void *> svm_pointers;
        std::vector<size_t> sizes;

        for (py::handle py_svm: svms)
        {
          svm_pointer &svm(py::cast<svm_pointer &>(py_svm));

          svm_pointers.push_back(svm.svm_ptr());
          sizes.push_back(svm.size());
        }

        cl_event evt;
        PYOPENCL_CALL_GUARDED(
            clEnqueueSVMMigrateMem,
            (
             cq.data(),
             svm_pointers.size(),
             // front() must not be touched on an empty vector.
             svm_pointers.empty() ? nullptr : &svm_pointers.front(),
             sizes.empty() ? nullptr : &sizes.front(),
             flags,
             PYOPENCL_WAITLIST_ARGS,
             &evt
            ));

        PYOPENCL_RETURN_NEW_EVENT(evt);
      }
    #endif
    
      // }}}
    
    
    
      // {{{ sampler
    
    
      // Owning wrapper around a cl_sampler handle.
      //
      // The destructor releases the handle with clReleaseSampler; the
      // (cl_sampler, bool) constructor can retain an existing handle first.
      class sampler : noncopyable
      {
        private:
          cl_sampler m_sampler;

        public:

    #if PYOPENCL_CL_VERSION >= 0x2000
          // Create a sampler from a Python sequence of cl_sampler_properties
          // values using clCreateSamplerWithProperties. A terminating 0 is
          // appended to the values taken from py_props.
          // Throws pyopencl::error if the OpenCL call fails.
          sampler(context const &ctx, py::sequence py_props)
          {
            int hex_plat_version = ctx.get_hex_platform_version();

            if (hex_plat_version  < 0x2000)
            {
              std::cerr <<
                "sampler properties given as an iterable, "
                "which uses an OpenCL 2+-only interface, "
                "but the context's platform does not "
                "declare OpenCL 2 support. Proceeding "
                "as requested, but the next thing you see "
                "may be a crash." << std:: endl;
            }

            PYOPENCL_STACK_CONTAINER(cl_sampler_properties, props, py::len(py_props) + 1);

            {
              size_t i = 0;
              for (auto prop: py_props)
                props[i++] = py::cast<cl_sampler_properties>(prop);
              props[i++] = 0;
            }

            cl_int status_code;
            PYOPENCL_PRINT_CALL_TRACE("clCreateSamplerWithProperties");

            // props always holds at least the terminating 0, so &props[0] is
            // valid. Taking the address of element 0 only relies on props
            // supporting operator[], as the fill loop above already does.
            m_sampler = clCreateSamplerWithProperties(
                ctx.data(),
                &props[0],
                &status_code);

            if (status_code != CL_SUCCESS)
              throw pyopencl::error("Sampler", status_code);
          }
    #endif

          // Create a sampler from the three classic sampler settings.
          //
          // When built against OpenCL >= 2.0 and the context's platform
          // reports version >= 2.0, clCreateSamplerWithProperties is used;
          // otherwise the call falls back to clCreateSampler.
          // Throws pyopencl::error if the OpenCL call fails.
          sampler(context const &ctx, bool normalized_coordinates,
              cl_addressing_mode am, cl_filter_mode fm)
          {
            PYOPENCL_PRINT_CALL_TRACE("clCreateSampler");

            int hex_plat_version = ctx.get_hex_platform_version();
    #if PYOPENCL_CL_VERSION >= 0x2000
            if (hex_plat_version  >= 0x2000)
            {
                cl_sampler_properties props_list[] = {
                  CL_SAMPLER_NORMALIZED_COORDS, normalized_coordinates,
                  CL_SAMPLER_ADDRESSING_MODE, am,
                  CL_SAMPLER_FILTER_MODE, fm,
                  0,
                };

                cl_int status_code;

                PYOPENCL_PRINT_CALL_TRACE("clCreateSamplerWithProperties");
                m_sampler = clCreateSamplerWithProperties(
                    ctx.data(), props_list, &status_code);

                if (status_code != CL_SUCCESS)
                  throw pyopencl::error("Sampler", status_code);
            }
            else
    #endif
            {
              cl_int status_code;

              // Silence GCC's deprecation warning for clCreateSampler.
    #if defined(__GNUG__) && !defined(__clang__)
    #pragma GCC diagnostic push
    #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
    #endif

              m_sampler = clCreateSampler(
                  ctx.data(),
                  normalized_coordinates,
                  am, fm, &status_code);

    #if defined(__GNUG__) && !defined(__clang__)
    #pragma GCC diagnostic pop
    #endif

              if (status_code != CL_SUCCESS)
                throw pyopencl::error("Sampler", status_code);
            }
          }

          // Wrap an existing handle; clRetainSampler is called when `retain`
          // is true.
          sampler(cl_sampler samp, bool retain)
            : m_sampler(samp)
          {
            if (retain)
              PYOPENCL_CALL_GUARDED(clRetainSampler, (samp));
          }

          ~sampler()
          {
            PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseSampler, (m_sampler));
          }

          // Raw OpenCL handle held by this wrapper.
          cl_sampler data() const
          {
            return m_sampler;
          }

          PYOPENCL_EQUALITY_TESTS(sampler);

          // Query a sampler attribute. Unknown param_name values raise
          // error("Sampler.get_info", CL_INVALID_VALUE).
          // NOTE(review): the PYOPENCL_GET_*_INFO macros are defined
          // elsewhere; the cases carry no break, so they presumably return
          // from the function -- confirm against the macro definitions.
          py::object get_info(cl_sampler_info param_name) const
          {
            switch (param_name)
            {
              case CL_SAMPLER_REFERENCE_COUNT:
                PYOPENCL_GET_TYPED_INFO(Sampler, m_sampler, param_name,
                    cl_uint);
              case CL_SAMPLER_CONTEXT:
                PYOPENCL_GET_OPAQUE_INFO(Sampler, m_sampler, param_name,
                    cl_context, context);
              case CL_SAMPLER_ADDRESSING_MODE:
                PYOPENCL_GET_TYPED_INFO(Sampler, m_sampler, param_name,
                    cl_addressing_mode);
              case CL_SAMPLER_FILTER_MODE:
                PYOPENCL_GET_TYPED_INFO(Sampler, m_sampler, param_name,
                    cl_filter_mode);
              case CL_SAMPLER_NORMALIZED_COORDS:
                PYOPENCL_GET_TYPED_INFO(Sampler, m_sampler, param_name,
                    cl_bool);

    #if PYOPENCL_CL_VERSION >= 0x3000
              case CL_SAMPLER_PROPERTIES:
                {
                  std::vector<cl_sampler_properties> result;
                  PYOPENCL_GET_VEC_INFO(Sampler, m_sampler, param_name, result);
                  PYOPENCL_RETURN_VECTOR(cl_sampler_properties, result);
                }
    #endif

    #ifdef CL_SAMPLER_MIP_FILTER_MODE_KHR
              case CL_SAMPLER_MIP_FILTER_MODE_KHR:
                PYOPENCL_GET_TYPED_INFO(Sampler, m_sampler, param_name,
                    cl_filter_mode);
              case CL_SAMPLER_LOD_MIN_KHR:
              case CL_SAMPLER_LOD_MAX_KHR:
                PYOPENCL_GET_TYPED_INFO(Sampler, m_sampler, param_name, float);
    #endif

              default:
                throw error("Sampler.get_info", CL_INVALID_VALUE);
            }
          }
      };
    
      // }}}
    
    
      class program : noncopyable
    
          enum program_kind_type { KND_UNKNOWN, KND_SOURCE, KND_BINARY, KND_IL };
    
    
        private:
          cl_program m_program;
          program_kind_type m_program_kind;
    
        public:
          program(cl_program prog, bool retain, program_kind_type progkind=KND_UNKNOWN)
            : m_program(prog), m_program_kind(progkind)
          {
            if (retain)
              PYOPENCL_CALL_GUARDED(clRetainProgram, (prog));
          }
    
          ~program()
          {
            PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseProgram, (m_program));
          }
    
          cl_program data() const
          {
            return m_program;
          }
    
          program_kind_type kind() const
          {
            return m_program_kind;
          }
    
          PYOPENCL_EQUALITY_TESTS(program);
    
          py::object get_info(cl_program_info param_name) const
          {
            switch (param_name)
            {
              case CL_PROGRAM_REFERENCE_COUNT:
    
                PYOPENCL_GET_TYPED_INFO(Program, m_program, param_name,
    
                    cl_uint);
              case CL_PROGRAM_CONTEXT:
                PYOPENCL_GET_OPAQUE_INFO(Program, m_program, param_name,
                    cl_context, context);
              case CL_PROGRAM_NUM_DEVICES:
    
                PYOPENCL_GET_TYPED_INFO(Program, m_program, param_name, cl_uint);
    
              case CL_PROGRAM_DEVICES:
                {
                  std::vector<cl_device_id> result;
                  PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result);
    
                  py::list py_result;
    
                  for (cl_device_id did: result)
    
                    py_result.append(handle_from_new_ptr(
                          new pyopencl::device(did)));
                  return py_result;
                }
              case CL_PROGRAM_SOURCE:
                PYOPENCL_GET_STR_INFO(Program, m_program, param_name);
              case CL_PROGRAM_BINARY_SIZES:
                {
                  std::vector<size_t> result;
                  PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result);
                  PYOPENCL_RETURN_VECTOR(size_t, result);
                }
              case CL_PROGRAM_BINARIES:
                // {{{
                {
                  std::vector<size_t> sizes;
                  PYOPENCL_GET_VEC_INFO(Program, m_program, CL_PROGRAM_BINARY_SIZES, sizes);
    
                  size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0);
    
    
                  std::unique_ptr<unsigned char []> result(
    
                      new unsigned char[total_size]);
                  std::vector<unsigned char *> result_ptrs;
    
                  unsigned char *ptr = result.get();
                  for (unsigned i = 0; i < sizes.size(); ++i)
                  {
                    result_ptrs.push_back(ptr);
                    ptr += sizes[i];
                  }
    
                  PYOPENCL_CALL_GUARDED(clGetProgramInfo,
                      (m_program, param_name, sizes.size()*sizeof(unsigned char *),
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
                       result_ptrs.empty( ) ? nullptr : &result_ptrs.front(), 0)); \
    
    
                  py::list py_result;
                  ptr = result.get();
                  for (unsigned i = 0; i < sizes.size(); ++i)
                  {
    
                    py::object binary_pyobj(
                        py::reinterpret_steal<py::object>(
    
    #if PY_VERSION_HEX >= 0x03000000
                        PyBytes_FromStringAndSize(
                          reinterpret_cast<char *>(ptr), sizes[i])
    #else
                        PyString_FromStringAndSize(
                          reinterpret_cast<char *>(ptr), sizes[i])
    #endif
    
                    py_result.append(binary_pyobj);
                    ptr += sizes[i];
                  }
                  return py_result;
                }
                // }}}
    #if PYOPENCL_CL_VERSION >= 0x1020
              case CL_PROGRAM_NUM_KERNELS:
    
                PYOPENCL_GET_TYPED_INFO(Program, m_program, param_name,
    
                    size_t);
              case CL_PROGRAM_KERNEL_NAMES:
                PYOPENCL_GET_STR_INFO(Program, m_program, param_name);
    #endif
    
    #if PYOPENCL_CL_VERSION >= 0x2010
              case CL_PROGRAM_IL:
                PYOPENCL_GET_STR_INFO(Program, m_program, param_name);
    #endif
    #if PYOPENCL_CL_VERSION >= 0x2020
              case CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT:
              case CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT:
                PYOPENCL_GET_TYPED_INFO(Program, m_program, param_name, cl_bool);
    #endif
    
    
              default:
                throw error("Program.get_info", CL_INVALID_VALUE);
            }
          }
    
          py::object get_build_info(
              device const &dev,
              cl_program_build_info param_name) const
          {
            switch (param_name)
            {
    #define PYOPENCL_FIRST_ARG m_program, dev.data() // hackety hack
              case CL_PROGRAM_BUILD_STATUS:
    
                PYOPENCL_GET_TYPED_INFO(ProgramBuild,
    
                    PYOPENCL_FIRST_ARG, param_name,
                    cl_build_status);
              case CL_PROGRAM_BUILD_OPTIONS:
              case CL_PROGRAM_BUILD_LOG:
                PYOPENCL_GET_STR_INFO(ProgramBuild,
                    PYOPENCL_FIRST_ARG, param_name);
    #if PYOPENCL_CL_VERSION >= 0x1020
              case CL_PROGRAM_BINARY_TYPE:
    
                PYOPENCL_GET_TYPED_INFO(ProgramBuild,
    
                    PYOPENCL_FIRST_ARG, param_name,
                    cl_program_binary_type);
    #endif
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
    #if PYOPENCL_CL_VERSION >= 0x2000
              case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE:
    
                PYOPENCL_GET_TYPED_INFO(ProgramBuild,
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
                    PYOPENCL_FIRST_ARG, param_name,
                    size_t);
    #endif
    
    #undef PYOPENCL_FIRST_ARG
    
              default:
                throw error("Program.get_build_info", CL_INVALID_VALUE);
            }
          }
    
          void build(std::string options, py::object py_devices)
          {
            PYOPENCL_PARSE_PY_DEVICES;
    
            PYOPENCL_CALL_GUARDED_THREADED(clBuildProgram,
                (m_program, num_devices, devices,
                 options.c_str(), 0 ,0));
          }
    
    #if PYOPENCL_CL_VERSION >= 0x1020
          void compile(std::string options, py::object py_devices,
              py::object py_headers)
          {
            PYOPENCL_PARSE_PY_DEVICES;
    
            // {{{ pick apart py_headers
            // py_headers is a list of tuples *(name, program)*
    
            std::vector<std::string> header_names;
            std::vector<cl_program> programs;
    
            for (py::handle name_hdr_tup_py: py_headers)
    
              py::tuple name_hdr_tup = py::reinterpret_borrow<py::tuple>(name_hdr_tup_py);
    
              if (py::len(name_hdr_tup) != 2)
                throw error("Program.compile", CL_INVALID_VALUE,
                    "epxected (name, header) tuple in headers list");
    
              std::string name = (name_hdr_tup[0]).cast<std::string>();
              program &prg = (name_hdr_tup[1]).cast<program &>();
    
    
              header_names.push_back(name);
              programs.push_back(prg.data());
            }
    
            std::vector<const char *> header_name_ptrs;
    
            for (std::string const &name: header_names)
    
              header_name_ptrs.push_back(name.c_str());
    
            // }}}
    
            PYOPENCL_CALL_GUARDED_THREADED(clCompileProgram,
                (m_program, num_devices, devices,
                 options.c_str(), header_names.size(),
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
                 programs.empty() ? nullptr : &programs.front(),
                 header_name_ptrs.empty() ? nullptr : &header_name_ptrs.front(),
    
    
    #if PYOPENCL_CL_VERSION >= 0x2020
          void set_specialization_constant(cl_uint spec_id, py::object py_buffer)
          {
            py_buffer_wrapper bufwrap;
            bufwrap.get(py_buffer.ptr(), PyBUF_ANY_CONTIGUOUS);
            PYOPENCL_CALL_GUARDED(clSetProgramSpecializationConstant,
                (m_program, spec_id, bufwrap.m_buf.len, bufwrap.m_buf.buf));
          }
    #endif
    
      };
    
    
    
    
      // Create a program from source text with clCreateProgramWithSource.
      //
      // The source is passed as a single string together with its explicit
      // length. Throws pyopencl::error if the OpenCL call fails. The new
      // handle is released again if constructing the wrapper throws.
      inline
      program *create_program_with_source(
          context &ctx,
          std::string const &src)
      {
        const char *src_text = src.c_str();
        size_t src_length = src.size();

        cl_int status_code;
        PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithSource");
        cl_program prg_handle = clCreateProgramWithSource(
            ctx.data(), 1, &src_text, &src_length, &status_code);
        if (status_code != CL_SUCCESS)
          throw pyopencl::error("clCreateProgramWithSource", status_code);

        try
        {
          // retain=false: the wrapper takes over the reference we hold.
          return new program(prg_handle, false, program::KND_SOURCE);
        }
        catch (...)
        {
          clReleaseProgram(prg_handle);
          throw;
        }
      }
    
    
    
    
    
      // Create a program from per-device binaries with
      // clCreateProgramWithBinary.
      //
      // py_devices:  sequence of device objects.
      // py_binaries: sequence of buffer-protocol objects, one per device.
      //
      // Raises error(..., CL_INVALID_VALUE) when the two sequences differ in
      // length, and pyopencl::error when the OpenCL call fails.
      inline
      program *create_program_with_binary(
          context &ctx,
          py::sequence py_devices,
          py::sequence py_binaries)
      {
        std::vector<cl_device_id> devices;
        std::vector<const unsigned char *> binaries;
        std::vector<size_t> sizes;

        size_t num_devices = py::len(py_devices);

        if (py::len(py_binaries) != num_devices)
          throw error("create_program_with_binary", CL_INVALID_VALUE,
              "device and binary counts don't match");

        devices.reserve(num_devices);
        binaries.reserve(num_devices);
        sizes.reserve(num_devices);

        for (size_t i = 0; i < num_devices; ++i)
        {
          devices.push_back(
              (py_devices[i]).cast<device const &>().data());

          // NOTE(review): buf_wrapper goes out of scope at the end of each
          // iteration while its data pointer is kept in `binaries`; this
          // presumably relies on py_binaries keeping the objects alive.
          py_buffer_wrapper buf_wrapper;
          buf_wrapper.get(py::object(py_binaries[i]).ptr(), PyBUF_ANY_CONTIGUOUS);

          const void *buf = buf_wrapper.m_buf.buf;
          PYOPENCL_BUFFER_SIZE_T buf_len = buf_wrapper.m_buf.len;

          binaries.push_back(reinterpret_cast<const unsigned char *>(buf));
          sizes.push_back(buf_len);
        }

        PYOPENCL_STACK_CONTAINER(cl_int, binary_statuses, num_devices);

        cl_int status_code;
        PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBinary");
        cl_program result = clCreateProgramWithBinary(
            ctx.data(), num_devices,
            devices.empty( ) ? nullptr : &devices.front(),
            sizes.empty( ) ? nullptr : &sizes.front(),
            binaries.empty( ) ? nullptr : &binaries.front(),
            // Per-device status output; element 0 is only addressed when
            // there is at least one device.
            num_devices ? &binary_statuses[0] : nullptr,
            &status_code);
        if (status_code != CL_SUCCESS)
          throw pyopencl::error("clCreateProgramWithBinary", status_code);

        /*
        for (int i = 0; i < num_devices; ++i)
          printf("%d:%d\n", i, binary_statuses[i]);
          */

        try
        {
          // retain=false: the wrapper takes over the reference we hold.
          return new program(result, false, program::KND_BINARY);
        }
        catch (...)
        {
          clReleaseProgram(result);
          throw;
        }
      }
    
    
    
    
    #if (PYOPENCL_CL_VERSION >= 0x1020) || \
          ((PYOPENCL_CL_VERSION >= 0x1030) && defined(__APPLE__))
      // Create a program from built-in kernels with
      // clCreateProgramWithBuiltInKernels.
      //
      // py_devices is consumed by PYOPENCL_PARSE_PY_DEVICES, which supplies
      // num_devices and devices (macro defined elsewhere). kernel_names is
      // forwarded as a C string. Throws pyopencl::error if the call fails.
      inline
      program *create_program_with_built_in_kernels(
          context &ctx,
          py::object py_devices,
          std::string const &kernel_names)
      {
        PYOPENCL_PARSE_PY_DEVICES;

        cl_int status_code;
        PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBuiltInKernels");
        cl_program result = clCreateProgramWithBuiltInKernels(
            ctx.data(), num_devices, devices,
            kernel_names.c_str(), &status_code);
        if (status_code != CL_SUCCESS)
          throw pyopencl::error("clCreateProgramWithBuiltInKernels", status_code);

        try
        {
          // retain=false: the wrapper takes over the reference we hold.
          return new program(result, false);
        }
        catch (...)
        {
          clReleaseProgram(result);
          throw;
        }
      }
    #endif
    
    
    
    
    #if (PYOPENCL_CL_VERSION >= 0x2010)
      // Create a program from an intermediate-language blob with
      // clCreateProgramWithIL.
      //
      // The bytes of `src` are handed over as pointer plus length. Throws
      // pyopencl::error if the OpenCL call fails; the new handle is released
      // again if constructing the wrapper throws.
      inline
      program *create_program_with_il(
          context &ctx,
          std::string const &src)
      {
        cl_int status_code;
        PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithIL");
        cl_program il_program = clCreateProgramWithIL(
            ctx.data(), src.c_str(), src.size(), &status_code);
        if (status_code != CL_SUCCESS)
          throw pyopencl::error("clCreateProgramWithIL", status_code);

        try
        {
          // retain=false: the wrapper takes over the reference we hold.
          return new program(il_program, false, program::KND_IL);
        }
        catch (...)
        {
          clReleaseProgram(il_program);
          throw;
        }
      }
    #endif
    
    
    
    
    
    
    #if PYOPENCL_CL_VERSION >= 0x1020
      // Link a set of programs into a new program with clLinkProgram.
      //
      // py_programs: iterable whose items are cast to program.
      // options:     forwarded as a C string.
      // py_devices:  consumed by PYOPENCL_PARSE_PY_DEVICES, which supplies
      //              num_devices and devices (macro defined elsewhere).
      //
      // On failure a pyopencl::error is thrown that also carries the
      // cl_program returned by the failed call.
      inline
      program *link_program(
          context &ctx,
          py::object py_programs,
          std::string const &options,
          py::object py_devices
          )
      {
        PYOPENCL_PARSE_PY_DEVICES;

        std::vector<cl_program> programs;

        for (py::handle py_prg: py_programs)
        {
          program &prg = (py_prg).cast<program &>();
          programs.push_back(prg.data());
        }

        cl_int status_code;
        PYOPENCL_PRINT_CALL_TRACE("clLinkProgram");
        cl_program result = clLinkProgram(
            ctx.data(), num_devices, devices,
            options.c_str(),
            programs.size(),
            // front() must not be touched on an empty vector.
            programs.empty() ? nullptr : &programs.front(),
            0, 0,
            &status_code);

        if (status_code != CL_SUCCESS)
          throw pyopencl::error("clLinkProgram", result, status_code);

        try
        {
          // retain=false: the wrapper takes over the reference we hold.
          return new program(result, false);
        }
        catch (...)
        {
          clReleaseProgram(result);
          throw;
        }
      }
    
    #endif
    
    
    #if PYOPENCL_CL_VERSION >= 0x1020
      // Forward to clUnloadPlatformCompiler for the given platform's handle.
      inline
      void unload_platform_compiler(platform &plat)
      {
        PYOPENCL_CALL_GUARDED(
            clUnloadPlatformCompiler,
            (plat.data()));
      }
    #endif
    
      // }}}
    
    
      // {{{ kernel
      // Value type that carries nothing but a byte count.
      // kernel::set_arg_local passes size() as the argument size of a
      // clSetKernelArg call whose argument pointer is null.
      class local_memory
      {
        public:
          local_memory(size_t size)
            : m_size(size)
          { }

          // Byte count given at construction.
          size_t size() const
          {
            return m_size;
          }

        private:
          size_t m_size;
      };
    
    
    
    
    
      class kernel : noncopyable
    
      {
        private:
          cl_kernel m_kernel;
    
        public:
          // Wrap an existing kernel handle; clRetainKernel is called on it
          // only when `retain` is true.
          kernel(cl_kernel knl, bool retain)
            : m_kernel(knl)
          {
            if (retain)
            {
              PYOPENCL_CALL_GUARDED(clRetainKernel, (knl));
            }
          }
    
          // Create the kernel named `kernel_name` from `prg` with
          // clCreateKernel. Throws pyopencl::error if the call fails.
          kernel(program const &prg, std::string const &kernel_name)
          {
            cl_int status_code;

            PYOPENCL_PRINT_CALL_TRACE("clCreateKernel");
            m_kernel = clCreateKernel(
                prg.data(),
                kernel_name.c_str(),
                &status_code);

            if (status_code != CL_SUCCESS)
            {
              throw pyopencl::error("clCreateKernel", status_code);
            }
          }
    
          // Release the wrapped handle with clReleaseKernel (through the
          // cleanup variant of the call-guard macro).
          ~kernel()
          {
            PYOPENCL_CALL_GUARDED_CLEANUP(
                clReleaseKernel,
                (m_kernel));
          }
    
          // Raw OpenCL handle held by this wrapper.
          cl_kernel data() const
          { return m_kernel; }
    
          PYOPENCL_EQUALITY_TESTS(kernel);
    
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
    #if PYOPENCL_CL_VERSION >= 0x2010
          // Duplicate this kernel with clCloneKernel and return a new
          // heap-allocated wrapper for the copy. Throws pyopencl::error if
          // the call fails; the cloned handle is released again if
          // constructing the wrapper throws.
          kernel *clone()
          {
            cl_int status_code;

            PYOPENCL_PRINT_CALL_TRACE("clCloneKernel");
            cl_kernel cloned = clCloneKernel(m_kernel, &status_code);
            if (status_code != CL_SUCCESS)
              throw pyopencl::error("clCloneKernel", status_code);

            try
            {
              const bool retain = false;
              return new kernel(cloned, retain);
            }
            catch (...)
            {
              PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseKernel, (cloned));
              throw;
            }
          }
    #endif
    
    
          // Set argument `arg_index` to a null cl_mem handle.
          void set_arg_null(cl_uint arg_index)
          {
            cl_mem null_mem = nullptr;
            PYOPENCL_CALL_GUARDED(clSetKernelArg,
                (m_kernel, arg_index, sizeof(cl_mem), &null_mem));
          }
    
          // Set argument `arg_index` to the cl_mem handle held by `moh`.
          void set_arg_mem(cl_uint arg_index, memory_object_holder &moh)
          {
            cl_mem mem_handle = moh.data();
            PYOPENCL_CALL_GUARDED(
                clSetKernelArg,
                (m_kernel, arg_index, sizeof(cl_mem), &mem_handle));
          }
    
          // Set argument `arg_index` with a size of loc.size() bytes and a
          // null argument pointer.
          void set_arg_local(cl_uint arg_index, local_memory const &loc)
          {
            PYOPENCL_CALL_GUARDED(
                clSetKernelArg,
                (m_kernel, arg_index, loc.size(), nullptr));
          }
    
          // Set argument `arg_index` to the cl_sampler handle held by `smp`.
          void set_arg_sampler(cl_uint arg_index, sampler const &smp)
          {
            cl_sampler sampler_handle = smp.data();
            PYOPENCL_CALL_GUARDED(
                clSetKernelArg,
                (m_kernel, arg_index, sizeof(cl_sampler), &sampler_handle));
          }
    
    
          // Set argument `arg_index` to the cl_command_queue handle held by
          // `queue`.
          void set_arg_command_queue(cl_uint arg_index, command_queue const &queue)
          {
            cl_command_queue queue_handle = queue.data();
            PYOPENCL_CALL_GUARDED(
                clSetKernelArg,
                (m_kernel, arg_index, sizeof(cl_command_queue), &queue_handle));
          }
    
    
          // Convert the Python object `obj` to the C type selected by a
          // one-character type code and pass it by value to clSetKernelArg.
          //
          // py_typechar must cast to a string of exactly one character out
          // of "cbBhHiIlLfd"; anything else raises
          // error("Kernel.set_arg_buf_pack", CL_INVALID_VALUE, ...).
          void set_arg_buf_pack(cl_uint arg_index, py::handle py_typechar, py::handle obj)
          {
            std::string typechar_str(py::cast<std::string>(py_typechar));
            if (typechar_str.size() != 1)
              throw error("Kernel.set_arg_buf_pack", CL_INVALID_VALUE,
                  "type char argument must have exactly one character");

            char typechar = typechar_str[0];

            // One switch case per type code: cast obj to TYPE, then hand
            // the address and size of the converted value to clSetKernelArg.
    #define PYOPENCL_KERNEL_PACK_AND_SET_ARG(TYPECH_VAL, TYPE) \
            case TYPECH_VAL: \
              { \
                TYPE val = py::cast<TYPE>(obj); \
                PYOPENCL_CALL_GUARDED(clSetKernelArg, (m_kernel, arg_index, sizeof(val), &val)); \
                break; \
              }

            switch (typechar)
            {
              PYOPENCL_KERNEL_PACK_AND_SET_ARG('c', char)
              PYOPENCL_KERNEL_PACK_AND_SET_ARG('b', signed char)
              PYOPENCL_KERNEL_PACK_AND_SET_ARG('B', unsigned char)
              PYOPENCL_KERNEL_PACK_AND_SET_ARG('h', short)
              PYOPENCL_KERNEL_PACK_AND_SET_ARG('H', unsigned short)
              PYOPENCL_KERNEL_PACK_AND_SET_ARG('i', int)
              PYOPENCL_KERNEL_PACK_AND_SET_ARG('I', unsigned int)
              PYOPENCL_KERNEL_PACK_AND_SET_ARG('l', long)
              PYOPENCL_KERNEL_PACK_AND_SET_ARG('L', unsigned long)
              PYOPENCL_KERNEL_PACK_AND_SET_ARG('f', float)
              PYOPENCL_KERNEL_PACK_AND_SET_ARG('d', double)
              default:
                throw error("Kernel.set_arg_buf_pack", CL_INVALID_VALUE,
                    "invalid type char");
            }
    #undef PYOPENCL_KERNEL_PACK_AND_SET_ARG
          }
    
    
          // Set argument `arg_index` from the bytes `py_buffer` exposes
          // through the buffer protocol (requested as PyBUF_ANY_CONTIGUOUS).
          //
          // If acquiring the buffer raises a Python error, that error is
          // cleared and error("Kernel.set_arg", CL_INVALID_VALUE, ...) is
          // thrown instead.
          void set_arg_buf(cl_uint arg_index, py::handle py_buffer)
          {
            py_buffer_wrapper buf_wrapper;

            try
            {
              buf_wrapper.get(py_buffer.ptr(), PyBUF_ANY_CONTIGUOUS);
            }
            catch (py::error_already_set &)
            {
              PyErr_Clear();
              throw error("Kernel.set_arg", CL_INVALID_VALUE,
                  "invalid kernel argument");
            }

            const void *arg_data = buf_wrapper.m_buf.buf;
            PYOPENCL_BUFFER_SIZE_T arg_size = buf_wrapper.m_buf.len;

            PYOPENCL_CALL_GUARDED(clSetKernelArg,
                (m_kernel, arg_index, arg_size, arg_data));
          }
    
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
    #if PYOPENCL_CL_VERSION >= 0x2000
          // Set argument `arg_index` to the SVM pointer held by `wrp` using
          // clSetKernelArgSVMPointer.
          void set_arg_svm(cl_uint arg_index, svm_pointer const &wrp)
          {
            PYOPENCL_CALL_GUARDED(clSetKernelArgSVMPointer,
                (m_kernel, arg_index, wrp.svm_ptr()));
          }
    #endif

          // Set argument `arg_index` from an arbitrary Python object.
          //
          // None sets a null cl_mem. Otherwise the object is tried, in
          // order, as memory_object_holder, svm_pointer (OpenCL >= 2.0
          // builds only), local_memory, sampler and command_queue; the
          // first cast that succeeds decides which setter is used. A
          // py::cast_error moves on to the next candidate. If none match,
          // the object is passed to set_arg_buf.
          void set_arg(cl_uint arg_index, py::handle arg)
          {
            if (arg.ptr() == Py_None)
            {
              set_arg_null(arg_index);
              return;
            }

            try
            {
              set_arg_mem(arg_index, arg.cast<memory_object_holder &>());
              return;
            }
            catch (py::cast_error &) { }

    #if PYOPENCL_CL_VERSION >= 0x2000
            try
            {
              set_arg_svm(arg_index, arg.cast<svm_pointer const &>());
              return;
            }
            catch (py::cast_error &) { }
    #endif

            try
            {
              set_arg_local(arg_index, arg.cast<local_memory>());
              return;
            }
            catch (py::cast_error &) { }

            try
            {
              set_arg_sampler(arg_index, arg.cast<const sampler &>());
              return;
            }
            catch (py::cast_error &) { }

            try
            {
              set_arg_command_queue(arg_index, arg.cast<const command_queue &>());
              return;
            }
            catch (py::cast_error &) { }

            set_arg_buf(arg_index, arg);
          }
    
          // Query a kernel attribute selected by param_name.
          // Unknown param_name values raise
          // error("Kernel.get_info", CL_INVALID_VALUE).
          // NOTE(review): the PYOPENCL_GET_*_INFO macros are defined
          // elsewhere; the cases carry no break, so they presumably return
          // from the function -- confirm against the macro definitions.
          py::object get_info(cl_kernel_info param_name) const
          {
            switch (param_name)
            {
              case CL_KERNEL_FUNCTION_NAME:
                PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name);
              // Both counts are queried as cl_uint.
              case CL_KERNEL_NUM_ARGS:
              case CL_KERNEL_REFERENCE_COUNT:

                PYOPENCL_GET_TYPED_INFO(Kernel, m_kernel, param_name,

                    cl_uint);
              case CL_KERNEL_CONTEXT:
                PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name,
                    cl_context, context);
              case CL_KERNEL_PROGRAM:
                PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name,
                    cl_program, program);
    #if PYOPENCL_CL_VERSION >= 0x1020
              case CL_KERNEL_ATTRIBUTES:
                PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name);
    #endif
              default:
                throw error("Kernel.get_info", CL_INVALID_VALUE);
            }
          }
    
          // Query a per-device work-group attribute of this kernel.
          // Unknown param_name values raise
          // error("Kernel.get_work_group_info", CL_INVALID_VALUE).
          // NOTE(review): the PYOPENCL_GET_*_INFO / PYOPENCL_RETURN_VECTOR
          // macros are defined elsewhere; the cases carry no break, so they
          // presumably return from the function -- confirm.
          py::object get_work_group_info(
              cl_kernel_work_group_info param_name,
              device const &dev
              ) const
          {
            switch (param_name)
            {
              // Smuggles two leading arguments (kernel, device) through the
              // single "first argument" slot of the info macros.
    #define PYOPENCL_FIRST_ARG m_kernel, dev.data() // hackety hack
              case CL_KERNEL_WORK_GROUP_SIZE:

                PYOPENCL_GET_TYPED_INFO(KernelWorkGroup,

                    PYOPENCL_FIRST_ARG, param_name,
                    size_t);
              case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
                {
                  std::vector<size_t> result;
                  PYOPENCL_GET_VEC_INFO(KernelWorkGroup,
                      PYOPENCL_FIRST_ARG, param_name, result);

                  PYOPENCL_RETURN_VECTOR(size_t, result);
                }
              // Both memory sizes are queried as cl_ulong.
              case CL_KERNEL_LOCAL_MEM_SIZE:
    #if PYOPENCL_CL_VERSION >= 0x1010
              case CL_KERNEL_PRIVATE_MEM_SIZE:
    #endif

                PYOPENCL_GET_TYPED_INFO(KernelWorkGroup,

                    PYOPENCL_FIRST_ARG, param_name,
                    cl_ulong);

    #if PYOPENCL_CL_VERSION >= 0x1010
              case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:

                PYOPENCL_GET_TYPED_INFO(KernelWorkGroup,

                    PYOPENCL_FIRST_ARG, param_name,
                    size_t);
    #endif
              default:
                throw error("Kernel.get_work_group_info", CL_INVALID_VALUE);
    #undef PYOPENCL_FIRST_ARG
            }
          }
    
    #if PYOPENCL_CL_VERSION >= 0x1020
          // Query an attribute of the kernel argument at arg_index.
          // Unknown param_name values raise
          // error("Kernel.get_arg_info", CL_INVALID_VALUE).
          // NOTE(review): the PYOPENCL_GET_*_INFO macros are defined
          // elsewhere; the cases carry no break, so they presumably return
          // from the function -- confirm against the macro definitions.
          py::object get_arg_info(
              cl_uint arg_index,
              cl_kernel_arg_info param_name
              ) const
          {
            switch (param_name)
            {
              // Smuggles two leading arguments (kernel, argument index)
              // through the single "first argument" slot of the info macros.
    #define PYOPENCL_FIRST_ARG m_kernel, arg_index // hackety hack
              case CL_KERNEL_ARG_ADDRESS_QUALIFIER:

                PYOPENCL_GET_TYPED_INFO(KernelArg,

                    PYOPENCL_FIRST_ARG, param_name,
                    cl_kernel_arg_address_qualifier);

              case CL_KERNEL_ARG_ACCESS_QUALIFIER:

                PYOPENCL_GET_TYPED_INFO(KernelArg,

                    PYOPENCL_FIRST_ARG, param_name,
                    cl_kernel_arg_access_qualifier);

              // Both names are queried as strings.
              case CL_KERNEL_ARG_TYPE_NAME:
              case CL_KERNEL_ARG_NAME:
                PYOPENCL_GET_STR_INFO(KernelArg, PYOPENCL_FIRST_ARG, param_name);


              case CL_KERNEL_ARG_TYPE_QUALIFIER:

                PYOPENCL_GET_TYPED_INFO(KernelArg,

                    PYOPENCL_FIRST_ARG, param_name,
                    cl_kernel_arg_type_qualifier);

    #undef PYOPENCL_FIRST_ARG
              default:
                throw error("Kernel.get_arg_info", CL_INVALID_VALUE);
            }
          }
    #endif
    
    
    #if PYOPENCL_CL_VERSION >= 0x2010
        py::object get_sub_group_info(
            device const &dev,
            cl_kernel_sub_group_info param_name,
            py::object py_input_value)
        {
          switch (param_name)
          {
            // size_t * -> size_t
            case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE:
            case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE:
              {
                std::vector<size_t> input_value;
                COPY_PY_LIST(size_t, input_value);
    
                size_t param_value;
                PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
                    (m_kernel, dev.data(), param_name,
                     input_value.size()*sizeof(input_value.front()),
                     input_value.empty() ? nullptr : &input_value.front(),
                     sizeof(param_value), &param_value, 0));
    
                return py::cast(param_value);
              }
    
            // size_t -> size_t[]
            case CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT:
              {
                size_t input_value = py::cast<size_t>(py_input_value);
    
                std::vector<size_t> result;
                size_t size;
                PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,