Skip to content
Snippets Groups Projects
wrap_cl.hpp 115 KiB
Newer Older
  • Learn to ignore specific revisions
  •               origin, region, &row_pitch, &slice_pitch,
                  PYOPENCL_WAITLIST_ARGS, &evt,
                  &status_code);
            Py_END_ALLOW_THREADS
            if (status_code != CL_SUCCESS)
              throw pyopencl::error("clEnqueueMapImage", status_code);
          } );
    
        event evt_handle(evt, false);
    
    
        std::unique_ptr<memory_map> map;
    
           map = std::unique_ptr<memory_map>(new memory_map(cq, img, mapped));
    
        }
        catch (...)
        {
          PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueUnmapMemObject, (
                cq.data(), img.data(), mapped, 0, 0, 0));
          throw;
        }
    
        py::handle<> result = py::handle<>(PyArray_NewFromDescr(
            &PyArray_Type, tp_descr,
            shape.size(),
            shape.empty() ? NULL : &shape.front(),
            strides.empty() ? NULL : &strides.front(),
            mapped, ary_flags, /*obj*/NULL));
    
        py::handle<> map_py(handle_from_new_ptr(map.release()));
        PyArray_BASE(result.get()) = map_py.get();
        Py_INCREF(map_py.get());
    
        return py::make_tuple(
            result,
            handle_from_new_ptr(new event(evt_handle)),
            row_pitch, slice_pitch);
      }
    
      // }}}
    
      // {{{ sampler
      class sampler : boost::noncopyable
      {
        private:
          cl_sampler m_sampler;
    
        public:
          sampler(context const &ctx, bool normalized_coordinates,
              cl_addressing_mode am, cl_filter_mode fm)
          {
            cl_int status_code;
            PYOPENCL_PRINT_CALL_TRACE("clCreateSampler");
            m_sampler = clCreateSampler(
                ctx.data(),
                normalized_coordinates,
                am, fm, &status_code);
    
            if (status_code != CL_SUCCESS)
              throw pyopencl::error("Sampler", status_code);
          }
    
          sampler(cl_sampler samp, bool retain)
            : m_sampler(samp)
          {
            if (retain)
              PYOPENCL_CALL_GUARDED(clRetainSampler, (samp));
          }
    
          ~sampler()
          {
            PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseSampler, (m_sampler));
          }
    
          cl_sampler data() const
          {
            return m_sampler;
          }
    
          PYOPENCL_EQUALITY_TESTS(sampler);
    
          py::object get_info(cl_sampler_info param_name) const
          {
            switch (param_name)
            {
              case CL_SAMPLER_REFERENCE_COUNT:
                PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name,
                    cl_uint);
              case CL_SAMPLER_CONTEXT:
                PYOPENCL_GET_OPAQUE_INFO(Sampler, m_sampler, param_name,
                    cl_context, context);
              case CL_SAMPLER_ADDRESSING_MODE:
                PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name,
                    cl_addressing_mode);
              case CL_SAMPLER_FILTER_MODE:
                PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name,
                    cl_filter_mode);
              case CL_SAMPLER_NORMALIZED_COORDS:
                PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name,
                    cl_bool);
    
              default:
                throw error("Sampler.get_info", CL_INVALID_VALUE);
            }
          }
      };
    
      // }}}
    
      // {{{ program
    
      class program : boost::noncopyable
      {
        public:
          enum program_kind_type { KND_UNKNOWN, KND_SOURCE, KND_BINARY };
    
        private:
          cl_program m_program;
          program_kind_type m_program_kind;
    
        public:
          program(cl_program prog, bool retain, program_kind_type progkind=KND_UNKNOWN)
            : m_program(prog), m_program_kind(progkind)
          {
            if (retain)
              PYOPENCL_CALL_GUARDED(clRetainProgram, (prog));
          }
    
          ~program()
          {
            PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseProgram, (m_program));
          }
    
          cl_program data() const
          {
            return m_program;
          }
    
          program_kind_type kind() const
          {
            return m_program_kind;
          }
    
          PYOPENCL_EQUALITY_TESTS(program);
    
          py::object get_info(cl_program_info param_name) const
          {
            switch (param_name)
            {
              case CL_PROGRAM_REFERENCE_COUNT:
                PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name,
                    cl_uint);
              case CL_PROGRAM_CONTEXT:
                PYOPENCL_GET_OPAQUE_INFO(Program, m_program, param_name,
                    cl_context, context);
              case CL_PROGRAM_NUM_DEVICES:
                PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name,
                    cl_uint);
              case CL_PROGRAM_DEVICES:
                {
                  std::vector<cl_device_id> result;
                  PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result);
    
                  py::list py_result;
    
                  for (cl_device_id did: result)
    
                    py_result.append(handle_from_new_ptr(
                          new pyopencl::device(did)));
                  return py_result;
                }
              case CL_PROGRAM_SOURCE:
                PYOPENCL_GET_STR_INFO(Program, m_program, param_name);
              case CL_PROGRAM_BINARY_SIZES:
                {
                  std::vector<size_t> result;
                  PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result);
                  PYOPENCL_RETURN_VECTOR(size_t, result);
                }
              case CL_PROGRAM_BINARIES:
                // {{{
                {
                  std::vector<size_t> sizes;
                  PYOPENCL_GET_VEC_INFO(Program, m_program, CL_PROGRAM_BINARY_SIZES, sizes);
    
                  size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0);
    
                  boost::scoped_array<unsigned char> result(
                      new unsigned char[total_size]);
                  std::vector<unsigned char *> result_ptrs;
    
                  unsigned char *ptr = result.get();
                  for (unsigned i = 0; i < sizes.size(); ++i)
                  {
                    result_ptrs.push_back(ptr);
                    ptr += sizes[i];
                  }
    
                  PYOPENCL_CALL_GUARDED(clGetProgramInfo,
                      (m_program, param_name, sizes.size()*sizeof(unsigned char *),
                       result_ptrs.empty( ) ? NULL : &result_ptrs.front(), 0)); \
    
                  py::list py_result;
                  ptr = result.get();
                  for (unsigned i = 0; i < sizes.size(); ++i)
                  {
                    py::handle<> binary_pyobj(
    #if PY_VERSION_HEX >= 0x03000000
                        PyBytes_FromStringAndSize(
                          reinterpret_cast<char *>(ptr), sizes[i])
    #else
                        PyString_FromStringAndSize(
                          reinterpret_cast<char *>(ptr), sizes[i])
    #endif
                        );
                    py_result.append(binary_pyobj);
                    ptr += sizes[i];
                  }
                  return py_result;
                }
                // }}}
    #if PYOPENCL_CL_VERSION >= 0x1020
              case CL_PROGRAM_NUM_KERNELS:
                PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name,
                    size_t);
              case CL_PROGRAM_KERNEL_NAMES:
                PYOPENCL_GET_STR_INFO(Program, m_program, param_name);
    #endif
    
              default:
                throw error("Program.get_info", CL_INVALID_VALUE);
            }
          }
    
          py::object get_build_info(
              device const &dev,
              cl_program_build_info param_name) const
          {
            switch (param_name)
            {
    #define PYOPENCL_FIRST_ARG m_program, dev.data() // hackety hack
              case CL_PROGRAM_BUILD_STATUS:
                PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild,
                    PYOPENCL_FIRST_ARG, param_name,
                    cl_build_status);
              case CL_PROGRAM_BUILD_OPTIONS:
              case CL_PROGRAM_BUILD_LOG:
                PYOPENCL_GET_STR_INFO(ProgramBuild,
                    PYOPENCL_FIRST_ARG, param_name);
    #if PYOPENCL_CL_VERSION >= 0x1020
              case CL_PROGRAM_BINARY_TYPE:
                PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild,
                    PYOPENCL_FIRST_ARG, param_name,
                    cl_program_binary_type);
    #endif
    #undef PYOPENCL_FIRST_ARG
    
              default:
                throw error("Program.get_build_info", CL_INVALID_VALUE);
            }
          }
    
          void build(std::string options, py::object py_devices)
          {
            PYOPENCL_PARSE_PY_DEVICES;
    
            PYOPENCL_CALL_GUARDED_THREADED(clBuildProgram,
                (m_program, num_devices, devices,
                 options.c_str(), 0 ,0));
          }
    
    #if PYOPENCL_CL_VERSION >= 0x1020
          void compile(std::string options, py::object py_devices,
              py::object py_headers)
          {
            PYOPENCL_PARSE_PY_DEVICES;
    
            // {{{ pick apart py_headers
            // py_headers is a list of tuples *(name, program)*
    
            std::vector<std::string> header_names;
            std::vector<cl_program> programs;
            PYTHON_FOREACH(name_hdr_tup, py_headers)
            {
              if (py::len(name_hdr_tup) != 2)
                throw error("Program.compile", CL_INVALID_VALUE,
                    "epxected (name, header) tuple in headers list");
              std::string name = py::extract<std::string const &>(name_hdr_tup[0]);
              program &prg = py::extract<program &>(name_hdr_tup[1]);
    
              header_names.push_back(name);
              programs.push_back(prg.data());
            }
    
            std::vector<const char *> header_name_ptrs;
    
            for (std::string const &name: header_names)
    
    3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756
              header_name_ptrs.push_back(name.c_str());
    
            // }}}
    
            PYOPENCL_CALL_GUARDED_THREADED(clCompileProgram,
                (m_program, num_devices, devices,
                 options.c_str(), header_names.size(),
                 programs.empty() ? NULL : &programs.front(),
                 header_name_ptrs.empty() ? NULL : &header_name_ptrs.front(),
                 0, 0));
          }
    #endif
      };
    
    
    
    
      inline
      program *create_program_with_source(
          context &ctx,
          std::string const &src)
      {
        const char *string = src.c_str();
        size_t length = src.size();
    
        cl_int status_code;
        PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithSource");
        cl_program result = clCreateProgramWithSource(
            ctx.data(), 1, &string, &length, &status_code);
        if (status_code != CL_SUCCESS)
          throw pyopencl::error("clCreateProgramWithSource", status_code);
    
        try
        {
          return new program(result, false, program::KND_SOURCE);
        }
        catch (...)
        {
          clReleaseProgram(result);
          throw;
        }
      }
    
    
    
    
    
      inline
      program *create_program_with_binary(
          context &ctx,
          py::object py_devices,
          py::object py_binaries)
      {
        std::vector<cl_device_id> devices;
        std::vector<const unsigned char *> binaries;
        std::vector<size_t> sizes;
        std::vector<cl_int> binary_statuses;
    
        int num_devices = len(py_devices);
        if (len(py_binaries) != num_devices)
          throw error("create_program_with_binary", CL_INVALID_VALUE,
              "device and binary counts don't match");
    
        for (int i = 0; i < num_devices; ++i)
        {
          devices.push_back(
              py::extract<device const &>(py_devices[i])().data());
          const void *buf;
          PYOPENCL_BUFFER_SIZE_T len;
    
    #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
          py_buffer_wrapper buf_wrapper;
    
          buf_wrapper.get(py::object(py_binaries[i]).ptr(), PyBUF_ANY_CONTIGUOUS);
    
          buf = buf_wrapper.m_buf.buf;
          len = buf_wrapper.m_buf.len;
    #else
          if (PyObject_AsReadBuffer(
                py::object(py_binaries[i]).ptr(), &buf, &len))
            throw py::error_already_set();
    #endif
    
          binaries.push_back(reinterpret_cast<const unsigned char *>(buf));
          sizes.push_back(len);
        }
    
        binary_statuses.resize(num_devices);
    
        cl_int status_code;
        PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBinary");
        cl_program result = clCreateProgramWithBinary(
            ctx.data(), num_devices,
            devices.empty( ) ? NULL : &devices.front(),
            sizes.empty( ) ? NULL : &sizes.front(),
            binaries.empty( ) ? NULL : &binaries.front(),
            binary_statuses.empty( ) ? NULL : &binary_statuses.front(),
            &status_code);
        if (status_code != CL_SUCCESS)
          throw pyopencl::error("clCreateProgramWithBinary", status_code);
    
        /*
        for (int i = 0; i < num_devices; ++i)
          printf("%d:%d\n", i, binary_statuses[i]);
          */
    
        try
        {
          return new program(result, false, program::KND_BINARY);
        }
        catch (...)
        {
          clReleaseProgram(result);
          throw;
        }
      }
    
    
    
    #if (PYOPENCL_CL_VERSION >= 0x1020) && \
          ((PYOPENCL_CL_VERSION >= 0x1030) && defined(__APPLE__))
      inline
      program *create_program_with_built_in_kernels(
          context &ctx,
          py::object py_devices,
          std::string const &kernel_names)
      {
        PYOPENCL_PARSE_PY_DEVICES;
    
        cl_int status_code;
        PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBuiltInKernels");
        cl_program result = clCreateProgramWithBuiltInKernels(
            ctx.data(), num_devices, devices,
            kernel_names.c_str(), &status_code);
        if (status_code != CL_SUCCESS)
          throw pyopencl::error("clCreateProgramWithBuiltInKernels", status_code);
    
        try
        {
          return new program(result, false);
        }
        catch (...)
        {
          clReleaseProgram(result);
          throw;
        }
      }
    #endif
    
    
    
    #if PYOPENCL_CL_VERSION >= 0x1020
      inline
      program *link_program(
          context &ctx,
          py::object py_programs,
          std::string const &options,
          py::object py_devices
          )
      {
        PYOPENCL_PARSE_PY_DEVICES;
    
        std::vector<cl_program> programs;
        PYTHON_FOREACH(py_prg, py_programs)
        {
          program &prg = py::extract<program &>(py_prg);
          programs.push_back(prg.data());
        }
    
        cl_int status_code;
        PYOPENCL_PRINT_CALL_TRACE("clLinkProgram");
        cl_program result = clLinkProgram(
            ctx.data(), num_devices, devices,
            options.c_str(),
            programs.size(),
            programs.empty() ? NULL : &programs.front(),
            0, 0,
            &status_code);
    
        if (status_code != CL_SUCCESS)
          throw pyopencl::error("clLinkPorgram", status_code);
    
        try
        {
          return new program(result, false);
        }
        catch (...)
        {
          clReleaseProgram(result);
          throw;
        }
      }
    
    #endif
    
    
    #if PYOPENCL_CL_VERSION >= 0x1020
      inline
      void unload_platform_compiler(platform &plat)
      {
        PYOPENCL_CALL_GUARDED(clUnloadPlatformCompiler, (plat.data()));
      }
    #endif
    
      // }}}
    
      // {{{ kernel
      class local_memory
      {
        private:
          size_t m_size;
    
        public:
          local_memory(size_t size)
            : m_size(size)
          { }
    
          size_t size() const
          { return m_size; }
      };
    
    
    
    
      class kernel : boost::noncopyable
      {
        private:
          cl_kernel m_kernel;
    
        public:
          kernel(cl_kernel knl, bool retain)
            : m_kernel(knl)
          {
            if (retain)
              PYOPENCL_CALL_GUARDED(clRetainKernel, (knl));
          }
    
          kernel(program const &prg, std::string const &kernel_name)
          {
            cl_int status_code;
    
            PYOPENCL_PRINT_CALL_TRACE("clCreateKernel");
            m_kernel = clCreateKernel(prg.data(), kernel_name.c_str(),
                &status_code);
            if (status_code != CL_SUCCESS)
              throw pyopencl::error("clCreateKernel", status_code);
          }
    
          ~kernel()
          {
            PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseKernel, (m_kernel));
          }
    
          cl_kernel data() const
          {
            return m_kernel;
          }
    
          PYOPENCL_EQUALITY_TESTS(kernel);
    
          void set_arg_null(cl_uint arg_index)
          {
            cl_mem m = 0;
            PYOPENCL_CALL_GUARDED(clSetKernelArg, (m_kernel, arg_index,
                  sizeof(cl_mem), &m));
          }
    
          void set_arg_mem(cl_uint arg_index, memory_object_holder &moh)
          {
            cl_mem m = moh.data();
            PYOPENCL_CALL_GUARDED(clSetKernelArg,
                (m_kernel, arg_index, sizeof(cl_mem), &m));
          }
    
          void set_arg_local(cl_uint arg_index, local_memory const &loc)
          {
            PYOPENCL_CALL_GUARDED(clSetKernelArg,
                (m_kernel, arg_index, loc.size(), 0));
          }
    
          void set_arg_sampler(cl_uint arg_index, sampler const &smp)
          {
            cl_sampler s = smp.data();
            PYOPENCL_CALL_GUARDED(clSetKernelArg,
                (m_kernel, arg_index, sizeof(cl_sampler), &s));
          }
    
          void set_arg_buf(cl_uint arg_index, py::object py_buffer)
          {
            const void *buf;
            PYOPENCL_BUFFER_SIZE_T len;
    
    #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
            py_buffer_wrapper buf_wrapper;
    
            try
            {
              buf_wrapper.get(py_buffer.ptr(), PyBUF_ANY_CONTIGUOUS);
            }
            catch (py::error_already_set)
            {
              PyErr_Clear();
              throw error("Kernel.set_arg", CL_INVALID_VALUE,
                  "invalid kernel argument");
            }
    
            buf = buf_wrapper.m_buf.buf;
            len = buf_wrapper.m_buf.len;
    #else
            if (PyObject_AsReadBuffer(py_buffer.ptr(), &buf, &len))
            {
              PyErr_Clear();
              throw error("Kernel.set_arg", CL_INVALID_VALUE,
                  "invalid kernel argument");
            }
    #endif
    
            PYOPENCL_CALL_GUARDED(clSetKernelArg,
                (m_kernel, arg_index, len, buf));
          }
    
          void set_arg(cl_uint arg_index, py::object arg)
          {
            if (arg.ptr() == Py_None)
            {
              set_arg_null(arg_index);
              return;
            }
    
            py::extract<memory_object_holder &> ex_mo(arg);
            if (ex_mo.check())
            {
              set_arg_mem(arg_index, ex_mo());
              return;
            }
    
            py::extract<local_memory const &> ex_loc(arg);
            if (ex_loc.check())
            {
              set_arg_local(arg_index, ex_loc());
              return;
            }
    
            py::extract<sampler const &> ex_smp(arg);
            if (ex_smp.check())
            {
              set_arg_sampler(arg_index, ex_smp());
              return;
            }
    
            set_arg_buf(arg_index, arg);
          }
    
          py::object get_info(cl_kernel_info param_name) const
          {
            switch (param_name)
            {
              case CL_KERNEL_FUNCTION_NAME:
                PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name);
              case CL_KERNEL_NUM_ARGS:
              case CL_KERNEL_REFERENCE_COUNT:
                PYOPENCL_GET_INTEGRAL_INFO(Kernel, m_kernel, param_name,
                    cl_uint);
              case CL_KERNEL_CONTEXT:
                PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name,
                    cl_context, context);
              case CL_KERNEL_PROGRAM:
                PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name,
                    cl_program, program);
    #if PYOPENCL_CL_VERSION >= 0x1020
              case CL_KERNEL_ATTRIBUTES:
                PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name);
    #endif
              default:
                throw error("Kernel.get_info", CL_INVALID_VALUE);
            }
          }
    
          py::object get_work_group_info(
              cl_kernel_work_group_info param_name,
              device const &dev
              ) const
          {
            switch (param_name)
            {
    #define PYOPENCL_FIRST_ARG m_kernel, dev.data() // hackety hack
              case CL_KERNEL_WORK_GROUP_SIZE:
                PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup,
                    PYOPENCL_FIRST_ARG, param_name,
                    size_t);
              case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
                {
                  std::vector<size_t> result;
                  PYOPENCL_GET_VEC_INFO(KernelWorkGroup,
                      PYOPENCL_FIRST_ARG, param_name, result);
    
                  PYOPENCL_RETURN_VECTOR(size_t, result);
                }
              case CL_KERNEL_LOCAL_MEM_SIZE:
    #if PYOPENCL_CL_VERSION >= 0x1010
              case CL_KERNEL_PRIVATE_MEM_SIZE:
    #endif
                PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup,
                    PYOPENCL_FIRST_ARG, param_name,
                    cl_ulong);
    
    #if PYOPENCL_CL_VERSION >= 0x1010
              case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
                PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup,
                    PYOPENCL_FIRST_ARG, param_name,
                    size_t);
    #endif
              default:
                throw error("Kernel.get_work_group_info", CL_INVALID_VALUE);
    #undef PYOPENCL_FIRST_ARG
            }
          }
    
    #if PYOPENCL_CL_VERSION >= 0x1020
          py::object get_arg_info(
              cl_uint arg_index,
              cl_kernel_arg_info param_name
              ) const
          {
            switch (param_name)
            {
    #define PYOPENCL_FIRST_ARG m_kernel, arg_index // hackety hack
              case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
                PYOPENCL_GET_INTEGRAL_INFO(KernelArg,
                    PYOPENCL_FIRST_ARG, param_name,
                    cl_kernel_arg_address_qualifier);
    
              case CL_KERNEL_ARG_ACCESS_QUALIFIER:
                PYOPENCL_GET_INTEGRAL_INFO(KernelArg,
                    PYOPENCL_FIRST_ARG, param_name,
                    cl_kernel_arg_access_qualifier);
    
              case CL_KERNEL_ARG_TYPE_NAME:
              case CL_KERNEL_ARG_NAME:
                PYOPENCL_GET_STR_INFO(KernelArg, PYOPENCL_FIRST_ARG, param_name);
    #undef PYOPENCL_FIRST_ARG
              default:
                throw error("Kernel.get_arg_info", CL_INVALID_VALUE);
            }
          }
    #endif
      };
    
    
      inline
      py::list create_kernels_in_program(program &pgm)
      {
        cl_uint num_kernels;
        PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, (
              pgm.data(), 0, 0, &num_kernels));
    
        std::vector<cl_kernel> kernels(num_kernels);
        PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, (
              pgm.data(), num_kernels,
              kernels.empty( ) ? NULL : &kernels.front(), &num_kernels));
    
        py::list result;
    
        for (cl_kernel knl: kernels)
    
          result.append(handle_from_new_ptr(new kernel(knl, true)));
    
        return result;
      }
    
    
    
      inline
      event *enqueue_nd_range_kernel(
          command_queue &cq,
          kernel &knl,
          py::object py_global_work_size,
          py::object py_local_work_size,
          py::object py_global_work_offset,
          py::object py_wait_for,
          bool g_times_l)
      {
        PYOPENCL_PARSE_WAIT_FOR;
    
        cl_uint work_dim = len(py_global_work_size);
    
        std::vector<size_t> global_work_size;
        COPY_PY_LIST(size_t, global_work_size);
    
        size_t *local_work_size_ptr = 0;
        std::vector<size_t> local_work_size;
        if (py_local_work_size.ptr() != Py_None)
        {
          if (g_times_l)
            work_dim = std::max(work_dim, unsigned(len(py_local_work_size)));
          else
            if (work_dim != unsigned(len(py_local_work_size)))
              throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
                  "global/local work sizes have differing dimensions");
    
          COPY_PY_LIST(size_t, local_work_size);
    
          while (local_work_size.size() < work_dim)
            local_work_size.push_back(1);
          while (global_work_size.size() < work_dim)
            global_work_size.push_back(1);
    
          local_work_size_ptr = local_work_size.empty( ) ? NULL : &local_work_size.front();
        }
    
        if (g_times_l && local_work_size_ptr)
        {
          for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
            global_work_size[work_axis] *= local_work_size[work_axis];
        }
    
        size_t *global_work_offset_ptr = 0;
        std::vector<size_t> global_work_offset;
        if (py_global_work_offset.ptr() != Py_None)
        {
          if (work_dim != unsigned(len(py_global_work_offset)))
            throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
                "global work size and offset have differing dimensions");
    
          COPY_PY_LIST(size_t, global_work_offset);
    
          if (g_times_l && local_work_size_ptr)
          {
            for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
              global_work_offset[work_axis] *= local_work_size[work_axis];
          }
    
          global_work_offset_ptr = global_work_offset.empty( ) ? NULL :  &global_work_offset.front();
        }
    
        PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( {
              cl_event evt;
              PYOPENCL_CALL_GUARDED(clEnqueueNDRangeKernel, (
                    cq.data(),
                    knl.data(),
                    work_dim,
                    global_work_offset_ptr,
                    global_work_size.empty( ) ? NULL : &global_work_size.front(),
                    local_work_size_ptr,
                    PYOPENCL_WAITLIST_ARGS, &evt
                    ));
              PYOPENCL_RETURN_NEW_EVENT(evt);
            } );
      }
    
    
    
    
    
    
      inline
      event *enqueue_task(
          command_queue &cq,
          kernel &knl,
          py::object py_wait_for)
      {
        PYOPENCL_PARSE_WAIT_FOR;
    
        PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( {
          cl_event evt;
          PYOPENCL_CALL_GUARDED(clEnqueueTask, (
                cq.data(),
                knl.data(),
                PYOPENCL_WAITLIST_ARGS, &evt
                ));
          PYOPENCL_RETURN_NEW_EVENT(evt);
        } );
      }
    
      // }}}
    
      // {{{ gl interop
      inline
      bool have_gl()
      {
    #ifdef HAVE_GL
        return true;
    #else
        return false;
    #endif
      }
    
    
    
    
    #ifdef HAVE_GL
    
    #ifdef __APPLE__
      inline
      cl_context_properties get_apple_cgl_share_group()
      {
        CGLContextObj kCGLContext = CGLGetCurrentContext();
        CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);
    
        return (cl_context_properties) kCGLShareGroup;
      }
    #endif /* __APPLE__ */
    
    
    
    
      class gl_buffer : public memory_object
      {
        public:
          gl_buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
            : memory_object(mem, retain, hostbuf)
          { }
      };
    
    
    
    
      class gl_renderbuffer : public memory_object
      {
        public:
          gl_renderbuffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
            : memory_object(mem, retain, hostbuf)
          { }
      };
    
    
    
    
      class gl_texture : public image
      {
        public:
          gl_texture(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
            : image(mem, retain, hostbuf)
          { }
    
          py::object get_gl_texture_info(cl_gl_texture_info param_name)
          {
            switch (param_name)
            {
              case CL_GL_TEXTURE_TARGET:
                PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLenum);
              case CL_GL_MIPMAP_LEVEL:
                PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLint);
    
              default:
                throw error("MemoryObject.get_gl_texture_info", CL_INVALID_VALUE);
            }
          }
      };
    
    
    
    
    #define PYOPENCL_WRAP_BUFFER_CREATOR(TYPE, NAME, CL_NAME, ARGS, CL_ARGS) \
      inline \
      TYPE *NAME ARGS \
      { \
        cl_int status_code; \
        PYOPENCL_PRINT_CALL_TRACE(#CL_NAME); \
        cl_mem mem = CL_NAME CL_ARGS; \
        \
        if (status_code != CL_SUCCESS) \
          throw pyopencl::error(#CL_NAME, status_code); \
        \
        try \
        { \
          return new TYPE(mem, false); \
        } \
        catch (...) \
        { \
          PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); \
          throw; \
        } \
      }
    
    
    
    
      PYOPENCL_WRAP_BUFFER_CREATOR(gl_buffer,
          create_from_gl_buffer, clCreateFromGLBuffer,
          (context &ctx, cl_mem_flags flags, GLuint bufobj),
          (ctx.data(), flags, bufobj, &status_code));
      PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture,
          create_from_gl_texture_2d, clCreateFromGLTexture2D,
          (context &ctx, cl_mem_flags flags,
             GLenum texture_target, GLint miplevel, GLuint texture),
          (ctx.data(), flags, texture_target, miplevel, texture, &status_code));
      PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture,
          create_from_gl_texture_3d, clCreateFromGLTexture3D,
          (context &ctx, cl_mem_flags flags,
             GLenum texture_target, GLint miplevel, GLuint texture),
          (ctx.data(), flags, texture_target, miplevel, texture, &status_code));
      PYOPENCL_WRAP_BUFFER_CREATOR(gl_renderbuffer,
          create_from_gl_renderbuffer, clCreateFromGLRenderbuffer,
          (context &ctx, cl_mem_flags flags, GLuint renderbuffer),
          (ctx.data(), flags, renderbuffer, &status_code));
    
      inline
      gl_texture *create_from_gl_texture(
          context &ctx, cl_mem_flags flags,
          GLenum texture_target, GLint miplevel,
          GLuint texture, unsigned dims)
      {
        if (dims == 2)
          return create_from_gl_texture_2d(ctx, flags, texture_target, miplevel, texture);
        else if (dims == 3)
          return create_from_gl_texture_3d(ctx, flags, texture_target, miplevel, texture);
        else