diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index 49ec01ef77945c6dd4bb3ae817527ec241498f7f..9100c844cfdda55105c35043bf72cb163dcde4a2 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -26,11 +26,9 @@ THE SOFTWARE.
 
 import re
 import six
-import sys
-from six.moves import input, range, intern
+from six.moves import input
 
 from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT  # noqa
-from pytools import Record as _Record
 
 try:
     import pyopencl.cffi_cl as _cl
@@ -43,9 +41,6 @@ except ImportError:
                 "its source directory. This likely won't work.")
     raise
 
-_CPY2 = _cl._CPY2
-_CPY26 = _cl._CPY2 and sys.version_info < (2, 7)
-
 import numpy as np
 
 from pyopencl.cffi_cl import (  # noqa
@@ -117,6 +112,7 @@ from pyopencl.cffi_cl import (  # noqa
         MemoryObject,
         MemoryMap,
         Buffer,
+        CompilerWarning,
         _Program,
         Kernel,
 
@@ -166,6 +162,8 @@ from pyopencl.cffi_cl import (  # noqa
         Sampler,
         GLTexture,
         DeviceTopologyAmd,
+
+        add_get_info_attrs as _add_get_info_attrs,
         )
 
 if _cl.have_gl():
@@ -183,54 +181,6 @@ if _cl.have_gl():
         pass
 
 
-import inspect as _inspect
-
-CONSTANT_CLASSES = [
-        getattr(_cl, name) for name in dir(_cl)
-        if _inspect.isclass(getattr(_cl, name))
-        and name[0].islower() and name not in ["zip", "map", "range"]]
-
-
-# {{{ diagnostics
-
-class CompilerWarning(UserWarning):
-    pass
-
-
-def compiler_output(text):
-    import os
-    from warnings import warn
-    if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")):
-        warn(text, CompilerWarning)
-    else:
-        warn("Non-empty compiler output encountered. Set the "
-                "environment variable PYOPENCL_COMPILER_OUTPUT=1 "
-                "to see more.", CompilerWarning)
-
-
-class _ErrorRecord(_Record):
-    pass
-
-# }}}
-
-
-# {{{ arg packing helpers
-
-_size_t_char = ({
-    8: 'Q',
-    4: 'L',
-    2: 'H',
-    1: 'B',
-})[_cl._ffi.sizeof('size_t')]
-_type_char_map = {
-    'n': _size_t_char.lower(),
-    'N': _size_t_char
-}
-del _size_t_char
-
-# }}}
-
-
 # {{{ find pyopencl shipped source code
 
 def _find_pyopencl_include_path():
@@ -256,7 +206,7 @@ def _find_pyopencl_include_path():
 # }}}
 
 
-# {{{ Program (including caching support)
+# {{{ Program (wrapper around _Program, adds caching support)
 
 _DEFAULT_BUILD_OPTIONS = []
 _DEFAULT_INCLUDE_OPTIONS = ["-I", _find_pyopencl_include_path()]
@@ -507,7 +457,7 @@ class Program(object):
             routine = e.routine
 
             err = _cl.RuntimeError(
-                    _ErrorRecord(
+                    _cl.Error._ErrorRecord(
                         what=lambda: what,
                         code=lambda: code,
                         routine=lambda: routine))
@@ -532,6 +482,8 @@ class Program(object):
     def __hash__(self):
         return hash(self._get_prg())
 
+_add_get_info_attrs(Program, Program.get_info, program_info)
+
 
 def create_program_with_built_in_kernels(context, devices, kernel_names):
     if not isinstance(kernel_names, str):
@@ -548,764 +500,7 @@ def link_program(context, programs, options=[], devices=None):
 # }}}
 
 
-def _add_functionality():
-    cls_to_info_cls = {
-            _cl.Platform: (_cl.Platform.get_info, _cl.platform_info, []),
-            _cl.Device: (_cl.Device.get_info, _cl.device_info,
-                ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"]),
-            _cl.Context: (_cl.Context.get_info, _cl.context_info, []),
-            _cl.CommandQueue: (_cl.CommandQueue.get_info, _cl.command_queue_info,
-                ["CONTEXT", "DEVICE"]),
-            _cl.Event: (_cl.Event.get_info, _cl.event_info, []),
-            _cl.MemoryObjectHolder:
-            (MemoryObjectHolder.get_info, _cl.mem_info, []),
-            Image: (_cl.Image.get_image_info, _cl.image_info, []),
-            Program: (Program.get_info, _cl.program_info, []),
-            Kernel: (Kernel.get_info, _cl.kernel_info, []),
-            _cl.Sampler: (Sampler.get_info, _cl.sampler_info, []),
-            }
-
-    def to_string(cls, value, default_format=None):
-        for name in dir(cls):
-            if (not name.startswith("_") and getattr(cls, name) == value):
-                return name
-
-        if default_format is None:
-            raise ValueError("a name for value %d was not found in %s"
-                    % (value, cls.__name__))
-        else:
-            return default_format % value
-
-    for cls in CONSTANT_CLASSES:
-        cls.to_string = classmethod(to_string)
-
-    # {{{ get_info attributes -------------------------------------------------
-
-    def make_getinfo(info_method, info_name, info_attr):
-        def result(self):
-            return info_method(self, info_attr)
-
-        return property(result)
-
-    def make_cacheable_getinfo(info_method, info_name, cache_attr, info_attr):
-        def result(self):
-            try:
-                return getattr(self, cache_attr)
-            except AttributeError:
-                pass
-
-            result = info_method(self, info_attr)
-            setattr(self, cache_attr, result)
-            return result
-
-        return property(result)
-
-    for cls, (info_method, info_class, cacheable_attrs) \
-            in six.iteritems(cls_to_info_cls):
-        for info_name, info_value in six.iteritems(info_class.__dict__):
-            if info_name == "to_string" or info_name.startswith("_"):
-                continue
-
-            info_lower = info_name.lower()
-            info_constant = getattr(info_class, info_name)
-            if info_name in cacheable_attrs:
-                cache_attr = intern("_info_cache_"+info_lower)
-                setattr(cls, info_lower, make_cacheable_getinfo(
-                    info_method, info_lower, cache_attr, info_constant))
-            else:
-                setattr(cls, info_lower, make_getinfo(
-                        info_method, info_name, info_constant))
-
-    # }}}
-
-    # {{{ Platform
-
-    def platform_repr(self):
-        return "<pyopencl.Platform '%s' at 0x%x>" % (self.name, self.int_ptr)
-
-    def platform_get_cl_version(self):
-        import re
-        version_string = self.version
-        match = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", version_string)
-        if match is None:
-            raise RuntimeError("platform %s returned non-conformant "
-                               "platform version string '%s'" %
-                               (self, version_string))
-
-        return int(match.group(1)), int(match.group(2))
-
-    Platform.__repr__ = platform_repr
-    Platform._get_cl_version = platform_get_cl_version
-
-    # }}}
-
-    # {{{ Device
-
-    def device_repr(self):
-        return "<pyopencl.Device '%s' on '%s' at 0x%x>" % (
-                self.name.strip(), self.platform.name.strip(), self.int_ptr)
-
-    def device_persistent_unique_id(self):
-        return (self.vendor, self.vendor_id, self.name, self.version)
-
-    Device.__repr__ = device_repr
-
-    # undocumented for now:
-    Device.persistent_unique_id = property(device_persistent_unique_id)
-
-    # }}}
-
-    # {{{ Context
-
-    def context_repr(self):
-        return "<pyopencl.Context at 0x%x on %s>" % (self.int_ptr,
-                ", ".join(repr(dev) for dev in self.devices))
-
-    def context_get_cl_version(self):
-        return self.devices[0].platform._get_cl_version()
-
-    Context.__repr__ = context_repr
-    from pytools import memoize_method
-    Context._get_cl_version = memoize_method(context_get_cl_version)
-
-    # }}}
-
-    # {{{ CommandQueue
-
-    def command_queue_enter(self):
-        return self
-
-    def command_queue_exit(self, exc_type, exc_val, exc_tb):
-        self.finish()
-
-    def command_queue_get_cl_version(self):
-        return self.context._get_cl_version()
-
-    CommandQueue.__enter__ = command_queue_enter
-    CommandQueue.__exit__ = command_queue_exit
-    CommandQueue._get_cl_version = memoize_method(command_queue_get_cl_version)
-
-    # }}}
-
-    # {{{ _Program (the internal, non-caching version)
-
-    def program_get_build_logs(self):
-        build_logs = []
-        for dev in self.get_info(_cl.program_info.DEVICES):
-            try:
-                log = self.get_build_info(dev, program_build_info.LOG)
-            except:
-                log = "<error retrieving log>"
-
-            build_logs.append((dev, log))
-
-        return build_logs
-
-    def program_build(self, options_bytes, devices=None):
-        err = None
-        try:
-            self._build(options=options_bytes, devices=devices)
-        except Error as e:
-            what = e.what + "\n\n" + (75*"="+"\n").join(
-                    "Build on %s:\n\n%s" % (dev, log)
-                    for dev, log in self._get_build_logs())
-            code = e.code
-            routine = e.routine
-
-            err = _cl.RuntimeError(
-                    _ErrorRecord(
-                        what=lambda: what,
-                        code=lambda: code,
-                        routine=lambda: routine))
-
-        if err is not None:
-            # Python 3.2 outputs the whole list of currently active exceptions
-            # This serves to remove one (redundant) level from that nesting.
-            raise err
-
-        message = (75*"="+"\n").join(
-                "Build on %s succeeded, but said:\n\n%s" % (dev, log)
-                for dev, log in self._get_build_logs()
-                if log is not None and log.strip())
-
-        if message:
-            if self.kind() == program_kind.SOURCE:
-                build_type = "From-source build"
-            elif self.kind() == program_kind.BINARY:
-                build_type = "From-binary build"
-            else:
-                build_type = "Build"
-
-            compiler_output("%s succeeded, but resulted in non-empty logs:\n%s"
-                    % (build_type, message))
-
-        return self
-
-    _cl._Program._get_build_logs = program_get_build_logs
-    _cl._Program.build = program_build
-
-    # }}}
-
-    # {{{ Event
-    class ProfilingInfoGetter:
-        def __init__(self, event):
-            self.event = event
-
-        def __getattr__(self, name):
-            info_cls = _cl.profiling_info
-
-            try:
-                inf_attr = getattr(info_cls, name.upper())
-            except AttributeError:
-                raise AttributeError("%s has no attribute '%s'"
-                        % (type(self), name))
-            else:
-                return self.event.get_profiling_info(inf_attr)
-
-    _cl.Event.profile = property(ProfilingInfoGetter)
-
-    # }}}
-
-    # {{{ Kernel
-
-    kernel_old_init = Kernel.__init__
-    kernel_old_get_work_group_info = Kernel.get_work_group_info
-
-    def kernel_init(self, prg, name):
-        if not isinstance(prg, _cl._Program):
-            prg = prg._get_prg()
-
-        kernel_old_init(self, prg, name)
-
-        self._setup(prg)
-
-    def kernel__setup(self, prg):
-        self._source = getattr(prg, "_source", None)
-
-        self._generate_naive_call()
-        self._wg_info_cache = {}
-        return self
-
-    def kernel_get_work_group_info(self, param, device):
-        try:
-            return self._wg_info_cache[param, device]
-        except KeyError:
-            pass
-
-        result = kernel_old_get_work_group_info(self, param, device)
-        self._wg_info_cache[param, device] = result
-        return result
-
-    # {{{ code generation for __call__, set_args
-
-    def kernel__set_set_args_body(self, body, num_passed_args):
-        from pytools.py_codegen import (
-                PythonFunctionGenerator,
-                PythonCodeGenerator,
-                Indentation)
-
-        arg_names = ["arg%d" % i for i in range(num_passed_args)]
-
-        # {{{ wrap in error handler
-
-        err_gen = PythonCodeGenerator()
-
-        def gen_error_handler():
-            err_gen("""
-                if current_arg is not None:
-                    args = [{args}]
-                    advice = ""
-                    from pyopencl.array import Array
-                    if isinstance(args[current_arg], Array):
-                        advice = " (perhaps you meant to pass 'array.data' " \
-                            "instead of the array itself?)"
-
-                    raise _cl.LogicError(
-                            "when processing argument #%d (1-based): %s%s"
-                            % (current_arg+1, str(e), advice))
-                else:
-                    raise
-                """
-                .format(args=", ".join(arg_names)))
-            err_gen("")
-
-        err_gen("try:")
-        with Indentation(err_gen):
-            err_gen.extend(body)
-        err_gen("except TypeError as e:")
-        with Indentation(err_gen):
-            gen_error_handler()
-        err_gen("except _cl.LogicError as e:")
-        with Indentation(err_gen):
-            gen_error_handler()
-
-        # }}}
-
-        def add_preamble(gen):
-            gen.add_to_preamble(
-                "import numpy as np")
-            gen.add_to_preamble(
-                "import pyopencl.cffi_cl as _cl")
-            gen.add_to_preamble(
-                "from pyopencl.cffi_cl import _lib, "
-                "_ffi, _handle_error, _CLKernelArg")
-            gen.add_to_preamble("from pyopencl import status_code")
-            gen.add_to_preamble("from struct import pack")
-            gen.add_to_preamble("")
-
-        # {{{ generate _enqueue
-
-        gen = PythonFunctionGenerator("enqueue_knl_%s" % self.function_name,
-                ["self", "queue", "global_size", "local_size"]
-                + arg_names
-                + ["global_offset=None", "g_times_l=None", "wait_for=None"])
-
-        add_preamble(gen)
-        gen.extend(err_gen)
-
-        gen("""
-            return _cl.enqueue_nd_range_kernel(queue, self, global_size, local_size,
-                    global_offset, wait_for, g_times_l=g_times_l)
-            """)
-
-        self._enqueue = gen.get_function()
-
-        # }}}
-
-        # {{{ generate set_args
-
-        gen = PythonFunctionGenerator("_set_args", ["self"] + arg_names)
-
-        add_preamble(gen)
-        gen.extend(err_gen)
-
-        self._set_args = gen.get_function()
-
-        # }}}
-
-    def kernel__generate_buffer_arg_setter(self, gen, arg_idx, buf_var):
-        from pytools.py_codegen import Indentation
-
-        if _CPY2:
-            # https://github.com/numpy/numpy/issues/5381
-            gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var))
-            with Indentation(gen):
-                gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var))
-
-        gen("""
-            c_buf, sz, _ = _cl._c_buffer_from_obj({buf_var})
-            status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx}, c_buf, sz)
-            if status != _ffi.NULL:
-                _handle_error(status)
-            """
-            .format(arg_idx=arg_idx, buf_var=buf_var))
-
-    def kernel__generate_bytes_arg_setter(self, gen, arg_idx, buf_var):
-        gen("""
-            status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx},
-                {buf_var}, len({buf_var}))
-            if status != _ffi.NULL:
-                _handle_error(status)
-            """
-            .format(arg_idx=arg_idx, buf_var=buf_var))
-
-    def kernel__generate_generic_arg_handler(self, gen, arg_idx, arg_var):
-        from pytools.py_codegen import Indentation
-
-        gen("""
-            if {arg_var} is None:
-                status = _lib.kernel__set_arg_null(self.ptr, {arg_idx})
-                if status != _ffi.NULL:
-                    _handle_error(status)
-            elif isinstance({arg_var}, _CLKernelArg):
-                self.set_arg({arg_idx}, {arg_var})
-            """
-            .format(arg_idx=arg_idx, arg_var=arg_var))
-
-        gen("else:")
-        with Indentation(gen):
-            self._generate_buffer_arg_setter(gen, arg_idx, arg_var)
-
-    def kernel__generate_naive_call(self):
-        num_args = self.num_args
-
-        from pytools.py_codegen import PythonCodeGenerator
-        gen = PythonCodeGenerator()
-
-        if num_args == 0:
-            gen("pass")
-
-        for i in range(num_args):
-            gen("# process argument {arg_idx}".format(arg_idx=i))
-            gen("")
-            gen("current_arg = {arg_idx}".format(arg_idx=i))
-            self._generate_generic_arg_handler(gen, i, "arg%d" % i)
-            gen("")
-
-        self._set_set_args_body(gen, num_args)
-
-    def kernel_set_scalar_arg_dtypes(self, scalar_arg_dtypes):
-        self._scalar_arg_dtypes = scalar_arg_dtypes
-
-        # {{{ arg counting bug handling
-
-        # For example:
-        # https://github.com/pocl/pocl/issues/197
-        # (but Apple CPU has a similar bug)
-
-        work_around_arg_count_bug = False
-        warn_about_arg_count_bug = False
-
-        from pyopencl.characterize import has_struct_arg_count_bug
-
-        count_bug_per_dev = [
-                has_struct_arg_count_bug(dev)
-                for dev in self.context.devices]
-
-        from pytools import single_valued
-        if any(count_bug_per_dev):
-            if all(count_bug_per_dev):
-                work_around_arg_count_bug = single_valued(count_bug_per_dev)
-            else:
-                warn_about_arg_count_bug = True
-
-        fp_arg_count = 0
-
-        # }}}
-
-        cl_arg_idx = 0
-
-        from pytools.py_codegen import PythonCodeGenerator
-        gen = PythonCodeGenerator()
-
-        if not scalar_arg_dtypes:
-            gen("pass")
-
-        for arg_idx, arg_dtype in enumerate(scalar_arg_dtypes):
-            gen("# process argument {arg_idx}".format(arg_idx=arg_idx))
-            gen("")
-            gen("current_arg = {arg_idx}".format(arg_idx=arg_idx))
-            arg_var = "arg%d" % arg_idx
-
-            if arg_dtype is None:
-                self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var)
-                cl_arg_idx += 1
-                gen("")
-                continue
-
-            arg_dtype = np.dtype(arg_dtype)
-
-            if arg_dtype.char == "V":
-                self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var)
-                cl_arg_idx += 1
-
-            elif arg_dtype.kind == "c":
-                if warn_about_arg_count_bug:
-                    warn("{knl_name}: arguments include complex numbers, and "
-                            "some (but not all) of the target devices mishandle "
-                            "struct kernel arguments (hence the workaround is "
-                            "disabled".format(
-                                knl_name=self.function_name, stacklevel=2))
-
-                if arg_dtype == np.complex64:
-                    arg_char = "f"
-                elif arg_dtype == np.complex128:
-                    arg_char = "d"
-                else:
-                    raise TypeError("unexpected complex type: %s" % arg_dtype)
-
-                if (work_around_arg_count_bug == "pocl"
-                        and arg_dtype == np.complex128
-                        and fp_arg_count + 2 <= 8):
-                    gen(
-                            "buf = pack('{arg_char}', {arg_var}.real)"
-                            .format(arg_char=arg_char, arg_var=arg_var))
-                    self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
-                    cl_arg_idx += 1
-                    gen("current_arg = current_arg + 1000")
-                    gen(
-                            "buf = pack('{arg_char}', {arg_var}.imag)"
-                            .format(arg_char=arg_char, arg_var=arg_var))
-                    self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
-                    cl_arg_idx += 1
-
-                elif (work_around_arg_count_bug == "apple"
-                        and arg_dtype == np.complex128
-                        and fp_arg_count + 2 <= 8):
-                    raise NotImplementedError("No work-around to "
-                            "Apple's broken structs-as-kernel arg "
-                            "handling has been found. "
-                            "Cannot pass complex numbers to kernels.")
-
-                else:
-                    gen(
-                            "buf = pack('{arg_char}{arg_char}', "
-                            "{arg_var}.real, {arg_var}.imag)"
-                            .format(arg_char=arg_char, arg_var=arg_var))
-                    self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
-                    cl_arg_idx += 1
-
-                fp_arg_count += 2
-
-            elif arg_dtype.char in "IL" and _CPY26:
-                # Prevent SystemError: ../Objects/longobject.c:336: bad
-                # argument to internal function
-
-                gen(
-                        "buf = pack('{arg_char}', long({arg_var}))"
-                        .format(arg_char=arg_dtype.char, arg_var=arg_var))
-                self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
-                cl_arg_idx += 1
-
-            else:
-                if arg_dtype.kind == "f":
-                    fp_arg_count += 1
-
-                arg_char = arg_dtype.char
-                arg_char = _type_char_map.get(arg_char, arg_char)
-                gen(
-                        "buf = pack('{arg_char}', {arg_var})"
-                        .format(
-                            arg_char=arg_char,
-                            arg_var=arg_var))
-                self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
-                cl_arg_idx += 1
-
-            gen("")
-
-        if cl_arg_idx != self.num_args:
-            raise TypeError(
-                "length of argument list (%d) and "
-                "CL-generated number of arguments (%d) do not agree"
-                % (cl_arg_idx, self.num_args))
-
-        self._set_set_args_body(gen, len(scalar_arg_dtypes))
-
-    # }}}
-
-    def kernel_set_args(self, *args, **kwargs):
-        # Need to dupicate the 'self' argument for dynamically generated  method
-        return self._set_args(self, *args, **kwargs)
-
-    def kernel_call(self, queue, global_size, local_size, *args, **kwargs):
-        # __call__ can't be overridden directly, so we need this
-        # trampoline hack.
-        return self._enqueue(self, queue, global_size, local_size, *args, **kwargs)
-
-    def kernel_capture_call(self, filename, queue, global_size, local_size,
-            *args, **kwargs):
-        from pyopencl.capture_call import capture_kernel_call
-        capture_kernel_call(self, filename, queue, global_size, local_size,
-                *args, **kwargs)
-
-    Kernel.__init__ = kernel_init
-    Kernel._setup = kernel__setup
-    Kernel.get_work_group_info = kernel_get_work_group_info
-    Kernel._set_set_args_body = kernel__set_set_args_body
-    Kernel._generate_buffer_arg_setter = kernel__generate_buffer_arg_setter
-    Kernel._generate_bytes_arg_setter = kernel__generate_bytes_arg_setter
-    Kernel._generate_generic_arg_handler = kernel__generate_generic_arg_handler
-    Kernel._generate_naive_call = kernel__generate_naive_call
-    Kernel.set_scalar_arg_dtypes = kernel_set_scalar_arg_dtypes
-    Kernel.set_args = kernel_set_args
-    Kernel.__call__ = kernel_call
-    Kernel.capture_call = kernel_capture_call
-
-    # }}}
-
-    # {{{ ImageFormat
-
-    def image_format_repr(self):
-        return "ImageFormat(%s, %s)" % (
-                channel_order.to_string(self.channel_order,
-                    "<unknown channel order 0x%x>"),
-                channel_type.to_string(self.channel_data_type,
-                    "<unknown channel data type 0x%x>"))
-
-    def image_format_eq(self, other):
-        return (self.channel_order == other.channel_order
-                and self.channel_data_type == other.channel_data_type)
-
-    def image_format_ne(self, other):
-        return not image_format_eq(self, other)
-
-    def image_format_hash(self):
-        return hash((type(self), self.channel_order, self.channel_data_type))
-
-    ImageFormat.__repr__ = image_format_repr
-    ImageFormat.__eq__ = image_format_eq
-    ImageFormat.__ne__ = image_format_ne
-    ImageFormat.__hash__ = image_format_hash
-
-    # }}}
-
-    # {{{ Image
-
-    image_old_init = Image.__init__
-
-    def image_init(self, context, flags, format, shape=None, pitches=None,
-            hostbuf=None, is_array=False, buffer=None):
-
-        if shape is None and hostbuf is None:
-            raise Error("'shape' must be passed if 'hostbuf' is not given")
-
-        if shape is None and hostbuf is not None:
-            shape = hostbuf.shape
-
-        if hostbuf is not None and not \
-                (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)):
-            from warnings import warn
-            warn("'hostbuf' was passed, but no memory flags to make use of it.")
-
-        if hostbuf is None and pitches is not None:
-            raise Error("'pitches' may only be given if 'hostbuf' is given")
-
-        if context._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2):
-            if buffer is not None and is_array:
-                    raise ValueError(
-                            "'buffer' and 'is_array' are mutually exclusive")
-
-            if len(shape) == 3:
-                if buffer is not None:
-                    raise TypeError(
-                            "'buffer' argument is not supported for 3D arrays")
-                elif is_array:
-                    image_type = mem_object_type.IMAGE2D_ARRAY
-                else:
-                    image_type = mem_object_type.IMAGE3D
-
-            elif len(shape) == 2:
-                if buffer is not None:
-                    raise TypeError(
-                            "'buffer' argument is not supported for 2D arrays")
-                elif is_array:
-                    image_type = mem_object_type.IMAGE1D_ARRAY
-                else:
-                    image_type = mem_object_type.IMAGE2D
-
-            elif len(shape) == 1:
-                if buffer is not None:
-                    image_type = mem_object_type.IMAGE1D_BUFFER
-                elif is_array:
-                    raise TypeError("array of zero-dimensional images not supported")
-                else:
-                    image_type = mem_object_type.IMAGE1D
-
-            else:
-                raise ValueError("images cannot have more than three dimensions")
-
-            desc = ImageDescriptor()
-
-            desc.image_type = image_type
-            desc.shape = shape  # also sets desc.array_size
-
-            if pitches is None:
-                desc.pitches = (0, 0)
-            else:
-                desc.pitches = pitches
-
-            desc.num_mip_levels = 0  # per CL 1.2 spec
-            desc.num_samples = 0  # per CL 1.2 spec
-            desc.buffer = buffer
-
-            image_old_init(self, context, flags, format, desc, hostbuf)
-        else:
-            # legacy init for CL 1.1 and older
-            if is_array:
-                raise TypeError("'is_array=True' is not supported for CL < 1.2")
-            # if num_mip_levels is not None:
-                # raise TypeError(
-                #       "'num_mip_levels' argument is not supported for CL < 1.2")
-            # if num_samples is not None:
-                # raise TypeError(
-                #        "'num_samples' argument is not supported for CL < 1.2")
-            if buffer is not None:
-                raise TypeError("'buffer' argument is not supported for CL < 1.2")
-
-            image_old_init(self, context, flags, format, shape,
-                    pitches, hostbuf)
-
-    class _ImageInfoGetter:
-        def __init__(self, event):
-            from warnings import warn
-            warn("Image.image.attr is deprecated. "
-                    "Use Image.attr directly, instead.")
-
-            self.event = event
-
-        def __getattr__(self, name):
-            try:
-                inf_attr = getattr(_cl.image_info, name.upper())
-            except AttributeError:
-                raise AttributeError("%s has no attribute '%s'"
-                        % (type(self), name))
-            else:
-                return self.event.get_image_info(inf_attr)
-
-    def image_shape(self):
-        if self.type == mem_object_type.IMAGE2D:
-            return (self.width, self.height)
-        elif self.type == mem_object_type.IMAGE3D:
-            return (self.width, self.height, self.depth)
-        else:
-            raise LogicError("only images have shapes")
-
-    Image.__init__ = image_init
-    Image.image = property(_ImageInfoGetter)
-    Image.shape = property(image_shape)
-
-    # }}}
-
-    # {{{ Error
-
-    def error_str(self):
-        val = self.args[0]
-        try:
-            val.routine
-        except AttributeError:
-            return str(val)
-        else:
-            result = ""
-            if val.code() != status_code.SUCCESS:
-                result = status_code.to_string(
-                        val.code(), "<unknown error %d>")
-            routine = val.routine()
-            if routine:
-                result = "%s failed: %s" % (routine, result)
-            what = val.what()
-            if what:
-                if result:
-                    result += " - "
-                result += what
-            return result
-
-    def error_code(self):
-        return self.args[0].code()
-
-    def error_routine(self):
-        return self.args[0].routine()
-
-    def error_what(self):
-        return self.args[0].what()
-
-    Error.__str__ = error_str
-    Error.code = property(error_code)
-    Error.routine = property(error_routine)
-    Error.what = property(error_what)
-
-    # }}}
-
-    if _cl.have_gl():
-        def gl_object_get_gl_object(self):
-            return self.get_gl_object_info()[1]
-
-        GLBuffer.gl_object = property(gl_object_get_gl_object)
-        GLTexture.gl_object = property(gl_object_get_gl_object)
-
-_add_functionality()
-
-
-# {{{ convenience
+# {{{ create_some_context
 
 def create_some_context(interactive=None, answers=None, cache_dir=None):
     import os
@@ -1437,6 +632,10 @@ def create_some_context(interactive=None, answers=None, cache_dir=None):
 
 _csc = create_some_context
 
+# }}}
+
+
+# {{{ enqueue_copy
 
 def _mark_copy_deprecated(func):
     def new_func(*args, **kwargs):
@@ -1670,6 +869,7 @@ def enqueue_copy(queue, dest, src, **kwargs):
 
 # }}}
 
+
 # {{{ image creation
 
 DTYPE_TO_CHANNEL_TYPE = {
diff --git a/pyopencl/cache.py b/pyopencl/cache.py
index 5ac23a49b9ff26784e7c38991c537be94af00b9f..bbe1314f22d4586a19af80e427525c0c65a4b782 100644
--- a/pyopencl/cache.py
+++ b/pyopencl/cache.py
@@ -368,7 +368,7 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes,
             if log is not None and log.strip())
 
     if message:
-        from pyopencl import compiler_output
+        from pyopencl.cffi_cl import compiler_output
         compiler_output(
                 "Built kernel retrieved from cache. Original from-source "
                 "build had warnings:\n"+message)
diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py
index 20e68401dff1ceadd3d07f4e5f05449f2c03726a..94a3a1e2a8a8f2798ca464f81ac92eb50b1654e4 100644
--- a/pyopencl/cffi_cl.py
+++ b/pyopencl/cffi_cl.py
@@ -27,12 +27,15 @@ THE SOFTWARE.
 """
 
 import six
-from six.moves import map, range, zip
+from six.moves import map, range, zip, intern
 
 import warnings
+from warnings import warn
 import numpy as np
 import sys
 
+from pytools import memoize_method
+
 from pyopencl._cffi import ffi as _ffi
 from .compyte.array import f_contiguous_strides, c_contiguous_strides
 
@@ -86,6 +89,7 @@ _lib.set_py_funcs(_py_gc, _py_ref, _py_deref, _py_call)
 # are we running on pypy?
 _PYPY = '__pypy__' in sys.builtin_module_names
 _CPY2 = not _PYPY and sys.version_info < (3,)
+_CPY26 = _CPY2 and sys.version_info < (2, 7)
 
 try:
     _unicode = eval('unicode')
@@ -267,80 +271,92 @@ def get_cl_header_version():
 _constants = {}
 
 
-class _NoInit(object):
+# {{{ constant classes
+
+class _ConstantsNamespace(object):
     def __init__(self):
         raise RuntimeError("This class cannot be instantiated.")
 
+    @classmethod
+    def to_string(cls, value, default_format=None):
+        for name in dir(cls):
+            if (not name.startswith("_") and getattr(cls, name) == value):
+                return name
+
+        if default_format is None:
+            raise ValueError("a name for value %d was not found in %s"
+                    % (value, cls.__name__))
+        else:
+            return default_format % value
 
-# {{{ constant classes
 
 # /!\ If you add anything here, add it to pyopencl/__init__.py as well.
 
-class program_kind(_NoInit):  # noqa
+class program_kind(_ConstantsNamespace):  # noqa
     pass
 
 
-class status_code(_NoInit):  # noqa
+class status_code(_ConstantsNamespace):  # noqa
     pass
 
 
-class platform_info(_NoInit):  # noqa
+class platform_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class device_type(_NoInit):  # noqa
+class device_type(_ConstantsNamespace):  # noqa
     pass
 
 
-class device_info(_NoInit):  # noqa
+class device_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class device_fp_config(_NoInit):  # noqa
+class device_fp_config(_ConstantsNamespace):  # noqa
     pass
 
 
-class device_mem_cache_type(_NoInit):  # noqa
+class device_mem_cache_type(_ConstantsNamespace):  # noqa
     pass
 
 
-class device_local_mem_type(_NoInit):  # noqa
+class device_local_mem_type(_ConstantsNamespace):  # noqa
     pass
 
 
-class device_exec_capabilities(_NoInit):  # noqa
+class device_exec_capabilities(_ConstantsNamespace):  # noqa
     pass
 
 
-class device_svm_capabilities(_NoInit):  # noqa
+class device_svm_capabilities(_ConstantsNamespace):  # noqa
     pass
 
 
-class command_queue_properties(_NoInit):  # noqa
+class command_queue_properties(_ConstantsNamespace):  # noqa
     pass
 
 
-class context_info(_NoInit):  # noqa
+class context_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class gl_context_info(_NoInit):  # noqa
+class gl_context_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class context_properties(_NoInit):  # noqa
+class context_properties(_ConstantsNamespace):  # noqa
     pass
 
 
-class command_queue_info(_NoInit):  # noqa
+class command_queue_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class queue_properties(_NoInit):  # noqa
+class queue_properties(_ConstantsNamespace):  # noqa
     pass
 
 
-class mem_flags(_NoInit):  # noqa
+class mem_flags(_ConstantsNamespace):  # noqa
     @classmethod
     def _writable(cls, flags):
         return flags & (cls.READ_WRITE | cls.WRITE_ONLY)
@@ -358,126 +374,125 @@ class mem_flags(_NoInit):  # noqa
         return cls._writable(flags) and cls._hold_host(flags)
 
 
-class svm_mem_flags(_NoInit):  # noqa
+class svm_mem_flags(_ConstantsNamespace):  # noqa
     pass
 
 
-class channel_order(_NoInit):  # noqa
+class channel_order(_ConstantsNamespace):  # noqa
     pass
 
 
-class channel_type(_NoInit):  # noqa
+class channel_type(_ConstantsNamespace):  # noqa
     pass
 
 
-class mem_object_type(_NoInit):  # noqa
+class mem_object_type(_ConstantsNamespace):  # noqa
     pass
 
 
-class mem_info(_NoInit):  # noqa
+class mem_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class image_info(_NoInit):  # noqa
+class image_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class addressing_mode(_NoInit):  # noqa
+class addressing_mode(_ConstantsNamespace):  # noqa
     pass
 
 
-class filter_mode(_NoInit):  # noqa
+class filter_mode(_ConstantsNamespace):  # noqa
     pass
 
 
-class sampler_info(_NoInit):  # noqa
+class sampler_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class map_flags(_NoInit):  # noqa
+class map_flags(_ConstantsNamespace):  # noqa
     pass
 
 
-class program_info(_NoInit):  # noqa
+class program_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class program_build_info(_NoInit):  # noqa
+class program_build_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class program_binary_type(_NoInit):  # noqa
+class program_binary_type(_ConstantsNamespace):  # noqa
     pass
 
 
-class kernel_info(_NoInit):  # noqa
+class kernel_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class kernel_arg_info(_NoInit):  # noqa
+class kernel_arg_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class kernel_arg_address_qualifier(_NoInit):  # noqa
+class kernel_arg_address_qualifier(_ConstantsNamespace):  # noqa
     pass
 
 
-class kernel_arg_access_qualifier(_NoInit):  # noqa
+class kernel_arg_access_qualifier(_ConstantsNamespace):  # noqa
     pass
 
 
-class kernel_arg_type_qualifier(_NoInit):  # noqa
+class kernel_arg_type_qualifier(_ConstantsNamespace):  # noqa
     pass
 
 
-class kernel_work_group_info(_NoInit):  # noqa
+class kernel_work_group_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class event_info(_NoInit):  # noqa
+class event_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class command_type(_NoInit):  # noqa
+class command_type(_ConstantsNamespace):  # noqa
     pass
 
 
-class command_execution_status(_NoInit):  # noqa
+class command_execution_status(_ConstantsNamespace):  # noqa
     pass
 
 
-class profiling_info(_NoInit):  # noqa
+class profiling_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class mem_migration_flags(_NoInit):  # noqa
+class mem_migration_flags(_ConstantsNamespace):  # noqa
     pass
 
 
-class mem_migration_flags_ext(_NoInit):  # noqa
+class mem_migration_flags_ext(_ConstantsNamespace):  # noqa
     pass
 
 
-class device_partition_property(_NoInit):  # noqa
+class device_partition_property(_ConstantsNamespace):  # noqa
     pass
 
 
-class device_affinity_domain(_NoInit):  # noqa
+class device_affinity_domain(_ConstantsNamespace):  # noqa
     pass
 
 
-class gl_object_type(_NoInit):  # noqa
+class gl_object_type(_ConstantsNamespace):  # noqa
     pass
 
 
-class gl_texture_info(_NoInit):  # noqa
+class gl_texture_info(_ConstantsNamespace):  # noqa
     pass
 
 
-class migrate_mem_object_flags_ext(_NoInit):  # noqa
+class migrate_mem_object_flags_ext(_ConstantsNamespace):  # noqa
     pass
 
-
 # }}}
 
 _locals = locals()
@@ -524,6 +539,39 @@ class Error(Exception):
         else:
             super(Error, self).__init__(self._ErrorRecord(*a, **kw))
 
+    def __str__(self):
+        val = self.args[0]
+        try:
+            val.routine
+        except AttributeError:
+            return str(val)
+        else:
+            result = ""
+            if val.code() != status_code.SUCCESS:
+                result = status_code.to_string(
+                        val.code(), "<unknown error %d>")
+            routine = val.routine()
+            if routine:
+                result = "%s failed: %s" % (routine, result)
+            what = val.what()
+            if what:
+                if result:
+                    result += " - "
+                result += what
+            return result
+
+    @property
+    def code(self):
+        return self.args[0].code()
+
+    @property
+    def routine(self):
+        return self.args[0].routine()
+
+    @property
+    def what(self):
+        return self.args[0].what()
+
 
 class MemoryError(Error):
     pass
@@ -578,6 +626,20 @@ class Platform(_Common):
         return [Device._create(devices.ptr[0][i])
                 for i in range(devices.size[0])]
 
+    def __repr__(self):
+        return "<pyopencl.Platform '%s' at 0x%x>" % (self.name, self.int_ptr)
+
+    def _get_cl_version(self):
+        import re
+        version_string = self.version
+        match = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", version_string)
+        if match is None:
+            raise RuntimeError("platform %s returned non-conformant "
+                               "platform version string '%s'" %
+                               (self, version_string))
+
+        return int(match.group(1)), int(match.group(2))
+
 
 def unload_platform_compiler(plat):
     _handle_error(_lib.platform__unload_compiler(plat.ptr))
@@ -605,6 +667,14 @@ class Device(_Common):
         return [Device._create(devices.ptr[0][i])
                 for i in range(devices.size[0])]
 
+    def __repr__(self):
+        return "<pyopencl.Device '%s' on '%s' at 0x%x>" % (
+                self.name.strip(), self.platform.name.strip(), self.int_ptr)
+
+    @property
+    def persistent_unique_id(self):
+        return (self.vendor, self.vendor_id, self.name, self.version)
+
 # }}}
 
 
@@ -687,6 +757,14 @@ class Context(_Common):
         self.ptr = _ctx[0]
         self.cache_dir = cache_dir
 
+    def __repr__(self):
+        return "<pyopencl.Context at 0x%x on %s>" % (self.int_ptr,
+                ", ".join(repr(dev) for dev in self.devices))
+
+    @memoize_method
+    def _get_cl_version(self):
+        return self.devices[0].platform._get_cl_version()
+
 # }}}
 
 
@@ -713,6 +791,16 @@ class CommandQueue(_Common):
     def flush(self):
         _handle_error(_lib.command_queue__flush(self.ptr))
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.finish()
+
+    def _get_cl_version(self):
+        return self.context._get_cl_version()
+
+
 # }}}
 
 
@@ -988,6 +1076,21 @@ class Buffer(MemoryObject):
 
 # {{{ Program
 
+class CompilerWarning(UserWarning):
+    pass
+
+
+def compiler_output(text):
+    import os
+    from warnings import warn
+    if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")):
+        warn(text, CompilerWarning)
+    else:
+        warn("Non-empty compiler output encountered. Set the "
+                "environment variable PYOPENCL_COMPILER_OUTPUT=1 "
+                "to see more.", CompilerWarning)
+
+
 class _Program(_Common):
     _id = 'program'
 
@@ -1088,20 +1191,403 @@ class _Program(_Common):
                 ._setup(self)
                 for i in range(knls.size[0])]
 
+    def _get_build_logs(self):
+        build_logs = []
+        for dev in self.get_info(program_info.DEVICES):
+            try:
+                log = self.get_build_info(dev, program_build_info.LOG)
+            except Exception:  # best effort; don't trap KeyboardInterrupt
+                log = "<error retrieving log>"
+
+            build_logs.append((dev, log))
+
+        return build_logs
+
+    def build(self, options_bytes, devices=None):
+        err = None
+        try:
+            self._build(options=options_bytes, devices=devices)
+        except Error as e:
+            what = e.what + "\n\n" + (75*"="+"\n").join(
+                    "Build on %s:\n\n%s" % (dev, log)
+                    for dev, log in self._get_build_logs())
+            code = e.code
+            routine = e.routine
+
+            err = RuntimeError(
+                    Error._ErrorRecord(
+                        what=lambda: what,
+                        code=lambda: code,
+                        routine=lambda: routine))
+
+        if err is not None:
+            # Python 3.2 outputs the whole list of currently active exceptions
+            # This serves to remove one (redundant) level from that nesting.
+            raise err
+
+        message = (75*"="+"\n").join(
+                "Build on %s succeeded, but said:\n\n%s" % (dev, log)
+                for dev, log in self._get_build_logs()
+                if log is not None and log.strip())
+
+        if message:
+            if self.kind() == program_kind.SOURCE:
+                build_type = "From-source build"
+            elif self.kind() == program_kind.BINARY:
+                build_type = "From-binary build"
+            else:
+                build_type = "Build"
+
+            compiler_output("%s succeeded, but resulted in non-empty logs:\n%s"
+                    % (build_type, message))
+
+        return self
+
 # }}}
 
 
 # {{{ Kernel
 
+# {{{ arg packing helpers
+
+_size_t_char = ({
+    8: 'Q',
+    4: 'L',
+    2: 'H',
+    1: 'B',
+})[_ffi.sizeof('size_t')]
+_type_char_map = {
+    'n': _size_t_char.lower(),
+    'N': _size_t_char
+}
+del _size_t_char
+
+# }}}
+
+
 class Kernel(_Common):
     _id = 'kernel'
 
     def __init__(self, program, name):
+        if not isinstance(program, _Program):
+            program = program._get_prg()
+
         ptr_kernel = _ffi.new('clobj_t*')
         _handle_error(_lib.create_kernel(ptr_kernel, program.ptr,
                                          _to_cstring(name)))
         self.ptr = ptr_kernel[0]
 
+        self._setup(program)
+
+    def _setup(self, prg):
+        self._source = getattr(prg, "_source", None)
+
+        self._generate_naive_call()
+        self._wg_info_cache = {}
+        return self
+
+    # {{{ code generation for __call__, set_args
+
+    def _set_set_args_body(self, body, num_passed_args):
+        from pytools.py_codegen import (
+                PythonFunctionGenerator,
+                PythonCodeGenerator,
+                Indentation)
+
+        arg_names = ["arg%d" % i for i in range(num_passed_args)]
+
+        # {{{ wrap in error handler
+
+        err_gen = PythonCodeGenerator()
+
+        def gen_error_handler():
+            err_gen("""
+                if current_arg is not None:
+                    args = [{args}]
+                    advice = ""
+                    from pyopencl.array import Array
+                    if isinstance(args[current_arg], Array):
+                        advice = " (perhaps you meant to pass 'array.data' " \
+                            "instead of the array itself?)"
+
+                    raise _cl.LogicError(
+                            "when processing argument #%d (1-based): %s%s"
+                            % (current_arg+1, str(e), advice))
+                else:
+                    raise
+                """
+                .format(args=", ".join(arg_names)))
+            err_gen("")
+
+        err_gen("try:")
+        with Indentation(err_gen):
+            err_gen.extend(body)
+        err_gen("except TypeError as e:")
+        with Indentation(err_gen):
+            gen_error_handler()
+        err_gen("except _cl.LogicError as e:")
+        with Indentation(err_gen):
+            gen_error_handler()
+
+        # }}}
+
+        def add_preamble(gen):
+            gen.add_to_preamble(
+                "import numpy as np")
+            gen.add_to_preamble(
+                "import pyopencl.cffi_cl as _cl")
+            gen.add_to_preamble(
+                "from pyopencl.cffi_cl import _lib, "
+                "_ffi, _handle_error, _CLKernelArg")
+            gen.add_to_preamble("from pyopencl import status_code")
+            gen.add_to_preamble("from struct import pack")
+            gen.add_to_preamble("")
+
+        # {{{ generate _enqueue
+
+        gen = PythonFunctionGenerator("enqueue_knl_%s" % self.function_name,
+                ["self", "queue", "global_size", "local_size"]
+                + arg_names
+                + ["global_offset=None", "g_times_l=None", "wait_for=None"])
+
+        add_preamble(gen)
+        gen.extend(err_gen)
+
+        gen("""
+            return _cl.enqueue_nd_range_kernel(queue, self, global_size, local_size,
+                    global_offset, wait_for, g_times_l=g_times_l)
+            """)
+
+        self._enqueue = gen.get_function()
+
+        # }}}
+
+        # {{{ generate set_args
+
+        gen = PythonFunctionGenerator("_set_args", ["self"] + arg_names)
+
+        add_preamble(gen)
+        gen.extend(err_gen)
+
+        self._set_args = gen.get_function()
+
+        # }}}
+
+    def _generate_buffer_arg_setter(self, gen, arg_idx, buf_var):
+        from pytools.py_codegen import Indentation
+
+        if _CPY2:
+            # https://github.com/numpy/numpy/issues/5381
+            gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var))
+            with Indentation(gen):
+                gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var))
+
+        gen("""
+            c_buf, sz, _ = _cl._c_buffer_from_obj({buf_var})
+            status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx}, c_buf, sz)
+            if status != _ffi.NULL:
+                _handle_error(status)
+            """
+            .format(arg_idx=arg_idx, buf_var=buf_var))
+
+    def _generate_bytes_arg_setter(self, gen, arg_idx, buf_var):
+        gen("""
+            status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx},
+                {buf_var}, len({buf_var}))
+            if status != _ffi.NULL:
+                _handle_error(status)
+            """
+            .format(arg_idx=arg_idx, buf_var=buf_var))
+
+    def _generate_generic_arg_handler(self, gen, arg_idx, arg_var):
+        from pytools.py_codegen import Indentation
+
+        gen("""
+            if {arg_var} is None:
+                status = _lib.kernel__set_arg_null(self.ptr, {arg_idx})
+                if status != _ffi.NULL:
+                    _handle_error(status)
+            elif isinstance({arg_var}, _CLKernelArg):
+                self.set_arg({arg_idx}, {arg_var})
+            """
+            .format(arg_idx=arg_idx, arg_var=arg_var))
+
+        gen("else:")
+        with Indentation(gen):
+            self._generate_buffer_arg_setter(gen, arg_idx, arg_var)
+
+    def _generate_naive_call(self):
+        num_args = self.num_args
+
+        from pytools.py_codegen import PythonCodeGenerator
+        gen = PythonCodeGenerator()
+
+        if num_args == 0:
+            gen("pass")
+
+        for i in range(num_args):
+            gen("# process argument {arg_idx}".format(arg_idx=i))
+            gen("")
+            gen("current_arg = {arg_idx}".format(arg_idx=i))
+            self._generate_generic_arg_handler(gen, i, "arg%d" % i)
+            gen("")
+
+        self._set_set_args_body(gen, num_args)
+
+    def set_scalar_arg_dtypes(self, scalar_arg_dtypes):
+        self._scalar_arg_dtypes = scalar_arg_dtypes
+
+        # {{{ arg counting bug handling
+
+        # For example:
+        # https://github.com/pocl/pocl/issues/197
+        # (but Apple CPU has a similar bug)
+
+        work_around_arg_count_bug = False
+        warn_about_arg_count_bug = False
+
+        from pyopencl.characterize import has_struct_arg_count_bug
+
+        count_bug_per_dev = [
+                has_struct_arg_count_bug(dev)
+                for dev in self.context.devices]
+
+        from pytools import single_valued
+        if any(count_bug_per_dev):
+            if all(count_bug_per_dev):
+                work_around_arg_count_bug = single_valued(count_bug_per_dev)
+            else:
+                warn_about_arg_count_bug = True
+
+        fp_arg_count = 0
+
+        # }}}
+
+        cl_arg_idx = 0
+
+        from pytools.py_codegen import PythonCodeGenerator
+        gen = PythonCodeGenerator()
+
+        if not scalar_arg_dtypes:
+            gen("pass")
+
+        for arg_idx, arg_dtype in enumerate(scalar_arg_dtypes):
+            gen("# process argument {arg_idx}".format(arg_idx=arg_idx))
+            gen("")
+            gen("current_arg = {arg_idx}".format(arg_idx=arg_idx))
+            arg_var = "arg%d" % arg_idx
+
+            if arg_dtype is None:
+                self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var)
+                cl_arg_idx += 1
+                gen("")
+                continue
+
+            arg_dtype = np.dtype(arg_dtype)
+
+            if arg_dtype.char == "V":
+                self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var)
+                cl_arg_idx += 1
+
+            elif arg_dtype.kind == "c":
+                if warn_about_arg_count_bug:
+                    warn("{knl_name}: arguments include complex numbers, and "
+                            "some (but not all) of the target devices mishandle "
+                            "struct kernel arguments (hence the workaround is "
+                            "disabled)".format(knl_name=self.function_name),
+                            stacklevel=2)  # attribute the warning to our caller
+
+                if arg_dtype == np.complex64:
+                    arg_char = "f"
+                elif arg_dtype == np.complex128:
+                    arg_char = "d"
+                else:
+                    raise TypeError("unexpected complex type: %s" % arg_dtype)
+
+                if (work_around_arg_count_bug == "pocl"
+                        and arg_dtype == np.complex128
+                        and fp_arg_count + 2 <= 8):
+                    gen(
+                            "buf = pack('{arg_char}', {arg_var}.real)"
+                            .format(arg_char=arg_char, arg_var=arg_var))
+                    self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
+                    cl_arg_idx += 1
+                    gen("current_arg = current_arg + 1000")
+                    gen(
+                            "buf = pack('{arg_char}', {arg_var}.imag)"
+                            .format(arg_char=arg_char, arg_var=arg_var))
+                    self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
+                    cl_arg_idx += 1
+
+                elif (work_around_arg_count_bug == "apple"
+                        and arg_dtype == np.complex128
+                        and fp_arg_count + 2 <= 8):
+                    raise NotImplementedError("No work-around to "
+                            "Apple's broken structs-as-kernel arg "
+                            "handling has been found. "
+                            "Cannot pass complex numbers to kernels.")
+
+                else:
+                    gen(
+                            "buf = pack('{arg_char}{arg_char}', "
+                            "{arg_var}.real, {arg_var}.imag)"
+                            .format(arg_char=arg_char, arg_var=arg_var))
+                    self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
+                    cl_arg_idx += 1
+
+                fp_arg_count += 2
+
+            elif arg_dtype.char in "IL" and _CPY26:
+                # Prevent SystemError: ../Objects/longobject.c:336: bad
+                # argument to internal function
+
+                gen(
+                        "buf = pack('{arg_char}', long({arg_var}))"
+                        .format(arg_char=arg_dtype.char, arg_var=arg_var))
+                self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
+                cl_arg_idx += 1
+
+            else:
+                if arg_dtype.kind == "f":
+                    fp_arg_count += 1
+
+                arg_char = arg_dtype.char
+                arg_char = _type_char_map.get(arg_char, arg_char)
+                gen(
+                        "buf = pack('{arg_char}', {arg_var})"
+                        .format(
+                            arg_char=arg_char,
+                            arg_var=arg_var))
+                self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
+                cl_arg_idx += 1
+
+            gen("")
+
+        if cl_arg_idx != self.num_args:
+            raise TypeError(
+                "length of argument list (%d) and "
+                "CL-generated number of arguments (%d) do not agree"
+                % (cl_arg_idx, self.num_args))
+
+        self._set_set_args_body(gen, len(scalar_arg_dtypes))
+
+    # }}}
+
+    def set_args(self, *args, **kwargs):
+        # self._set_args is a generated plain function: pass 'self' explicitly
+        return self._set_args(self, *args, **kwargs)
+
+    def __call__(self, queue, global_size, local_size, *args, **kwargs):
+        # __call__ can't be overridden directly, so we need this
+        # trampoline hack.
+        return self._enqueue(self, queue, global_size, local_size, *args, **kwargs)
+
+    def capture_call(self, filename, queue, global_size, local_size,
+            *args, **kwargs):
+        from pyopencl.capture_call import capture_kernel_call
+        capture_kernel_call(self, filename, queue, global_size, local_size,
+                *args, **kwargs)
+
     def set_arg(self, arg_index, arg):
         # If you change this, also change the kernel call generation logic.
         if arg is None:
@@ -1126,10 +1612,18 @@ class Kernel(_Common):
                                                    c_buf, size))
 
     def get_work_group_info(self, param, device):
+        try:
+            return self._wg_info_cache[param, device]
+        except KeyError:
+            pass
+
         info = _ffi.new('generic_info*')
         _handle_error(_lib.kernel__get_work_group_info(
             self.ptr, param, device.ptr, info))
-        return _generic_info_to_python(info)
+        result = _generic_info_to_python(info)
+
+        self._wg_info_cache[param, device] = result
+        return result
 
     def get_arg_info(self, idx, param):
         info = _ffi.new('generic_info*')
@@ -1162,6 +1656,24 @@ class Event(_Common):
                                                _ffi.new_handle(_func)))
 
 
+class ProfilingInfoGetter:
+    def __init__(self, event):
+        self.event = event
+
+    def __getattr__(self, name):
+        info_cls = profiling_info
+
+        try:
+            inf_attr = getattr(info_cls, name.upper())
+        except AttributeError:
+            raise AttributeError("%s has no attribute '%s'"
+                    % (type(self), name))
+        else:
+            return self.event.get_profiling_info(inf_attr)
+
+Event.profile = property(ProfilingInfoGetter)
+
+
 def wait_for_events(wait_for):
     _handle_error(_lib.wait_for_events(*_clobj_list(wait_for)))
 
@@ -1788,6 +2300,23 @@ class ImageFormat(object):
     def itemsize(self):
         return self.channel_count * self.dtype_size
 
+    def __repr__(self):
+        return "ImageFormat(%s, %s)" % (
+                channel_order.to_string(self.channel_order,
+                    "<unknown channel order 0x%x>"),
+                channel_type.to_string(self.channel_data_type,
+                    "<unknown channel data type 0x%x>"))
+
+    def __eq__(self, other):
+        return (self.channel_order == other.channel_order
+                and self.channel_data_type == other.channel_data_type)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __hash__(self):
+        return hash((type(self), self.channel_order, self.channel_data_type))
+
 
 def get_supported_image_formats(context, flags, image_type):
     info = _ffi.new('generic_info*')
@@ -1887,7 +2416,7 @@ _fill_dtype_dict = {
 class Image(MemoryObject):
     _id = 'image'
 
-    def __init__(self, *args):
+    def __init_dispatch(self, *args):
         if len(args) == 5:
             # >= 1.2
             self.__init_1_2(*args)
@@ -1960,11 +2489,122 @@ class Image(MemoryObject):
             raise LogicError("invalid dimension",
                              status_code.INVALID_VALUE, "Image")
 
+    def __init__(self, context, flags, format, shape=None, pitches=None,
+            hostbuf=None, is_array=False, buffer=None):
+
+        if shape is None and hostbuf is None:
+            raise Error("'shape' must be passed if 'hostbuf' is not given")
+
+        if shape is None and hostbuf is not None:
+            shape = hostbuf.shape
+
+        if hostbuf is not None and not \
+                (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)):
+            from warnings import warn
+            warn("'hostbuf' was passed, but no memory flags to make use of it.")
+
+        if hostbuf is None and pitches is not None:
+            raise Error("'pitches' may only be given if 'hostbuf' is given")
+
+        if context._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2):
+            if buffer is not None and is_array:
+                    raise ValueError(
+                            "'buffer' and 'is_array' are mutually exclusive")
+
+            if len(shape) == 3:
+                if buffer is not None:
+                    raise TypeError(
+                            "'buffer' argument is not supported for 3D arrays")
+                elif is_array:
+                    image_type = mem_object_type.IMAGE2D_ARRAY
+                else:
+                    image_type = mem_object_type.IMAGE3D
+
+            elif len(shape) == 2:
+                if buffer is not None:
+                    raise TypeError(
+                            "'buffer' argument is not supported for 2D arrays")
+                elif is_array:
+                    image_type = mem_object_type.IMAGE1D_ARRAY
+                else:
+                    image_type = mem_object_type.IMAGE2D
+
+            elif len(shape) == 1:
+                if buffer is not None:
+                    image_type = mem_object_type.IMAGE1D_BUFFER
+                elif is_array:
+                    raise TypeError("array of zero-dimensional images not supported")
+                else:
+                    image_type = mem_object_type.IMAGE1D
+
+            else:
+                raise ValueError("images cannot have more than three dimensions")
+
+            desc = ImageDescriptor()
+
+            desc.image_type = image_type
+            desc.shape = shape  # also sets desc.array_size
+
+            if pitches is None:
+                desc.pitches = (0, 0)
+            else:
+                desc.pitches = pitches
+
+            desc.num_mip_levels = 0  # per CL 1.2 spec
+            desc.num_samples = 0  # per CL 1.2 spec
+            desc.buffer = buffer
+
+            self.__init_dispatch(context, flags, format, desc, hostbuf)
+        else:
+            # legacy init for CL 1.1 and older
+            if is_array:
+                raise TypeError("'is_array=True' is not supported for CL < 1.2")
+            # if num_mip_levels is not None:
+                # raise TypeError(
+                #       "'num_mip_levels' argument is not supported for CL < 1.2")
+            # if num_samples is not None:
+                # raise TypeError(
+                #        "'num_samples' argument is not supported for CL < 1.2")
+            if buffer is not None:
+                raise TypeError("'buffer' argument is not supported for CL < 1.2")
+
+            self.__init_dispatch(context, flags, format, shape,
+                    pitches, hostbuf)
+
     def get_image_info(self, param):
         info = _ffi.new('generic_info*')
         _handle_error(_lib.image__get_image_info(self.ptr, param, info))
         return _generic_info_to_python(info)
 
+    @property
+    def shape(self):
+        if self.type == mem_object_type.IMAGE2D:
+            return (self.width, self.height)
+        elif self.type == mem_object_type.IMAGE3D:
+            return (self.width, self.height, self.depth)
+        else:
+            raise LogicError("only images have shapes")
+
+
+class _ImageInfoGetter:
+    def __init__(self, event):
+        from warnings import warn
+        warn("Image.image.attr is deprecated. "
+                "Use Image.attr directly, instead.")
+
+        self.event = event
+
+    def __getattr__(self, name):
+        try:
+            inf_attr = getattr(image_info, name.upper())
+        except AttributeError:
+            raise AttributeError("%s has no attribute '%s'"
+                    % (type(self), name))
+        else:
+            return self.event.get_image_info(inf_attr)
+
+Image.info = property(_ImageInfoGetter)
+
 # }}}
 
 
@@ -2044,4 +2684,66 @@ class DeviceTopologyAmd(object):
 
 # }}}
 
+
+# {{{ get_info monkeypatchery
+
+def add_get_info_attrs(cls, info_method, info_class, cacheable_attrs=None):
+    """Attach one read-only property to *cls* per constant in *info_class*.
+
+    Each property is named after the lower-cased constant and evaluates
+    ``info_method(self, constant)``. Names listed in *cacheable_attrs*
+    (spelled like the constants) are queried once per instance and then
+    served from an ``_info_cache_<name>`` instance attribute.
+    """
+    cached_names = [] if cacheable_attrs is None else cacheable_attrs
+
+    def plain_property(query, const):
+        return property(lambda self: query(self, const))
+
+    def caching_property(query, slot, const):
+        def fetch(self):
+            try:
+                return getattr(self, slot)
+            except AttributeError:
+                pass
+            value = query(self, const)
+            setattr(self, slot, value)
+            return value
+
+        return property(fetch)
+
+    for const_name, _ in six.iteritems(info_class.__dict__):
+        if const_name == "to_string" or const_name.startswith("_"):
+            continue
+        attr_name = const_name.lower()
+        const = getattr(info_class, const_name)
+        if const_name in cached_names:
+            prop = caching_property(
+                    info_method, intern("_info_cache_" + attr_name), const)
+        else:
+            prop = plain_property(info_method, const)
+        setattr(cls, attr_name, prop)
+
+add_get_info_attrs(Platform, Platform.get_info, platform_info)
+add_get_info_attrs(Device, Device.get_info, device_info,  # cached per instance:
+                ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"])
+add_get_info_attrs(Context, Context.get_info, context_info)
+add_get_info_attrs(CommandQueue, CommandQueue.get_info, command_queue_info,
+                ["CONTEXT", "DEVICE"])  # cached per instance
+add_get_info_attrs(Event, Event.get_info, event_info)
+add_get_info_attrs(MemoryObjectHolder, MemoryObjectHolder.get_info, mem_info)
+add_get_info_attrs(Image, Image.get_image_info, image_info)
+add_get_info_attrs(Kernel, Kernel.get_info, kernel_info)
+add_get_info_attrs(Sampler, Sampler.get_info, sampler_info)
+
+# }}}
+
+
+if have_gl():
+    # ``gl_object`` is element 1 of whatever get_gl_object_info() returns;
+    # both GL-backed memory object classes share the same accessor.
+    def gl_object_get_gl_object(self):
+        return self.get_gl_object_info()[1]
+    GLBuffer.gl_object = GLTexture.gl_object = property(gl_object_get_gl_object)
+
 # vim: foldmethod=marker