diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index 3fe87b7c27cceda5a5fc405d85c8ef4a01565c3e..a3a5555a027bf81ef2b3e7defb20b430d2409c4d 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -63,8 +63,46 @@ def compiler_output(text):
                 "to see more.", CompilerWarning)
 
 
+# {{{ find pyopencl shipped source code
+
+def _find_pyopencl_include_path():
+    import pkg_resources as pr  # locate files shipped with the package
+    return pr.resource_filename(pr.Requirement.parse("pyopencl"), "pyopencl/cl")
+
+# }}}
+
+
 # {{{ Program (including caching support)
 
+_DEFAULT_BUILD_OPTIONS = []
+_DEFAULT_INCLUDE_OPTIONS = ["-I", _find_pyopencl_include_path()]
+
+# maps platform.name to the list of extra build options for that platform
+_PLAT_BUILD_OPTIONS = {}
+
+
+def enable_debugging(platform_or_context):
+    """Enable debugging for all code subsequently compiled by PyOpenCL on
+    a platform. *platform_or_context* is either that platform or a
+    :class:`Context`, in which case the platform of its first device is
+    used. Platforms other than "AMD Accelerated" ones only get a warning.
+    """
+    if isinstance(platform_or_context, Context):
+        platform = platform_or_context.devices[0].platform
+    else:
+        platform = platform_or_context
+
+    if "AMD Accelerated" in platform.name:
+        # Add each flag at most once so that repeated calls are idempotent.
+        plat_options = _PLAT_BUILD_OPTIONS.setdefault(platform.name, [])
+        plat_options += [f for f in ("-g", "-O0") if f not in plat_options]
+        import os
+        os.environ["CPU_MAX_COMPUTE_UNITS"] = "1"
+    else:
+        from warnings import warn
+        warn("do not know how to enable debugging on '%s'" % platform.name)
+
+
 class Program(object):
     def __init__(self, arg1, arg2=None, arg3=None):
         if arg2 is None:
@@ -88,8 +126,9 @@ class Program(object):
             self._prg = None
 
         else:
-            # 3-argument form: context, devices, binaries
-            self._prg = _cl._Program(arg1, arg2, arg3)
+            context, device, binaries = arg1, arg2, arg3
+            self._context = context
+            self._prg = _cl._Program(context, device, binaries)
 
     def _get_prg(self):
         if self._prg is not None:
@@ -140,7 +179,11 @@ class Program(object):
         if isinstance(options, str):
             options = [options]
 
-        options = options + ["-I", _find_pyopencl_include_path()]
+        options = (options
+                + _DEFAULT_BUILD_OPTIONS
+                + _DEFAULT_INCLUDE_OPTIONS
+                + _PLAT_BUILD_OPTIONS.get(
+                    self._context.devices[0].platform.name, []))
 
         import os
         forced_options = os.environ.get("PYOPENCL_BUILD_OPTIONS")
@@ -660,15 +703,6 @@ def _add_functionality():
 _add_functionality()
 
 
-# {{{ find pyopencl shipped source code
-
-def _find_pyopencl_include_path():
-    from pkg_resources import Requirement, resource_filename
-    return resource_filename(Requirement.parse("pyopencl"), "pyopencl/cl")
-
-# }}}
-
-
 # {{{ convenience
 
 def create_some_context(interactive=True, answers=None):
diff --git a/pyopencl/capture_call.py b/pyopencl/capture_call.py
index 1d6b2e2f9d2462b5c342fa1bf2ae39ee2028e5d7..ecc006d59a9f6259add02ffe373be58f65b11f14 100644
--- a/pyopencl/capture_call.py
+++ b/pyopencl/capture_call.py
@@ -119,6 +119,8 @@ def capture_kernel_call(kernel, filename, queue, g_size, l_size, *args, **kwargs
             cg("knl._arg_type_chars = %s" % repr(kernel._arg_type_chars))
         cg("knl(queue, %s, %s," % (repr(g_size), repr(l_size)))
         cg("    %s)" % ", ".join(kernel_args))
+        cg("")
+        cg("queue.finish()")
 
     # }}}
 
diff --git a/pyopencl/scan.py b/pyopencl/scan.py
index 38233f96d85819a36f4b9a2b78f46786612bfdfa..1b80960ae8a6eeb0c60674ac3dbbdd78e60cfac2 100644
--- a/pyopencl/scan.py
+++ b/pyopencl/scan.py
@@ -125,7 +125,7 @@ SCAN_INTERVALS_SOURCE = SHARED_PREAMBLE + r"""//CL//
 
 KERNEL
 REQD_WG_SIZE(WG_SIZE, 1, 1)
-void ${name_prefix}_scan_intervals(
+void ${kernel_name}(
     ${argument_signature},
     GLOBAL_MEM scan_type *restrict partial_scan_buffer,
     const index_type N,
@@ -784,7 +784,7 @@ _IGNORED_WORDS = set("""
         get_local_size get_local_id cl_khr_fp64 reqd_work_group_size
         get_num_groups barrier get_group_id
 
-        _final_update _scan_intervals _debug_scan
+        _final_update _debug_scan kernel_name
 
         positions all padded integer its previous write based writes 0
         has local worth scan_expr to read cannot not X items False bank
@@ -1249,6 +1249,12 @@ class GenericScanKernel(_GenericScanKernelBase):
         wg_size = _round_down_to_power_of_2(
                 min(max_wg_size, 256))
 
+        kernel_name = self.code_variables["name_prefix"]+"_scan_intervals"
+        if is_first_level:
+            kernel_name += "_lev1"
+        else:
+            kernel_name += "_lev2"
+
         scan_tpl = _make_template(SCAN_INTERVALS_SOURCE)
         scan_src = str(scan_tpl.render(
             wg_size=wg_size,
@@ -1260,13 +1266,12 @@ class GenericScanKernel(_GenericScanKernelBase):
             is_first_level=is_first_level,
             store_segment_start_flags=store_segment_start_flags,
             use_bank_conflict_avoidance=use_bank_conflict_avoidance,
+            kernel_name=kernel_name,
             **self.code_variables))
 
         prg = cl.Program(self.context, scan_src).build(self.options)
 
-        knl = getattr(
-                prg,
-                self.code_variables["name_prefix"]+"_scan_intervals")
+        knl = getattr(prg, kernel_name)
 
         scalar_arg_dtypes.extend(
                 (None, self.index_dtype, self. index_dtype))