diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index 3fe87b7c27cceda5a5fc405d85c8ef4a01565c3e..a3a5555a027bf81ef2b3e7defb20b430d2409c4d 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -63,8 +63,46 @@ def compiler_output(text):
                 "to see more.", CompilerWarning)
 
 
+# {{{ find pyopencl shipped source code
+
+def _find_pyopencl_include_path():
+    from pkg_resources import Requirement, resource_filename
+    return resource_filename(Requirement.parse("pyopencl"), "pyopencl/cl")
+
+# }}}
+
+
 # {{{ Program (including caching support)
 
+_DEFAULT_BUILD_OPTIONS = []
+_DEFAULT_INCLUDE_OPTIONS = ["-I", _find_pyopencl_include_path()]
+
+# map of platform.name to build options list
+_PLAT_BUILD_OPTIONS = {}
+
+
+def enable_debugging(platform_or_context):
+    """Enables debugging for all code subsequently compiled by
+    PyOpenCL on the passed *platform*. Alternatively, a context
+    may be passed.
+    """
+
+    if isinstance(platform_or_context, Context):
+        platform = platform_or_context.devices[0].platform
+    else:
+        platform = platform_or_context
+
+    if "AMD Accelerated" in platform.name:
+        _PLAT_BUILD_OPTIONS.setdefault(platform.name, []).extend(
+                ["-g", "-O0"])
+        import os
+        os.environ["CPU_MAX_COMPUTE_UNITS"] = "1"
+    else:
+        from warnings import warn
+        warn("do not know how to enable debugging on '%s'"
+                % platform.name)
+
+
 class Program(object):
     def __init__(self, arg1, arg2=None, arg3=None):
         if arg2 is None:
@@ -88,8 +126,9 @@ class Program(object):
             self._prg = None
 
         else:
-            # 3-argument form: context, devices, binaries
-            self._prg = _cl._Program(arg1, arg2, arg3)
+            context, device, binaries = arg1, arg2, arg3
+            self._context = context
+            self._prg = _cl._Program(context, device, binaries)
 
     def _get_prg(self):
         if self._prg is not None:
@@ -140,7 +179,11 @@ class Program(object):
         if isinstance(options, str):
             options = [options]
 
-        options = options + ["-I", _find_pyopencl_include_path()]
+        options = (options
+                + _DEFAULT_BUILD_OPTIONS
+                + _DEFAULT_INCLUDE_OPTIONS
+                + _PLAT_BUILD_OPTIONS.get(
+                    self._context.devices[0].platform.name, []))
 
         import os
         forced_options = os.environ.get("PYOPENCL_BUILD_OPTIONS")
@@ -660,15 +703,6 @@ def _add_functionality():
 _add_functionality()
 
 
-# {{{ find pyopencl shipped source code
-
-def _find_pyopencl_include_path():
-    from pkg_resources import Requirement, resource_filename
-    return resource_filename(Requirement.parse("pyopencl"), "pyopencl/cl")
-
-# }}}
-
-
 # {{{ convenience
 
 def create_some_context(interactive=True, answers=None):
diff --git a/pyopencl/capture_call.py b/pyopencl/capture_call.py
index 1d6b2e2f9d2462b5c342fa1bf2ae39ee2028e5d7..ecc006d59a9f6259add02ffe373be58f65b11f14 100644
--- a/pyopencl/capture_call.py
+++ b/pyopencl/capture_call.py
@@ -119,6 +119,8 @@ def capture_kernel_call(kernel, filename, queue, g_size, l_size, *args, **kwargs
     cg("knl._arg_type_chars = %s" % repr(kernel._arg_type_chars))
     cg("knl(queue, %s, %s," % (repr(g_size), repr(l_size)))
     cg(" %s)" % ", ".join(kernel_args))
+    cg("")
+    cg("queue.finish()")
 
     # }}}
 
diff --git a/pyopencl/scan.py b/pyopencl/scan.py
index 38233f96d85819a36f4b9a2b78f46786612bfdfa..1b80960ae8a6eeb0c60674ac3dbbdd78e60cfac2 100644
--- a/pyopencl/scan.py
+++ b/pyopencl/scan.py
@@ -125,7 +125,7 @@ SCAN_INTERVALS_SOURCE = SHARED_PREAMBLE + r"""//CL//
 
 KERNEL
 REQD_WG_SIZE(WG_SIZE, 1, 1)
-void ${name_prefix}_scan_intervals(
+void ${kernel_name}(
     ${argument_signature},
     GLOBAL_MEM scan_type *restrict partial_scan_buffer,
     const index_type N,
@@ -784,7 +784,7 @@ _IGNORED_WORDS = set("""
         get_local_size get_local_id cl_khr_fp64 reqd_work_group_size
         get_num_groups barrier get_group_id
 
-        _final_update _scan_intervals _debug_scan
+        _final_update _debug_scan kernel_name
 
         positions all padded integer its previous write based writes 0
         has local worth scan_expr to read cannot not X items False bank
@@ -1249,6 +1249,12 @@ class GenericScanKernel(_GenericScanKernelBase):
         wg_size = _round_down_to_power_of_2(
                 min(max_wg_size, 256))
 
+        kernel_name = self.code_variables["name_prefix"]+"_scan_intervals"
+        if is_first_level:
+            kernel_name += "_lev1"
+        else:
+            kernel_name += "_lev2"
+
         scan_tpl = _make_template(SCAN_INTERVALS_SOURCE)
         scan_src = str(scan_tpl.render(
             wg_size=wg_size,
@@ -1260,13 +1266,12 @@ class GenericScanKernel(_GenericScanKernelBase):
             is_first_level=is_first_level,
             store_segment_start_flags=store_segment_start_flags,
             use_bank_conflict_avoidance=use_bank_conflict_avoidance,
+            kernel_name=kernel_name,
             **self.code_variables))
 
         prg = cl.Program(self.context, scan_src).build(self.options)
 
-        knl = getattr(
-                prg,
-                self.code_variables["name_prefix"]+"_scan_intervals")
+        knl = getattr(prg, kernel_name)
 
         scalar_arg_dtypes.extend(
                 (None, self.index_dtype, self. index_dtype))