From ce319df795fda85fe9d1fe7aad8e268b6ad898be Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Fri, 7 Dec 2012 19:18:25 -0500 Subject: [PATCH] Minor scan fixes. --- pyopencl/scan.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pyopencl/scan.py b/pyopencl/scan.py index 6d22f0c7..ddd69dda 100644 --- a/pyopencl/scan.py +++ b/pyopencl/scan.py @@ -144,7 +144,7 @@ void ${name_prefix}_scan_intervals( ) { // index K in first dimension used for carry storage - %if scan_dtype.itemsize > 4 and scan_dtype.itemsize % 8 == 0: + %if scan_dtype.itemsize > 4 and scan_dtype.itemsize % 8 == 0 and is_gpu: // Avoid bank conflicts by adding a single 32-bit value to the size of // the scan type. struct __attribute__ ((__packed__)) wrapped_scan_type @@ -756,7 +756,8 @@ _PREFIX_WORDS = set(""" first_seg_start_in_interval g_segment_start_flags group_base seg_end my_val DEBUG ARGS ints_to_store ints_per_wg scan_types_per_int linear_index - linear_scan_data_idx dest src store_base + linear_scan_data_idx dest src store_base wrapped_scan_type + dummy LID_2 LID_1 LID_0 LDIM_0 LDIM_1 LDIM_2 @@ -765,11 +766,11 @@ _PREFIX_WORDS = set(""" """.split()) _IGNORED_WORDS = set(""" - 4 32 + 4 8 32 typedef for endfor if void while endwhile endfor endif else const printf None return bool n char true false ifdef pycl_printf str xrange assert - np iinfo max itemsize + np iinfo max itemsize __packed__ struct set iteritems len setdefault @@ -802,7 +803,7 @@ _IGNORED_WORDS = set(""" branch workgroup complicated granularity phase remainder than simpler We smaller look ifs lots self behind allow barriers whole loop after already Observe achieve contiguous stores hard go with by math - size won t way divisible bit so + size won t way divisible bit so Avoid declare adding single type is_tail is_first_level input_expr argument_signature preamble double_support neutral output_statement @@ -813,6 +814,7 @@ _IGNORED_WORDS = set(""" update_loop_lookbehind update_loop_plain update_loop use_lookbehind_update store_segment_start_flags update_loop first_seg scan_dtype dtype_to_ctype + is_gpu a b prev_item i last_item prev_value N NO_SEG_BOUNDARY across_seg_boundary @@ -1010,6 +1012,7 @@ class _GenericScanKernelBase(object): arg_ctypes=arg_ctypes, scan_expr=_process_code_for_macro(scan_expr), neutral=_process_code_for_macro(neutral), + is_gpu=self.devices[0].type == cl.device_type.GPU, double_support=all( has_double_support(dev) for dev in devices), ) -- GitLab