diff --git a/doc/source/algorithm.rst b/doc/source/algorithm.rst index 17a86176e57886fa150d5057028212a4523d3100..adc61b2723372873ff67daa37c21eaa8ea95d23f 100644 --- a/doc/source/algorithm.rst +++ b/doc/source/algorithm.rst @@ -281,3 +281,6 @@ Building many variable-size lists .. automethod:: __call__ +.. autoclass:: KeyValueSorter + + .. automethod:: __call__ diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py index e51e30a88714f3b50ac1fea72e9bd888b267677b..88c297e1ee542bfa02af4ae66eea39176cbe53af 100644 --- a/pyopencl/algorithm.py +++ b/pyopencl/algorithm.py @@ -998,4 +998,122 @@ class ListOfListsBuilder: # }}} +# {{{ key-value sorting + +class _KernelInfo(Record): + pass + +def _make_cl_int_literal(value, dtype): + iinfo = np.iinfo(dtype) + result = str(int(value)) + if dtype.itemsize == 8: + result += "l" + if int(iinfo.min) < 0: + result += "u" + + return result + +class KeyValueSorter(object): + """Given arrays *values* and *keys* of equal length + and a number *nkeys* of keys, returns a tuple `(starts, + lists)`, as follows: *values* and *keys* are sorted + by *keys*, and the sorted *values* is returned as + *lists*. Then for each index *i* in `range(nkeys)`, + *starts[i]* is written to indicating where the + group of *values* belonging to the key with index + *i* begins. It implicitly ends at *starts[i+1]*. + + `starts` is built so that it has `nkeys+1` entries, so that + the *i*'th entry is the start of the *i*'th list, and the + *i*'th entry is the index one past the *i*'th list's end, + even for the last list. + + This implies that all lists are contiguous. + + .. note:: This functionality is provided as a preview. Its + interface is subject to change until this notice is removed. + + .. versionadded:: 2013.1 + """ + + def __init__(self, context): + self.context = context + + @memoize_method + def get_kernels(self, key_dtype, value_dtype, starts_dtype): + from pyopencl.algorithm import RadixSort + from pyopencl.tools import VectorArg, ScalarArg + + by_target_sorter = RadixSort( + self.context, [ + VectorArg(value_dtype, "values"), + VectorArg(key_dtype, "keys"), + ], + key_expr="keys[i]", + sort_arg_names=["values", "keys"]) + + from pyopencl.elementwise import ElementwiseTemplate + start_finder = ElementwiseTemplate( + arguments="""//CL// + starts_t *key_group_starts, + key_t *keys_sorted_by_key, + """, + + operation=r"""//CL// + key_t my_key = keys_sorted_by_key[i]; + + if (i == 0 || my_key != keys_sorted_by_key[i-1]) + key_group_starts[my_key] = i; + """, + name="find_starts").build(self.context, + type_values=( + ("key_t", starts_dtype), + ("starts_t", starts_dtype), + ), + var_values=()) + + from pyopencl.scan import GenericScanKernel + bound_propagation_scan = GenericScanKernel( + self.context, starts_dtype, + arguments=[ + VectorArg(starts_dtype, "starts"), + # starts has length n+1 + ScalarArg(key_dtype, "nkeys"), + ], + input_expr="starts[nkeys-i]", + scan_expr="min(a, b)", + neutral=_make_cl_int_literal( + np.iinfo(starts_dtype).max, starts_dtype), + output_statement="starts[nkeys-i] = item;") + + return _KernelInfo( + by_target_sorter=by_target_sorter, + start_finder=start_finder, + bound_propagation_scan=bound_propagation_scan) + + def __call__(self, queue, keys, values, nkeys, + starts_dtype, allocator=None): + if allocator is None: + allocator = values.allocator + + knl_info = self.get_kernels(keys.dtype, values.dtype, + starts_dtype) + + (values_sorted_by_key, keys_sorted_by_key + ) = knl_info.by_target_sorter( + values, keys, queue=queue) + + starts = cl.array.empty(queue, (nkeys+1), starts_dtype, + allocator=allocator) \ + .fill(len(values_sorted_by_key)) + + knl_info.start_finder(starts, keys_sorted_by_key, + range=slice(len(keys_sorted_by_key))) + + knl_info.bound_propagation_scan(starts, nkeys, queue=queue) + + return starts, values_sorted_by_key + +# }}} + # vim: filetype=pyopencl:fdm=marker diff --git a/test/test_algorithm.py b/test/test_algorithm.py index 0294eddd890e93f7bbdfa630e64230e31439172e..c927fee4bd96dcabef3a341646beb9f2f682af27 100644 --- a/test/test_algorithm.py +++ b/test/test_algorithm.py @@ -759,6 +759,34 @@ def test_list_builder(ctx_factory): assert inf.count == 3000 assert (inf.lists.get()[-6:] == [1, 2, 2, 3, 3, 3]).all() +@pytools.test.mark_test.opencl +def test_key_value_sorter(ctx_factory): + context = ctx_factory() + queue = cl.CommandQueue(context) + + n = 10**5 + nkeys = 2000 + from pyopencl.clrandom import rand as clrand + keys = clrand(queue, n, np.int32, b=nkeys) + values = clrand(queue, n, np.int32, b=n).astype(np.int64) + + assert np.max(keys.get()) < nkeys + + from pyopencl.algorithm import KeyValueSorter + kvs = KeyValueSorter(context) + starts, lists = kvs(queue, keys, values, nkeys, starts_dtype=np.int32) + + starts = starts.get() + lists = lists.get() + + mydict = dict() + for k, v in zip(keys.get(), values.get()): + mydict.setdefault(k, []).append(v) + + for i in xrange(nkeys): + start, end = starts[i:i+2] + assert sorted(mydict[i]) == sorted(lists[start:end]) + # }}}