From 82ff5b92de50138d7d8f7201ca90815a1ee29d37 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Tue, 14 Nov 2017 23:03:49 -0600 Subject: [PATCH 1/8] Add testing for sparse list --- pyopencl/algorithm.py | 4 +++- test/test_algorithm.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py index 5083b095..5f8287cf 100644 --- a/pyopencl/algorithm.py +++ b/pyopencl/algorithm.py @@ -736,7 +736,8 @@ class ListOfListsBuilder: def __init__(self, context, list_names_and_dtypes, generate_template, arg_decls, count_sharing=None, devices=None, name_prefix="plb_build_list", options=[], preamble="", - debug=False, complex_kernel=False): + debug=False, complex_kernel=False, + eliminate_empty_output_lists=False): """ :arg context: A :class:`pyopencl.Context`. :arg list_names_and_dtypes: a list of `(name, dtype)` tuples @@ -810,6 +811,7 @@ class ListOfListsBuilder: self.debug = debug self.complex_kernel = complex_kernel + self.eliminate_empty_output_lists = eliminate_empty_output_lists # {{{ kernel generators diff --git a/test/test_algorithm.py b/test/test_algorithm.py index 2e1537df..ab8fa6f7 100644 --- a/test/test_algorithm.py +++ b/test/test_algorithm.py @@ -847,6 +847,36 @@ def test_list_builder(ctx_factory): assert inf.count == 3000 assert (inf.lists.get()[-6:] == [1, 2, 2, 3, 3, 3]).all() + builder = ListOfListsBuilder( + context, + [("mylist1", np.int32), ("mylist2", np.int32)], + """//CL// + void generate(LIST_ARG_DECL USER_ARG_DECL index_type i) + { + if (i % 5 == 0) + { + for (int j = 0; j < 10; ++j) + { + APPEND_mylist1(j); + APPEND_mylist2(1); + } + } + } + """, + arg_decls=[], + eliminate_empty_output_lists=True) + + result, evt = builder(queue, 1000) + + mylist1 = result["mylist1"] + assert mylist1.count == 2000 + assert (mylist1.starts.get()[:5] == [0, 10, 20, 30, 40]).all() + assert (mylist1.indices.get()[:5] == [0, 5, 10, 15, 20]).all() + assert (mylist1.lists.get()[:5] == [0, 1, 2, 3, 4]).all() + mylist2 = result["mylist2"] + assert mylist2.count == 2000 + assert (mylist2.lists.get()[:5] == [1, 1, 1, 1, 1]).all() + def test_key_value_sorter(ctx_factory): from pytest import importorskip -- GitLab From 5c9bf8f1788b9829bd124b142dd066d518f6e4c4 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 16 Nov 2017 15:37:00 -0600 Subject: [PATCH 2/8] Update test case --- test/test_algorithm.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/test_algorithm.py b/test/test_algorithm.py index ab8fa6f7..3846d186 100644 --- a/test/test_algorithm.py +++ b/test/test_algorithm.py @@ -855,10 +855,10 @@ def test_list_builder(ctx_factory): { if (i % 5 == 0) { - for (int j = 0; j < 10; ++j) + for (int j = 0; j < i / 5; ++j) { APPEND_mylist1(j); - APPEND_mylist2(1); + APPEND_mylist2(j + 1); } } } @@ -869,13 +869,13 @@ def test_list_builder(ctx_factory): result, evt = builder(queue, 1000) mylist1 = result["mylist1"] - assert mylist1.count == 2000 - assert (mylist1.starts.get()[:5] == [0, 10, 20, 30, 40]).all() - assert (mylist1.indices.get()[:5] == [0, 5, 10, 15, 20]).all() - assert (mylist1.lists.get()[:5] == [0, 1, 2, 3, 4]).all() + assert mylist1.count == 19900 + assert (mylist1.starts.get()[:5] == [0, 1, 3, 6, 10]).all() + assert (mylist1.indices.get()[:5] == [5, 10, 15, 20, 25]).all() + assert (mylist1.lists.get()[:6] == [0, 0, 1, 0, 1, 2]).all() mylist2 = result["mylist2"] - assert mylist2.count == 2000 - assert (mylist2.lists.get()[:5] == [1, 1, 1, 1, 1]).all() + assert mylist2.count == 19900 + assert (mylist2.lists.get()[:6] == [1, 1, 2, 1, 2, 3]).all() def test_key_value_sorter(ctx_factory): -- GitLab From e9a08a52b40a5b9e0f28dd11ff42940d44189ae5 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 16 Nov 2017 23:35:45 -0600 Subject: [PATCH 3/8] Eliminate empty lists in ListOfListsBuilder --- pyopencl/algorithm.py | 79 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 3 deletions(-) diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py index 5f8287cf..6e608da1 100644 --- a/pyopencl/algorithm.py +++ b/pyopencl/algorithm.py @@ -648,7 +648,12 @@ void ${kernel_name}(${kernel_list_arg_decl} USER_ARG_DECL index_type n) %if name not in count_sharing: index_type plb_${name}_index; if (plb_${name}_start_index) - plb_${name}_index = plb_${name}_start_index[i]; + %if eliminate_empty_output_lists: + plb_${name}_index = + plb_${name}_start_index[plb_${name}_mask_scan[i]]; + %else: + plb_${name}_index = plb_${name}_start_index[i]; + %endif else plb_${name}_index = 0; %endif @@ -826,6 +831,29 @@ class ListOfListsBuilder: output_statement="ary[i+1] = item;", devices=self.devices) + @memoize_method + def get_compress_kernel(self, index_dtype): + from pyopencl.scan import GenericScanKernel + return GenericScanKernel( + self.context, index_dtype, + arguments=Template(""" + __global ${index_t} *count, + __global ${index_t} *indices, + __global ${index_t} *mask_scan, + __global ${index_t} *num_non_empty_list + """).render(index_t=dtype_to_ctype(index_dtype)), + input_expr="count[i] == 0 ? 0 : 1", + scan_expr="a+b", neutral="0", + output_statement=""" + mask_scan[i + 1] = item; + if (prev_item != item) { + indices[item - 1] = i; + count[item - 1] = count[i]; + } + if (i + 1 == N) *num_non_empty_list = item; + """, + devices=self.devices) + def do_not_vectorize(self): from pytools import any return (self.complex_kernel @@ -860,6 +888,7 @@ class ListOfListsBuilder: self.context.devices), debug=self.debug, do_not_vectorize=self.do_not_vectorize(), + eliminate_empty_output_lists=self.eliminate_empty_output_lists, kernel_list_arg_decl=_get_arg_decl(kernel_list_args), kernel_list_arg_values=_get_arg_list(user_list_args, prefix="&"), @@ -910,6 +939,10 @@ class ListOfListsBuilder: kernel_list_args.append( VectorArg(index_dtype, "plb_%s_start_index" % name)) + if self.eliminate_empty_output_lists: + kernel_list_args.append( + VectorArg(index_dtype, "plb_%s_mask_scan" % name)) + index_name = "plb_%s_index" % name user_list_args.append(OtherArg("%s *%s" % ( index_ctype, index_name), index_name)) @@ -926,6 +959,7 @@ class ListOfListsBuilder: self.context.devices), debug=self.debug, do_not_vectorize=self.do_not_vectorize(), + eliminate_empty_output_lists=self.eliminate_empty_output_lists, kernel_list_arg_decl=_get_arg_decl(kernel_list_args), kernel_list_arg_values=kernel_list_arg_values, @@ -1016,6 +1050,8 @@ class ListOfListsBuilder: count_kernel = self.get_count_kernel(index_dtype) write_kernel = self.get_write_kernel(index_dtype) scan_kernel = self.get_scan_kernel(index_dtype) + if self.eliminate_empty_output_lists: + compress_kernel = self.get_compress_kernel(index_dtype) # {{{ allocate memory for counts @@ -1052,6 +1088,26 @@ class ListOfListsBuilder: *(tuple(count_list_args) + args + (n_objects,)), **dict(wait_for=wait_for)) + if self.eliminate_empty_output_lists: + for name, dtype in self.list_names_and_dtypes: + if name in omit_lists: + continue + + info_record = result[name] + info_record.indices = cl.array.empty( + queue, (n_objects + 1,), index_dtype, allocator=allocator) + info_record.num_nonempty_lists = cl.array.empty( + queue, (1,), index_dtype, allocator=allocator) + info_record.mask_scan = cl.array.empty( + queue, (n_objects + 1,), index_dtype, allocator=allocator) + info_record.mask_scan[0] = 0 + info_record.compress_events = compress_kernel( + info_record.starts, + info_record.indices, + info_record.mask_scan, + info_record.num_nonempty_lists, + wait_for=[count_event] + info_record.mask_scan.events) + # {{{ run scans scan_events = [] @@ -1063,9 +1119,23 @@ class ListOfListsBuilder: continue info_record = result[name] + if self.eliminate_empty_output_lists: + info_record.compress_events.wait() + num_nonempty_lists = info_record.num_nonempty_lists.get()[0] + info_record.num_nonempty_lists = num_nonempty_lists + info_record.starts = info_record.starts[:num_nonempty_lists + 1] + info_record.indices = info_record.indices[:num_nonempty_lists] + info_record.starts[-1] = 0 + starts_ary = info_record.starts - evt = scan_kernel(starts_ary, wait_for=[count_event], - size=n_objects) + if self.eliminate_empty_output_lists: + evt = scan_kernel( + starts_ary, + size=info_record.num_nonempty_lists, + wait_for=starts_ary.events) + else: + evt = scan_kernel(starts_ary, wait_for=[count_event], + size=n_objects) starts_ary.setitem(0, 0, queue=queue, wait_for=[evt]) scan_events.extend(starts_ary.events) @@ -1103,6 +1173,9 @@ class ListOfListsBuilder: if name not in self.count_sharing: write_list_args.append(info_record.starts.data) + if self.eliminate_empty_output_lists: + write_list_args.append(info_record.mask_scan.data) + # }}} evt = write_kernel(queue, gsize, lsize, -- GitLab From 3e4560e5d00cfec5ca5e5aac60fe2d323f5d7775 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Fri, 17 Nov 2017 09:16:38 -0600 Subject: [PATCH 4/8] Disable unicode in Mako template for Python 2 --- pyopencl/algorithm.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py index 6e608da1..40c6a814 100644 --- a/pyopencl/algorithm.py +++ b/pyopencl/algorithm.py @@ -833,15 +833,22 @@ class ListOfListsBuilder: @memoize_method def get_compress_kernel(self, index_dtype): + arguments = """ + __global ${index_t} *count, + __global ${index_t} *indices, + __global ${index_t} *mask_scan, + __global ${index_t} *num_non_empty_list + """ + from sys import version_info + if (version_info > (3, 0)): + arguments = Template(arguments) + else: + arguments = Template(arguments, disable_unicode=True) + from pyopencl.scan import GenericScanKernel return GenericScanKernel( self.context, index_dtype, - arguments=Template(""" - __global ${index_t} *count, - __global ${index_t} *indices, - __global ${index_t} *mask_scan, - __global ${index_t} *num_non_empty_list - """).render(index_t=dtype_to_ctype(index_dtype)), + arguments=arguments.render(index_t=dtype_to_ctype(index_dtype)), input_expr="count[i] == 0 ? 0 : 1", scan_expr="a+b", neutral="0", output_statement=""" -- GitLab From e5bf685a0c95a6e9866a7f38632b49f4a0f3ddb8 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Sun, 19 Nov 2017 11:02:39 -0600 Subject: [PATCH 5/8] Add documentation and minor tweaks --- pyopencl/algorithm.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py index 40c6a814..302c0a79 100644 --- a/pyopencl/algorithm.py +++ b/pyopencl/algorithm.py @@ -1026,7 +1026,17 @@ class ListOfListsBuilder: This implies that all lists are contiguous. - *event* is a :class:`pyopencl.Event` for dependency management. + If eliminate_empty_output_lists is set to True, *lists* has two + additional attributes ``num_nonempty_lists`` and ``indices`` + + * ``num_nonempty_lists`` for the number of nonempty lists. + * ``indices`` for the index of nonempty list in input objects. + + In this case, `starts` has `num_nonempty_lists` + 1 entries. The *i*'s + entry is the start of the *i*'th nonempty list, which is generated by + the object with index *indices[i]*. + + *event* is a :class:`pyopencl.Event` for dependency management. .. versionchanged:: 2016.2 @@ -1096,9 +1106,12 @@ class ListOfListsBuilder: **dict(wait_for=wait_for)) if self.eliminate_empty_output_lists: + compress_events = {} for name, dtype in self.list_names_and_dtypes: if name in omit_lists: continue + if name in self.count_sharing: + continue info_record = result[name] info_record.indices = cl.array.empty( @@ -1108,7 +1121,7 @@ class ListOfListsBuilder: info_record.mask_scan = cl.array.empty( queue, (n_objects + 1,), index_dtype, allocator=allocator) info_record.mask_scan[0] = 0 - info_record.compress_events = compress_kernel( + compress_events[name] = compress_kernel( info_record.starts, info_record.indices, info_record.mask_scan, @@ -1127,7 +1140,7 @@ class ListOfListsBuilder: info_record = result[name] if self.eliminate_empty_output_lists: - info_record.compress_events.wait() + compress_events[name].wait() num_nonempty_lists = info_record.num_nonempty_lists.get()[0] info_record.num_nonempty_lists = num_nonempty_lists info_record.starts = info_record.starts[:num_nonempty_lists + 1] -- GitLab From d4f97f89da57f8911b40b7c65da5982e15e90847 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Mon, 20 Nov 2017 23:11:22 -0600 Subject: [PATCH 6/8] Improve variable naming and documentation --- pyopencl/algorithm.py | 38 ++++++++++++++++++++------------------ test/test_algorithm.py | 2 +- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py index 302c0a79..dc3be4be 100644 --- a/pyopencl/algorithm.py +++ b/pyopencl/algorithm.py @@ -650,7 +650,7 @@ void ${kernel_name}(${kernel_list_arg_decl} USER_ARG_DECL index_type n) if (plb_${name}_start_index) %if eliminate_empty_output_lists: plb_${name}_index = - plb_${name}_start_index[plb_${name}_mask_scan[i]]; + plb_${name}_start_index[${name}_compressed_indices[i]]; %else: plb_${name}_index = plb_${name}_start_index[i]; %endif @@ -835,8 +835,8 @@ class ListOfListsBuilder: def get_compress_kernel(self, index_dtype): arguments = """ __global ${index_t} *count, - __global ${index_t} *indices, - __global ${index_t} *mask_scan, + __global ${index_t} *nonempty_indices, + __global ${index_t} *compressed_indices, __global ${index_t} *num_non_empty_list """ from sys import version_info @@ -852,9 +852,9 @@ class ListOfListsBuilder: input_expr="count[i] == 0 ? 0 : 1", scan_expr="a+b", neutral="0", output_statement=""" - mask_scan[i + 1] = item; + compressed_indices[i + 1] = item; if (prev_item != item) { - indices[item - 1] = i; + nonempty_indices[item - 1] = i; count[item - 1] = count[i]; } if (i + 1 == N) *num_non_empty_list = item; @@ -948,7 +948,7 @@ class ListOfListsBuilder: if self.eliminate_empty_output_lists: kernel_list_args.append( - VectorArg(index_dtype, "plb_%s_mask_scan" % name)) + VectorArg(index_dtype, "%s_compressed_indices" % name)) index_name = "plb_%s_index" % name user_list_args.append(OtherArg("%s *%s" % ( @@ -1026,15 +1026,16 @@ class ListOfListsBuilder: This implies that all lists are contiguous. - If eliminate_empty_output_lists is set to True, *lists* has two - additional attributes ``num_nonempty_lists`` and ``indices`` + If the *eliminate_empty_output_lists* constructor argument is set to + True, *lists* has two additional attributes ``num_nonempty_lists`` and + ``nonempty_indices`` * ``num_nonempty_lists`` for the number of nonempty lists. - * ``indices`` for the index of nonempty list in input objects. + * ``nonempty_indices`` for the index of nonempty list in input objects. In this case, `starts` has `num_nonempty_lists` + 1 entries. The *i*'s entry is the start of the *i*'th nonempty list, which is generated by - the object with index *indices[i]*. + the object with index *nonempty_indices[i]*. *event* is a :class:`pyopencl.Event` for dependency management. @@ -1114,19 +1115,19 @@ class ListOfListsBuilder: continue info_record = result[name] - info_record.indices = cl.array.empty( + info_record.nonempty_indices = cl.array.empty( queue, (n_objects + 1,), index_dtype, allocator=allocator) info_record.num_nonempty_lists = cl.array.empty( queue, (1,), index_dtype, allocator=allocator) - info_record.mask_scan = cl.array.empty( + info_record.compressed_indices = cl.array.empty( queue, (n_objects + 1,), index_dtype, allocator=allocator) - info_record.mask_scan[0] = 0 + info_record.compressed_indices[0] = 0 compress_events[name] = compress_kernel( info_record.starts, - info_record.indices, - info_record.mask_scan, + info_record.nonempty_indices, + info_record.compressed_indices, info_record.num_nonempty_lists, - wait_for=[count_event] + info_record.mask_scan.events) + wait_for=[count_event] + info_record.compressed_indices.events) # {{{ run scans @@ -1144,7 +1145,8 @@ class ListOfListsBuilder: num_nonempty_lists = info_record.num_nonempty_lists.get()[0] info_record.num_nonempty_lists = num_nonempty_lists info_record.starts = info_record.starts[:num_nonempty_lists + 1] - info_record.indices = info_record.indices[:num_nonempty_lists] + info_record.nonempty_indices = \ + info_record.nonempty_indices[:num_nonempty_lists] info_record.starts[-1] = 0 starts_ary = info_record.starts @@ -1194,7 +1196,7 @@ class ListOfListsBuilder: write_list_args.append(info_record.starts.data) if self.eliminate_empty_output_lists: - write_list_args.append(info_record.mask_scan.data) + write_list_args.append(info_record.compressed_indices.data) # }}} diff --git a/test/test_algorithm.py b/test/test_algorithm.py index 3846d186..ee32278b 100644 --- a/test/test_algorithm.py +++ b/test/test_algorithm.py @@ -871,7 +871,7 @@ def test_list_builder(ctx_factory): mylist1 = result["mylist1"] assert mylist1.count == 19900 assert (mylist1.starts.get()[:5] == [0, 1, 3, 6, 10]).all() - assert (mylist1.indices.get()[:5] == [5, 10, 15, 20, 25]).all() + assert (mylist1.nonempty_indices.get()[:5] == [5, 10, 15, 20, 25]).all() assert (mylist1.lists.get()[:6] == [0, 0, 1, 0, 1, 2]).all() mylist2 = result["mylist2"] assert mylist2.count == 19900 -- GitLab From 764145ca3898ff5dc2ac24d1b55472edcee92508 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 23 Nov 2017 11:07:15 -0600 Subject: [PATCH 7/8] Make BuiltList immutable and add copyright notice --- pyopencl/algorithm.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py index dc3be4be..152fa3a5 100644 --- a/pyopencl/algorithm.py +++ b/pyopencl/algorithm.py @@ -5,7 +5,8 @@ from __future__ import absolute_import from six.moves import range from six.moves import zip -__copyright__ = """Copyright 2011-2012 Andreas Kloeckner""" +__copyright__ = """Copyright 2011-2012 Andreas Kloeckner \ + Copyright 2017 Hao Gao""" __license__ = """ Permission is hereby granted, free of charge, to any person @@ -1087,7 +1088,12 @@ class ListOfListsBuilder: # The scan will turn the "counts" array into the "starts" array # in-place. - result[name] = BuiltList(starts=counts) + if self.eliminate_empty_output_lists: + result[name] = BuiltList(count=None, starts=counts, lists=None, + num_nonempty_lists=None, + nonempty_indices=None) + else: + result[name] = BuiltList(count=None, starts=counts, lists=None) count_list_args.append(counts.data) # }}} -- GitLab From 7ce65d47df1ec478e2969eb8fc2f5dec76df07e8 Mon Sep 17 00:00:00 2001 From: Hao Gao Date: Thu, 23 Nov 2017 12:22:58 -0600 Subject: [PATCH 8/8] Fix indentation --- pyopencl/algorithm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py index 152fa3a5..73d87154 100644 --- a/pyopencl/algorithm.py +++ b/pyopencl/algorithm.py @@ -651,7 +651,9 @@ void ${kernel_name}(${kernel_list_arg_decl} USER_ARG_DECL index_type n) if (plb_${name}_start_index) %if eliminate_empty_output_lists: plb_${name}_index = - plb_${name}_start_index[${name}_compressed_indices[i]]; + plb_${name}_start_index[ + ${name}_compressed_indices[i] + ]; %else: plb_${name}_index = plb_${name}_start_index[i]; %endif -- GitLab