From b8c98645f2960697cc9a812d0b5a409dcf425baf Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Sat, 29 Oct 2011 20:26:34 -0400 Subject: [PATCH] Add user interface for dim length prescription, test for workgroup prescribed too small. --- MEMO | 4 ++-- loopy/check.py | 3 --- loopy/codegen/__init__.py | 2 +- loopy/kernel.py | 24 +++++++++++++++++++----- test/test_loopy.py | 32 ++++++++++++++++++++++++++++++++ 5 files changed, 54 insertions(+), 11 deletions(-) diff --git a/MEMO b/MEMO index e695da1be..4bbf91776 100644 --- a/MEMO +++ b/MEMO @@ -42,8 +42,6 @@ Things to consider To-do ^^^^^ -- user interface for dim length prescription - - Deal with equality constraints. (These arise, e.g., when partitioning a loop of length 16 into 16s.) @@ -86,6 +84,8 @@ Future ideas Dealt with ^^^^^^^^^^ +- user interface for dim length prescription + - Restrict-to-sequential and tagging have nothing to do with each other. -> Removed SequentialTag and turned it into a separate computed kernel property. diff --git a/loopy/check.py b/loopy/check.py index f7932a098..3717084dd 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -146,9 +146,6 @@ def check_for_write_races(kernel): # }}} - - - # {{{ sanity-check for implemented domains of each instruction def check_implemented_domains(kernel, implemented_domains): diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 564907ec9..b093c44df 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -318,7 +318,7 @@ def generate_code(kernel): # }}} from loopy.check import check_implemented_domains - #assert check_implemented_domains(kernel, gen_code.implemented_domains) + assert check_implemented_domains(kernel, gen_code.implemented_domains) return str(mod) diff --git a/loopy/kernel.py b/loopy/kernel.py index bc4588b87..4c31a30c0 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -438,6 +438,9 @@ class LoopKernel(Record): 'bulk' slabs with fewer conditionals. 
:ivar temporary_variables: :ivar iname_to_tag: + :ivar local_sizes: A dictionary from integers to integers, mapping + workgroup axes to their sizes, e.g. *{0: 16}* forces axis 0 to be + length 16. The following two instance variables are only used until :func:`loopy.kernel.make_kernel` is finished: @@ -449,7 +452,7 @@ class LoopKernel(Record): preamble=None, assumptions=None, iname_slab_increments={}, temporary_variables={}, - workgroup_size=None, + local_sizes=None, iname_to_tag={}, iname_to_tag_requests=None): """ :arg domain: a :class:`islpy.BasicSet`, or a string parseable to a basic set by the isl. @@ -585,7 +588,7 @@ class LoopKernel(Record): assumptions=assumptions, iname_slab_increments=iname_slab_increments, temporary_variables=temporary_variables, - workgroup_size=workgroup_size, + local_sizes=local_sizes, iname_to_tag=iname_to_tag, iname_to_tag_requests=iname_to_tag_requests) @@ -761,11 +764,22 @@ class LoopKernel(Record): max_dims = self.device.max_work_item_dimensions - def to_dim_tuple(size_dict, which): + def to_dim_tuple(size_dict, which, forced_sizes={}): + forced_sizes = forced_sizes.copy() + size_list = [] sorted_axes = sorted(size_dict.iterkeys()) - while sorted_axes: + + while sorted_axes or forced_sizes: cur_axis = sorted_axes.pop(0) + + if len(size_list) in forced_sizes: + size_list.append( + isl.PwAff.from_aff( + isl.Aff.zero_on_domain(self.space.params()) + + forced_sizes.pop(len(size_list)))) + continue + while cur_axis > len(size_list): from loopy import LoopyAdvisory from warnings import warn @@ -782,7 +796,7 @@ class LoopKernel(Record): return tuple(size_list) return (to_dim_tuple(global_sizes, "global"), - to_dim_tuple(local_sizes, "local")) + to_dim_tuple(local_sizes, "local", forced_sizes=self.local_sizes)) def get_grid_sizes_as_exprs(self, ignore_auto=False): grid_size, group_size = self.get_grid_sizes(ignore_auto=ignore_auto) diff --git a/test/test_loopy.py b/test/test_loopy.py index 28561d56f..24446b0b9 100644 --- 
a/test/test_loopy.py +++ b/test/test_loopy.py @@ -38,6 +38,38 @@ def test_owed_barriers(ctx_factory): +def test_wg_too_small(ctx_factory): + dtype = np.float32 + ctx = ctx_factory() + order = "C" + queue = cl.CommandQueue(ctx, + properties=cl.command_queue_properties.PROFILING_ENABLE) + + knl = lp.make_kernel(ctx.devices[0], + "{[i]: 0<=i<100}", + [ + "[i:l.0] <float32> z[i] = a[i]" + ], + [ + lp.ArrayArg("a", dtype, shape=(100,)), + ], + local_sizes={0: 16}) + + kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.check_kernels(kernel_gen) + + for gen_knl in kernel_gen: + try: + compiled = lp.CompiledKernel(ctx, gen_knl) + except RuntimeError, e: + assert "implemented and desired" in str(e) + pass # expected! + else: + assert False # expecting an error + + + + if __name__ == "__main__": import sys if len(sys.argv) > 1: -- GitLab