diff --git a/MEMO b/MEMO index e695da1bee3c371e30f660e1163e4d0eb0fe4eef..4bbf9177621cc60840357c64a3b2be0c72305778 100644 --- a/MEMO +++ b/MEMO @@ -42,8 +42,6 @@ Things to consider To-do ^^^^^ -- user interface for dim length prescription - - Deal with equality constraints. (These arise, e.g., when partitioning a loop of length 16 into 16s.) @@ -86,6 +84,8 @@ Future ideas Dealt with ^^^^^^^^^^ +- user interface for dim length prescription + - Restrict-to-sequential and tagging have nothing to do with each other. -> Removed SequentialTag and turned it into a separate computed kernel property. diff --git a/loopy/check.py b/loopy/check.py index f7932a098ee52d775bcc352dc19dfc0d3b156307..3717084dd9610024676d36d89c20648bf8fb1801 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -146,9 +146,6 @@ def check_for_write_races(kernel): # }}} - - - # {{{ sanity-check for implemented domains of each instruction def check_implemented_domains(kernel, implemented_domains): diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 564907ec9800f9cbc06b481b6d621f2b0ba2219d..b093c44df718ac381c6cb3744804dde885a6e5ef 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -318,7 +318,7 @@ def generate_code(kernel): # }}} from loopy.check import check_implemented_domains - #assert check_implemented_domains(kernel, gen_code.implemented_domains) + assert check_implemented_domains(kernel, gen_code.implemented_domains) return str(mod) diff --git a/loopy/kernel.py b/loopy/kernel.py index bc4588b8705b9810f25284cae720393403df0649..4c31a30c05d73407553575182850131912c535c8 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -438,6 +438,9 @@ class LoopKernel(Record): 'bulk' slabs with fewer conditionals. :ivar temporary_variables: :ivar iname_to_tag: + :ivar local_sizes: A dictionary from integers to integers, mapping + workgroup axes to their sizes, e.g. *{0: 16}* forces axis 0 to be + length 16. 
The following two instance variables are only used until :func:`loopy.kernel.make_kernel` is finished: @@ -449,7 +452,7 @@ class LoopKernel(Record): preamble=None, assumptions=None, iname_slab_increments={}, temporary_variables={}, - workgroup_size=None, + local_sizes=None, iname_to_tag={}, iname_to_tag_requests=None): """ :arg domain: a :class:`islpy.BasicSet`, or a string parseable to a basic set by the isl. @@ -585,7 +588,7 @@ class LoopKernel(Record): assumptions=assumptions, iname_slab_increments=iname_slab_increments, temporary_variables=temporary_variables, - workgroup_size=workgroup_size, + local_sizes=local_sizes, iname_to_tag=iname_to_tag, iname_to_tag_requests=iname_to_tag_requests) @@ -761,11 +764,22 @@ class LoopKernel(Record): max_dims = self.device.max_work_item_dimensions - def to_dim_tuple(size_dict, which): + def to_dim_tuple(size_dict, which, forced_sizes={}): + forced_sizes = forced_sizes.copy() + size_list = [] sorted_axes = sorted(size_dict.iterkeys()) - while sorted_axes: + + while sorted_axes or forced_sizes: cur_axis = sorted_axes.pop(0) + + if len(size_list) in forced_sizes: + size_list.append( + isl.PwAff.from_aff( + isl.Aff.zero_on_domain(self.space.params()) + + forced_sizes.pop(len(size_list)))) + continue + while cur_axis > len(size_list): from loopy import LoopyAdvisory from warnings import warn @@ -782,7 +796,7 @@ class LoopKernel(Record): return tuple(size_list) return (to_dim_tuple(global_sizes, "global"), - to_dim_tuple(local_sizes, "local")) + to_dim_tuple(local_sizes, "local", forced_sizes=self.local_sizes)) def get_grid_sizes_as_exprs(self, ignore_auto=False): grid_size, group_size = self.get_grid_sizes(ignore_auto=ignore_auto) diff --git a/test/test_loopy.py b/test/test_loopy.py index 28561d56f426de49bbbc3feb2f0e6534636ec199..24446b0b9a3f9bdad0e67417a10255088ac812f7 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -38,6 +38,38 @@ def test_owed_barriers(ctx_factory): +def test_wg_too_small(ctx_factory): + dtype = 
np.float32 + ctx = ctx_factory() + order = "C" + queue = cl.CommandQueue(ctx, + properties=cl.command_queue_properties.PROFILING_ENABLE) + + knl = lp.make_kernel(ctx.devices[0], + "{[i]: 0<=i<100}", + [ + "[i:l.0] <float32> z[i] = a[i]" + ], + [ + lp.ArrayArg("a", dtype, shape=(100,)), + ], + local_sizes={0: 16}) + + kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.check_kernels(kernel_gen) + + for gen_knl in kernel_gen: + try: + compiled = lp.CompiledKernel(ctx, gen_knl) + except RuntimeError, e: + assert "implemented and desired" in str(e) + pass # expected! + else: + assert False # expecting an error + + + + if __name__ == "__main__": import sys if len(sys.argv) > 1: