diff --git a/loopy/__init__.py b/loopy/__init__.py index 346edcec10ba2b9713f1fc1ad26a0a1ea6e9778d..55024245f1947c422c9d6c0cd078b6ffc90d827e 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -101,7 +101,10 @@ __all__ = [ ] class auto: - pass + """A generic placeholder object for something that should be automatically + detected. See, for example, the *shape* or *strides* argument of + :class:`GlobalArg`. + """ # }}} diff --git a/loopy/compiled.py b/loopy/compiled.py index e844cf87ac30ffe32c438e9ed5d15a0073b52918..682af394577cd59f953c09879c740381c6a0053c 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -429,6 +429,7 @@ class TestArgInfo(Record): pass def make_ref_args(kernel, queue, parameters, fill_value): + import loopy as lp from loopy.kernel.data import ValueArg, GlobalArg, ImageArg from pymbolic import evaluate @@ -468,7 +469,7 @@ def make_ref_args(kernel, queue, parameters, fill_value): alloc_size = None strides = None else: - assert arg.offset == 0 + assert arg.offset is lp.auto or arg.offset == 0 strides = evaluate(arg.strides, parameters) @@ -530,6 +531,7 @@ def make_ref_args(kernel, queue, parameters, fill_value): def make_args(queue, kernel, arg_descriptors, parameters, fill_value): + import loopy as lp from loopy.kernel.data import ValueArg, GlobalArg, ImageArg from pymbolic import evaluate @@ -562,7 +564,7 @@ def make_args(queue, kernel, arg_descriptors, parameters, queue.context, arg_desc.ref_array.get()) elif isinstance(arg, GlobalArg): - assert arg.offset == 0 + assert arg.offset is lp.auto or arg.offset == 0 shape = evaluate(arg.shape, parameters) strides = evaluate(arg.strides, parameters) diff --git a/loopy/context_matching.py b/loopy/context_matching.py index 76ef818f3cbd3b81c60da6c73f5ee4fe27d63989..0fdf82ba918098f3aec035de82906fafa4e07482 100644 --- a/loopy/context_matching.py +++ b/loopy/context_matching.py @@ -64,12 +64,12 @@ class AlternativeMatch(object): # {{{ single id match parsing def parse_id_match(id_matches): - 
"""Syntax examples: + """Syntax examples:: - my_insn - compute_* - fetch*$first - fetch*$first,store*$first + my_insn + compute_* + fetch*$first + fetch*$first,store*$first Alternatively, a list of *(name_glob, tag_glob)* tuples. """ @@ -166,7 +166,7 @@ def parse_stack_match(smatch): lowest < next < ... < highest - where `lowest` is necessarily the bottom of the stack. `...` matches an + where `lowest` is necessarily the bottom of the stack. ``...`` matches an arbitrary number of intervening stack levels. There is currently no way to match the top of the stack. diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 12c7b196ad5919e918e9f68f138fce29697d8859..ed0faf8eaf03642148138ac47750ab2624ac9a1e 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -366,7 +366,8 @@ class IndexRankFinder(WalkMapper): else: self.index_ranks.append(len(expr.index)) -def guess_kernel_args_if_requested(domains, instructions, temporary_variables, subst_rules, kernel_args): +def guess_kernel_args_if_requested(domains, instructions, temporary_variables, + subst_rules, kernel_args, default_offset): if "..." not in kernel_args: return kernel_args @@ -439,7 +440,7 @@ def guess_kernel_args_if_requested(domains, instructions, temporary_variables, s # It's not a temp var, and thereby not a domain parameter--the only # other writable type of variable is an argument. 
- kernel_args.append(GlobalArg(arg_name, shape=lp.auto, offset=lp.auto)) + kernel_args.append(GlobalArg(arg_name, shape=lp.auto, offset=default_offset)) continue irank = find_index_rank(arg_name) @@ -447,7 +448,7 @@ def guess_kernel_args_if_requested(domains, instructions, temporary_variables, s # read-only, no indices kernel_args.append(ValueArg(arg_name)) else: - kernel_args.append(GlobalArg(arg_name, shape=lp.auto, offset=lp.auto)) + kernel_args.append(GlobalArg(arg_name, shape=lp.auto, offset=default_offset)) return kernel_args @@ -810,6 +811,9 @@ def make_kernel(device, domains, instructions, kernel_args=["..."], **kwargs): These defines may also be used in the domain and in argument shapes and strides. They are expanded only upon kernel creation. :arg default_order: "C" (default) or "F" + :arg default_offset: 0 or :class:`loopy.auto`. The default value of + *offset* in :class:`loopy.kernel.data.GlobalArg` for guessed arguments. + Defaults to 0. :arg function_manglers: list of functions of signature (name, arg_dtypes) returning a tuple (result_dtype, c_name) or a tuple (result_dtype, c_name, arg_dtypes), @@ -827,6 +831,7 @@ def make_kernel(device, domains, instructions, kernel_args=["..."], **kwargs): defines = kwargs.pop("defines", {}) default_order = kwargs.pop("default_order", "C") + default_offset = kwargs.pop("default_offset", 0) # {{{ instruction/subst parsing @@ -864,7 +869,8 @@ def make_kernel(device, domains, instructions, kernel_args=["..."], **kwargs): domains = parse_domains(isl_context, domains, defines) kernel_args = guess_kernel_args_if_requested(domains, instructions, - kwargs.get("temporary_variables", {}), substitutions, kernel_args) + kwargs.get("temporary_variables", {}), substitutions, kernel_args, + default_offset) from loopy.kernel import LoopKernel knl = LoopKernel(device, domains, instructions, kernel_args, **kwargs) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 
c174ca70161e2fe06634aaa374207ba751100fa1..9301de3f392b7bc0b00acf69394e0dcc13396bb7 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -195,8 +195,11 @@ class ShapedArg(KernelArgument): * 0 * a string (that is interpreted as an argument name). - * :class:`loopy.auto`, in which case this information is added at run time - by :class:`loopy.CompiledKernel`. + * :class:`loopy.auto`, in which case an offset argument + is added automatically, immediately following this argument. + :class:`loopy.CompiledKernel` is even smarter in its treatment of + this case and will compile custom versions of the kernel based on + whether the passed arrays have offsets or not. """ if dtype is not None: dtype = np.dtype(dtype) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 0a23d0b3d800e29280018ec5f35d77c8f3eb19a4..583dd9116b5100b35978c85ca40328ac268b8aba 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -943,6 +943,26 @@ def adjust_local_temp_var_storage(kernel): # }}} +# {{{ add automatic offset arguments + +def add_auto_offset_args(kernel): + import loopy as lp + + vng = kernel.get_var_name_generator() + + new_args = [] + for arg in kernel.args: + if getattr(arg, "offset", None) is lp.auto: + offset_arg_name = vng(arg.name+"_offset") + new_args.append(arg.copy(offset=offset_arg_name)) + new_args.append(lp.ValueArg(offset_arg_name, kernel.index_dtype)) + else: + new_args.append(arg) + + return kernel.copy(args=new_args) + +# }}} + @@ -973,6 +993,7 @@ def preprocess_kernel(kernel): kernel = add_boostability_and_automatic_dependencies(kernel) kernel = limit_boostability(kernel) kernel = adjust_local_temp_var_storage(kernel) + kernel = add_auto_offset_args(kernel) return kernel diff --git a/test/test_loopy.py b/test/test_loopy.py index 90dac0f97d4bfea6f4b76cdfc3988dce8ca5cac6..bc90c690675aa62314e7fb8a95e5aa2ca9fd401d 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -29,7 +29,9 @@ import numpy as np import loopy as lp import pyopencl as cl 
import pyopencl.clrandom + import logging +logger = logging.getLogger(__name__) from pyopencl.tools import pytest_generate_tests_for_pyopencl \ as pytest_generate_tests @@ -1179,7 +1181,6 @@ def test_triangle_domain(ctx_factory): def test_array_with_offset(ctx_factory): - dtype = np.float32 ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -1191,7 +1192,8 @@ def test_array_with_offset(ctx_factory): """ b[i,j] = 2*a[i,j] """, - assumptions="n>=1 and m>=1") + assumptions="n>=1 and m>=1", + default_offset=lp.auto) cknl = lp.CompiledKernel(ctx, knl)