diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 690e2bec2492eb19e029b5ad1b4f4e9919ceb3ba..c299df4e511603647de13d70614e886dad3257f1 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -324,8 +324,7 @@ def _enumerate_cl_devices_for_ref_test(): # {{{ main automatic testing entrypoint -def _complain_about_deferred_types(knl): - print "COMPL" +def _check_for_deferred_types(knl): for arg in knl.args: if arg.dtype is None: raise RuntimeError("Automatic testing requires that all " @@ -384,7 +383,7 @@ def auto_test_vs_ref( # {{{ compile and run reference code - _complain_about_deferred_types(ref_knl) + _check_for_deferred_types(ref_knl) found_ref_device = False @@ -486,7 +485,7 @@ def auto_test_vs_ref( test_kernels = [test_knl] for i, kernel in enumerate(test_kernels): - _complain_about_deferred_types(kernel) + _check_for_deferred_types(kernel) compiled = CompiledKernel(ctx, kernel, options=options, codegen_kwargs=codegen_kwargs) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 3541e8f222c292bd1f90e6b2e0bbcafc9b787f4b..5ebce957c00099d8dfa524c16b09ec559275a363 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -272,8 +272,22 @@ class ImplementedDataInfo(Record): # {{{ main code generation entrypoint +def _check_for_deferred_types(knl): + for arg in knl.args: + if arg.dtype is None: + raise RuntimeError("Code generation requires that all " + "argument types are known. Argument '%s' has " + "an unknown/deferred type." % arg.name) + + def generate_code(kernel, with_annotation=False, allow_complex=None): + if kernel.schedule is None: + from loopy.schedule import get_one_scheduled_kernel + kernel = get_one_scheduled_kernel(kernel) + + _check_for_deferred_types(kernel) + from cgen import (FunctionBody, FunctionDeclaration, Value, Module, Block, Line, Const, LiteralLines, Initializer) diff --git a/loopy/compiled.py b/loopy/compiled.py index c81f8dcdd900eb62d18575ce8e40b05808b67276..d799186f5811b1e0d8c7ae3120ea3358fbe018f3 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -694,25 +694,6 @@ def generate_invoker(kernel, impl_arg_info, flags): # {{{ compiled kernel object -def _get_kernel_from_iterable(iterable): - kernel_count = 0 - - for scheduled_kernel in iterable: - kernel_count += 1 - - if kernel_count == 1: - # use the first schedule - result = scheduled_kernel - - if kernel_count == 2: - from warnings import warn - warn("kernel scheduling was ambiguous--more than one " - "schedule found, ignoring", stacklevel=2) - break - - return result - - class _CLKernelInfo(Record): pass @@ -748,7 +729,6 @@ class CompiledKernel: def get_kernel(self, arg_to_dtype_set): kernel = self.kernel - import loopy as lp from loopy.kernel.tools import add_argument_dtypes if arg_to_dtype_set: @@ -762,8 +742,8 @@ class CompiledKernel: kernel = infer_unknown_types(kernel, expect_completion=True) if kernel.schedule is None: - kernel = _get_kernel_from_iterable( - lp.generate_loop_schedules(kernel)) + from loopy.schedule import get_one_scheduled_kernel + kernel = get_one_scheduled_kernel(kernel) return kernel diff --git a/loopy/schedule.py b/loopy/schedule.py index 4d5c0314ae6e53922391e35c2d2c9604b10119ed..52d001ed46f94c30f24a7711471cd5034c591672 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -879,4 +879,24 @@ def generate_loop_schedules(kernel, debug_args={}): # }}} + +def get_one_scheduled_kernel(kernel): + kernel_count = 0 + + for scheduled_kernel in generate_loop_schedules(kernel): + kernel_count += 1 + + if kernel_count == 1: + # use the first schedule + result = scheduled_kernel + + if kernel_count == 2: + from warnings import warn + warn("kernel scheduling was ambiguous--more than one " + "schedule found, ignoring", stacklevel=2) + break + + return result + + # vim: foldmethod=marker diff --git a/test/test_loopy.py b/test/test_loopy.py index 57e7476877d1ed85b543ec4a8a020b32426b1d50..272b0130b53edc600c599e566856308fe108ba1e 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1110,6 +1110,28 @@ def test_offsets_and_slicing(ctx_factory): assert la.norm(b_full.get() - b_full_h) < 1e-13 +def test_vector_ilp_with_prefetch(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel(ctx.devices[0], + "{ [i]: 0<=i 1: