diff --git a/doc/reference.rst b/doc/reference.rst index 80b9f6659501915937c4e39b356ab43e79920915..865ee8fbe110bdf2670eaae7b478df6e8e87a431 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -317,8 +317,6 @@ Finishing up Running ------- -.. autoclass:: InvocationFlags - .. autoclass:: CompiledKernel Automatic Testing @@ -344,4 +342,9 @@ following always works:: .. autofunction:: get_dot_dependency_graph +Flags +----- + +.. autoclass:: LoopyFlags + .. vim: tw=75 diff --git a/loopy/__init__.py b/loopy/__init__.py index fcc19be9cb5cf69f2e89eda49d65d95ccb607688..989d1a40645b336954ffaa6f13418e4f5d1b5c7e 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -70,7 +70,8 @@ from loopy.preprocess import (preprocess_kernel, realize_reduction, infer_unknown_types) from loopy.schedule import generate_loop_schedules from loopy.codegen import generate_code -from loopy.compiled import InvocationFlags, CompiledKernel +from loopy.compiled import CompiledKernel +from loopy.flags import LoopyFlags from loopy.auto_test import auto_test_vs_ref __all__ = [ @@ -96,10 +97,12 @@ __all__ = [ "generate_loop_schedules", "generate_code", - "InvocationFlags", "CompiledKernel", + "CompiledKernel", "auto_test_vs_ref", + "LoopyFlags", + "make_kernel", "split_iname", "join_inames", "tag_inames", "duplicate_inames", "split_dimension", "join_dimensions", "tag_dimensions", diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 2a4a074e626ffbd6c05fcc2d4e6aa1c32c4d2879..04de9139cc15f29b52495edaee17b72d15668fb5 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -331,7 +331,7 @@ def _enumerate_cl_devices_for_ref_test(): def auto_test_vs_ref( ref_knl, ctx, test_knl, op_count=[], op_label=[], parameters={}, print_ref_code=False, print_code=True, warmup_rounds=2, - iflags=None, dump_binary=False, codegen_kwargs={}, + iflags=None, dump_binary=False, options=[], fills_entire_output=True, do_check=True, check_result=None ): @@ -396,8 +396,7 @@ def auto_test_vs_ref( ref_sched_kernel = knl 
break - ref_compiled = CompiledKernel(ref_ctx, ref_sched_kernel, - options=options, codegen_kwargs=codegen_kwargs) + ref_compiled = CompiledKernel(ref_ctx, ref_sched_kernel, options=options) if print_ref_code: print 75*"-" print "Reference Code:" @@ -486,7 +485,7 @@ def auto_test_vs_ref( kernel = infer_unknown_types(kernel, expect_completion=True) compiled = CompiledKernel(ctx, kernel, options=options, - codegen_kwargs=codegen_kwargs, iflags=iflags) + iflags=iflags) if args is None: cl_kernel_info = compiled.cl_kernel_info(frozenset()) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index ba6eb129b8531435472628e43340daeaff9d2326..0682b3dfff2a7d2b90aa4e67af2484e5b74ac0c7 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -273,8 +273,7 @@ class ImplementedDataInfo(Record): # {{{ main code generation entrypoint -def generate_code(kernel, with_annotation=False, - allow_complex=None): +def generate_code(kernel): if kernel.schedule is None: from loopy.schedule import get_one_scheduled_kernel kernel = get_one_scheduled_kernel(kernel) @@ -301,7 +300,6 @@ def generate_code(kernel, with_annotation=False, from loopy.codegen.expression import LoopyCCodeMapper ccm = (LoopyCCodeMapper(kernel, seen_dtypes, seen_functions, - with_annotation=with_annotation, allow_complex=allow_complex)) mod = [] diff --git a/loopy/codegen/expression.py b/loopy/codegen/expression.py index caf354f6f2d8970873a4ed8a75fd06d90d18f716..0cb3341aa163b27b8a2d56d94290e8de3614e629 100644 --- a/loopy/codegen/expression.py +++ b/loopy/codegen/expression.py @@ -251,7 +251,7 @@ def get_opencl_vec_member(idx): class LoopyCCodeMapper(RecursiveMapper): def __init__(self, kernel, seen_dtypes, seen_functions, var_subst_map={}, - with_annotation=False, allow_complex=False): + allow_complex=False): """ :arg seen_dtypes: set of dtypes that were encountered :arg seen_functions: set of tuples (name, c_name, arg_types) indicating @@ -265,7 +265,6 @@ class 
LoopyCCodeMapper(RecursiveMapper): self.type_inf_mapper = TypeInferenceMapper(kernel) self.allow_complex = allow_complex - self.with_annotation = with_annotation self.var_subst_map = var_subst_map.copy() # {{{ copy helpers @@ -275,7 +274,6 @@ class LoopyCCodeMapper(RecursiveMapper): var_subst_map = self.var_subst_map return LoopyCCodeMapper(self.kernel, self.seen_dtypes, self.seen_functions, var_subst_map=var_subst_map, - with_annotation=self.with_annotation, allow_complex=self.allow_complex) def copy_and_assign(self, name, value): @@ -337,7 +335,7 @@ class LoopyCCodeMapper(RecursiveMapper): def map_variable(self, expr, enclosing_prec, type_context): if expr.name in self.var_subst_map: - if self.with_annotation: + if self.kernel.flags.annotate_inames: return " /* %s */ %s" % ( expr.name, self.rec(self.var_subst_map[expr.name], diff --git a/loopy/compiled.py b/loopy/compiled.py index e64bc0f5a710add48c603988aef805df384e8426..02db5b9ac20e0e2cf656a4a7947059a1df6ed96d 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -219,7 +219,7 @@ def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags) base_arg = kernel.impl_arg_to_arg[impl_array_name] - if not flags.skip_checks: + if not flags.skip_arg_checks: gen("%s, _lpy_remdr = divmod(_lpy_offset, %d)" % (arg.name, base_arg.dtype.itemsize)) @@ -231,7 +231,7 @@ def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags) gen("%s = _lpy_offset // %d)" % (arg.name, base_arg.dtype.itemsize)) - if not flags.skip_checks: + if not flags.skip_arg_checks: gen("del _lpy_offset") gen("# }}}") @@ -252,7 +252,7 @@ def generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, flags) gen("if %s is None:" % arg.name) with Indentation(gen): - if not flags.skip_checks: + if not flags.skip_arg_checks: gen("if %s is None:" % impl_array_name) with Indentation(gen): gen("raise RuntimeError(\"required stride '%s' for " @@ -262,7 +262,7 @@ def 
generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, flags) base_arg = kernel.impl_arg_to_arg[impl_array_name] - if not flags.skip_checks: + if not flags.skip_arg_checks: gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)" % (arg.name, impl_array_name, stride_impl_axis, base_arg.dtype.itemsize)) @@ -298,7 +298,7 @@ def generate_value_arg_setup(gen, kernel, impl_arg_info, flags): gen("# {{{ process %s" % arg.name) gen("") - if not flags.skip_checks: + if not flags.skip_arg_checks: gen("if %s is None:" % arg.name) with Indentation(gen): gen("raise RuntimeError(\"input argument '%s' must " @@ -366,21 +366,21 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): gen("") - if not flags.skip_checks and not is_written: + if not flags.skip_arg_checks and not is_written: gen("if %s is None:" % arg.name) with Indentation(gen): gen("raise RuntimeError(\"input argument '%s' must " "be supplied\")" % arg.name) gen("") - if is_written and arg.arg_class is lp.ImageArg and not flags.skip_checks: + if is_written and arg.arg_class is lp.ImageArg and not flags.skip_arg_checks: gen("if %s is None:" % arg.name) with Indentation(gen): gen("raise RuntimeError(\"written image '%s' must " "be supplied\")" % arg.name) gen("") - if is_written and arg.shape is None and not flags.skip_checks: + if is_written and arg.shape is None and not flags.skip_arg_checks: gen("if %s is None:" % arg.name) with Indentation(gen): gen("raise RuntimeError(\"written argument '%s' has " @@ -409,7 +409,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): gen("_lpy_strides_%d = %s" % (i, strify( itemsize*arg.unvec_strides[i]))) - if not flags.skip_checks: + if not flags.skip_arg_checks: for i in xrange(num_axes): gen("assert _lpy_strides_%d > 0, " "\"'%s' has negative stride in axis %d\"" @@ -436,7 +436,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): strides=strify(sym_strides), dtype=python_dtype_str(arg.dtype))) - if not flags.skip_checks: + if 
not flags.skip_arg_checks: for i in xrange(num_axes): gen("del _lpy_shape_%d" % i) gen("del _lpy_strides_%d" % i) @@ -451,7 +451,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): # {{{ argument checking if arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \ - and not flags.skip_checks: + and not flags.skip_arg_checks: if possibly_made_by_loopy: gen("if not _lpy_made_by_loopy:") else: @@ -497,7 +497,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): # }}} - if possibly_made_by_loopy and not flags.skip_checks: + if possibly_made_by_loopy and not flags.skip_arg_checks: gen("del _lpy_made_by_loopy") gen("") @@ -516,36 +516,6 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): # }}} -class InvocationFlags(Record): - """ - .. attribute:: skip_checks - .. attribute:: no_numpy - .. attribute:: return_dict - .. attribute:: print_wrapper - .. attribute:: print_hl_wrapper - .. attribute:: print_cl - .. attribute:: print_hl_cl - .. attribute:: edit_cl - """ - - def __init__( - # All of these should default to False for the string-based - # interface to make sense. - - self, skip_checks=False, no_numpy=False, return_dict=False, - print_wrapper=False, print_hl_wrapper=False, - print_cl=False, print_hl_cl=False, - edit_cl=False - ): - Record.__init__( - self, skip_checks=skip_checks, no_numpy=no_numpy, - return_dict=return_dict, - print_wrapper=print_wrapper, print_hl_wrapper=print_hl_wrapper, - print_cl=print_cl, print_hl_cl=print_hl_cl, - edit_cl=edit_cl, - ) - - def generate_invoker(kernel, impl_arg_info, flags): system_args = [ "cl_kernel", "queue", "allocator=None", "wait_for=None", @@ -653,14 +623,14 @@ class _CLKernelInfo(Record): class CompiledKernel: def __init__(self, context, kernel, options=[], codegen_kwargs={}, - iflags=None): + flags=None, iflags=None): """ :arg kernel: may be a loopy.LoopKernel, a generator returning kernels (a warning will be issued if more than one is returned). 
If the kernel has not yet been loop-scheduled, that is done, too, with no specific arguments. - :arg iflags: An :class:`InvocationFlags` instance, or a dictionary - of arguments with which a :class:`InvocationFlags` instance + :arg flags: An :class:`loopy.LoopyFlags` instance, or a dictionary + of arguments with which a :class:`loopy.LoopyFlags` instance can be initialized. """ @@ -669,17 +639,21 @@ class CompiledKernel: self.codegen_kwargs = codegen_kwargs self.options = list(options) - if iflags is None: - iflags = InvocationFlags() - elif isinstance(iflags, str): - iflags_args = {} - for name in iflags.split(","): - iflags_args[name] = True - iflags = InvocationFlags(**iflags_args) - elif not isinstance(iflags, InvocationFlags): - iflags = InvocationFlags(**iflags) + if flags is not None and iflags is not None: + raise TypeError("cannot specify flags and iflags at the same time") + + if iflags is not None: + from warnings import warn + warn("The 'iflags' argument is deprecated", DeprecationWarning, + stacklevel=2) + + flags = iflags + + from loopy.flags import make_flags + my_flags = kernel.flags.copy() + my_flags.update(make_flags(flags)) - self.iflags = iflags + self.flags = my_flags self.packing_controller = SeparateArrayPackingController(kernel) @@ -715,11 +689,11 @@ class CompiledKernel: from loopy.codegen import generate_code code, impl_arg_info = generate_code(kernel, **self.codegen_kwargs) - if self.iflags.print_cl: + if self.flags.print_cl: print code - if self.iflags.print_hl_cl: + if self.flags.print_hl_cl: print get_highlighted_cl_code(code) - if self.iflags.edit_cl: + if self.flags.edit_cl: from pytools import invoke_editor code = invoke_editor(code, "code.cl") @@ -733,7 +707,7 @@ class CompiledKernel: cl_kernel=cl_kernel, impl_arg_info=impl_arg_info, invoker=generate_invoker( - kernel, impl_arg_info, self.iflags)) + kernel, impl_arg_info, self.flags)) # {{{ debugging aids diff --git a/loopy/flags.py b/loopy/flags.py new file mode 100644 index
0000000000000000000000000000000000000000..6717cedff3d248db12c14182dd4805a2a4d18bc6
--- /dev/null
+++ b/loopy/flags.py
@@ -0,0 +1,116 @@
+from __future__ import division, with_statement
+
+__copyright__ = "Copyright (C) 2013 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+from pytools import Record
+
+
+class LoopyFlags(Record):
+    """Flags controlling code generation and kernel invocation.
+
+    All flags default to *False*.
+
+    .. rubric:: Code-generation flags
+
+    .. attribute:: annotate_inames
+    .. attribute:: trace_assignments
+
+    .. rubric:: Invocation-related flags
+
+    .. attribute:: skip_arg_checks
+    .. attribute:: no_numpy
+    .. attribute:: return_dict
+    .. attribute:: print_wrapper
+    .. attribute:: print_hl_wrapper
+    .. attribute:: print_cl
+    .. attribute:: print_hl_cl
+    .. attribute:: edit_cl
+    """
+
+    def __init__(
+            # All of these should default to False for the string-based
+            # interface of make_flags (below) to make sense.
+
+            self,
+
+            annotate_inames=False,
+            trace_assignments=False,
+
+            skip_arg_checks=False, no_numpy=False, return_dict=False,
+            print_wrapper=False, print_hl_wrapper=False,
+            print_cl=False, print_hl_cl=False,
+            edit_cl=False
+            ):
+        Record.__init__(
+                self,
+
+                annotate_inames=annotate_inames,
+                trace_assignments=trace_assignments,
+
+                skip_arg_checks=skip_arg_checks, no_numpy=no_numpy,
+                return_dict=return_dict,
+                print_wrapper=print_wrapper, print_hl_wrapper=print_hl_wrapper,
+                print_cl=print_cl, print_hl_cl=print_hl_cl,
+                edit_cl=edit_cl,
+                )
+
+    def update(self, other):
+        """Merge the flags of *other* into this instance, in place.
+
+        A field ends up true if it is true in *self* or in *other*; a
+        field missing from either object is treated as *False*.
+        """
+        for f in self.__class__.fields:
+            setattr(self, f,
+                    getattr(self, f, False)
+                    or getattr(other, f, False))
+
+
+def make_flags(flags_arg):
+    """Return a :class:`LoopyFlags` instance built from *flags_arg*.
+
+    :arg flags_arg: *None* (all defaults), a comma-separated string of
+        flag names to switch on, an existing :class:`LoopyFlags`
+        instance (returned as is), or a mapping that is expanded into
+        keyword arguments for :class:`LoopyFlags`.
+    """
+    if flags_arg is None:
+        return LoopyFlags()
+
+    if isinstance(flags_arg, LoopyFlags):
+        # Already a flags object: hand it back rather than falling off
+        # the end of the function, which would return None to callers.
+        return flags_arg
+
+    if isinstance(flags_arg, str):
+        iflags_args = {}
+        for name in flags_arg.split(","):
+            # Tolerate whitespace around names and empty entries, so
+            # that "" and "print_cl, edit_cl" are accepted.
+            name = name.strip()
+            if name:
+                iflags_args[name] = True
+        return LoopyFlags(**iflags_args)
+
+    return LoopyFlags(**flags_arg)
diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 2ab26ee51685afd4182e8f81da7417dcd3d63a2f..b5ef85468737d6276c7db668d9f665d9a4cf8bf8 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -134,6 +134,9 @@ class LoopKernel(Record): .. attribute:: cache_manager .. attribute:: isl_context + .. attribute:: flags + + An instance of :class:`loopy.LoopyFlags` """ # {{{ constructor @@ -162,6 +165,7 @@ class LoopKernel(Record): cache_manager=None, index_dtype=np.int32, isl_context=None, + flags=None, # When kernels get intersected in slab decomposition, # their grid sizes shouldn't change.
This provides @@ -264,7 +268,8 @@ class LoopKernel(Record): function_manglers=function_manglers, symbol_manglers=symbol_manglers, index_dtype=index_dtype, - isl_context=isl_context) + isl_context=isl_context, + flags=flags) # }}} diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index eb17d0646b0530701825b689fe078e9fc88d8943..2d979321f187d4f2ed3940d1f9bd85d51aaf2a49 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -914,12 +914,18 @@ def make_kernel(device, domains, instructions, kernel_data=["..."], **kwargs): length 16. :arg silenced_warnings: a list (or semicolon-separated string) or warnings to silence + :arg flags: an instance of :class:`loopy.LoopyFlags` or an equivalent + string representation """ defines = kwargs.pop("defines", {}) default_order = kwargs.pop("default_order", "C") default_offset = kwargs.pop("default_offset", 0) silenced_warnings = kwargs.pop("silenced_warnings", []) + flags = kwargs.pop("flags", None) + + from loopy.flags import make_flags + flags = make_flags(flags) if isinstance(silenced_warnings, str): silenced_warnings = silenced_warnings.split(";") @@ -987,6 +993,7 @@ def make_kernel(device, domains, instructions, kernel_data=["..."], **kwargs): knl = LoopKernel(device, domains, instructions, kernel_args, temporary_variables=temporary_variables, silenced_warnings=silenced_warnings, + flags=flags, **kwargs) check_for_nonexistent_iname_deps(knl)