diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 29fab76e7af1d0a0dfa548a056a36273cf553b38..8e7f46722007d47e05e1d59f460a7af8d047398d 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -53,6 +53,13 @@ And some data on the host: .. }}} +We'll also disable console syntax highlighting because it confuses +doctest:: + + >>> # not a documented interface + >>> import loopy.options + >>> loopy.options.ALLOW_TERMINAL_COLORS = False + Getting started --------------- @@ -797,17 +804,19 @@ enabling some cost savings: a[4 * i_outer + 3] = 0.0f; } /* final slab for 'i_outer' */ - int const i_outer = -1 + n + -1 * (3 * n / 4); - - if (-1 + n >= 0) { - a[4 * i_outer] = 0.0f; - if (-2 + -4 * i_outer + n >= 0) - a[4 * i_outer + 1] = 0.0f; - if (-3 + -4 * i_outer + n >= 0) - a[4 * i_outer + 2] = 0.0f; - if (4 + 4 * i_outer + -1 * n == 0) - a[4 * i_outer + 3] = 0.0f; + int const i_outer = -1 + n + -1 * (3 * n / 4); + + if (-1 + n >= 0) + { + a[4 * i_outer] = 0.0f; + if (-2 + -4 * i_outer + n >= 0) + a[4 * i_outer + 1] = 0.0f; + if (-3 + -4 * i_outer + n >= 0) + a[4 * i_outer + 2] = 0.0f; + if (4 + 4 * i_outer + -1 * n == 0) + a[4 * i_outer + 3] = 0.0f; + } } ... 
@@ -1525,16 +1534,18 @@ Now to make things more interesting, we'll create a kernel with barriers: { __local int c[50 * 10 * 99]; - int const k_outer = 0; + { + int const k_outer = 0; - for (int j = 0; j <= 9; ++j) - for (int i = 0; i <= 49; ++i) - { - barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn rev-depends on insn_0) */; - c[990 * i + 99 * j + lid(0) + 1] = 2 * a[980 * i + 98 * j + lid(0) + 1]; - barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn_0 depends on insn) */; - e[980 * i + 98 * j + lid(0) + 1] = c[990 * i + 99 * j + 1 + lid(0) + 1] + c[990 * i + 99 * j + -1 + lid(0) + 1]; - } + for (int j = 0; j <= 9; ++j) + for (int i = 0; i <= 49; ++i) + { + barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn rev-depends on insn_0) */; + c[990 * i + 99 * j + lid(0) + 1] = 2 * a[980 * i + 98 * j + lid(0) + 1]; + barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn_0 depends on insn) */; + e[980 * i + 98 * j + lid(0) + 1] = c[990 * i + 99 * j + 1 + lid(0) + 1] + c[990 * i + 99 * j + -1 + lid(0) + 1]; + } + } } diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 3d3095d535e67161ec833549cb4b1aa0dedd1eef..8ac963835ec12702f2010806d1d49062422318a2 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -439,7 +439,7 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): if (ubound - lbound).plain_is_equal(zero): # single-trip, generate just a variable assignment, not a loop - result.append(merge_codegen_results(codegen_state, [ + inner = merge_codegen_results(codegen_state, [ astb.emit_initializer( codegen_state, kernel.index_dtype, loop_iname, @@ -447,7 +447,12 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): is_const=True), astb.emit_blank_line(), inner, - ])) + ]) + result.append( + inner.with_new_ast( + codegen_state, + astb.ast_block_scope_class( + inner.current_ast(codegen_state)))) else: inner_ast = inner.current_ast(codegen_state) diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index 
04fab05afdc38a8843a566e0e6e6b10098d6415c..c683d120dbf6a1205618f8835e8f9c72dd13adf7 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -207,6 +207,7 @@ def merge_codegen_results(codegen_state, elements, collapse=True): codegen_result = None block_cls = codegen_state.ast_builder.ast_block_class + block_scope_cls = codegen_state.ast_builder.ast_block_scope_class for el in elements: if isinstance(el, CodeGenerationResult): @@ -227,7 +228,8 @@ def merge_codegen_results(codegen_state, elements, collapse=True): dev_program_names.add(dp.name) cur_ast = el.current_ast(codegen_state) - if isinstance(cur_ast, block_cls): + if (isinstance(cur_ast, block_cls) + and not isinstance(cur_ast, block_scope_cls)): ast_els.extend(cur_ast.contents) else: ast_els.append(cur_ast) diff --git a/loopy/options.py b/loopy/options.py index a19afdaca8a4d14bcdf21397d1ea2d7fdd5a1a82..c88c512cb332ceec4587fd3c5011b9f729cad7d5 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -28,6 +28,9 @@ from pytools import Record import re +ALLOW_TERMINAL_COLORS = True + + class _ColoramaStub(object): def __getattribute__(self, name): return "" @@ -38,10 +41,18 @@ def _apply_legacy_map(lmap, kwargs): for name, val in six.iteritems(kwargs): try: - new_name, translator = lmap[name] + lmap_value = lmap[name] except KeyError: new_name = name else: + if lmap_value is None: + # ignore this + from warnings import warn + warn("option '%s' is deprecated and was ignored" % name, + DeprecationWarning) + continue + + new_name, translator = lmap_value if name in result: raise TypeError("may not pass a value for both '%s' and '%s'" % (name, new_name)) @@ -113,19 +124,11 @@ class Options(Record): Accepts a file name as a value. Writes to ``sys.stdout`` if none is given. - .. attribute:: disable_wrapper_highlight - - Use syntax highlighting in :attr:`write_wrapper`. - .. attribute:: write_code Print the generated code. Accepts a file name or a boolean as a value. 
Writes to ``sys.stdout`` if set to *True*. - .. attribute:: disable_code_highlight - - Use syntax highlighting in :attr:`write_code`. - .. attribute:: edit_code Invoke an editor (given by the environment variable @@ -150,8 +153,10 @@ class Options(Record): _legacy_options_map = { "cl_build_options": ("build_options", None), "write_cl": ("write_code", None), - "highlight_cl": ("disable_code_highlight", lambda val: not val), - "highlight_wrapper": ("disable_wrapper_highlight", lambda val: not val), + "highlight_cl": None, + "highlight_wrapper": None, + "disable_wrapper_highlight": None, + "disable_code_highlight": None, "edit_cl": ("edit_code", None), } @@ -173,6 +178,9 @@ class Options(Record): else: allow_terminal_colors_def = True + allow_terminal_colors_def = ( + ALLOW_TERMINAL_COLORS and allow_terminal_colors_def) + Record.__init__( self, @@ -185,9 +193,7 @@ class Options(Record): no_numpy=kwargs.get("no_numpy", False), return_dict=kwargs.get("return_dict", False), write_wrapper=kwargs.get("write_wrapper", False), - highlight_wrapper=kwargs.get("highlight_wrapper", False), write_code=kwargs.get("write_code", False), - disable_code_highlight=kwargs.get("disable_code_highlight", False), edit_code=kwargs.get("edit_code", False), build_options=kwargs.get("build_options", []), allow_terminal_colors=kwargs.get("allow_terminal_colors", @@ -208,7 +214,11 @@ class Options(Record): @property def highlight_cl(self): - return not self.disable_code_highlight + return self.allow_terminal_colors + + @property + def highlight_wrapper(self): + return self.allow_terminal_colors @property def write_cl(self): diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 409b9badb639c500e70404e781036b2e39bf333f..5d5743bae322fc59c989cafd85122c8ca619c422 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -216,6 +216,9 @@ class ASTBuilderBase(object): def emit_initializer(self, codegen_state, dtype, name, val_str, is_const): raise NotImplementedError() + 
def emit_declaration_scope(self, codegen_state, inner): + raise NotImplementedError() + def emit_blank_line(self): raise NotImplementedError() @@ -267,6 +270,10 @@ class DummyHostASTBuilder(ASTBuilderBase): def ast_block_class(self): return _DummyASTBlock + @property + def ast_block_scope_class(self): + return _DummyASTBlock + def emit_assignment(self, codegen_state, insn): return None diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 8b81efb312d996a2cd972a416ddf7d190e580d13..be83ec90c4720f10876e1a5e47a43c429fc40aeb 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -29,7 +29,7 @@ import six import numpy as np # noqa from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder from loopy.diagnostic import LoopyError -from cgen import Pointer, NestedDeclarator +from cgen import Pointer, NestedDeclarator, Block from cgen.mapper import IdentityMapper as CASTIdentityMapperBase from pymbolic.mapper.stringifier import PREC_NONE from loopy.symbolic import IdentityMapper @@ -133,6 +133,12 @@ class POD(Declarator): mapper_method = "map_loopy_pod" +class ScopingBlock(Block): + """A block that is mandatory for scoping and may not be simplified away + by :func:`loopy.codegen.result.merge_codegen_results`. 
+ """ + + class FunctionDeclarationWrapper(NestedDeclarator): mapper_method = "map_function_decl_wrapper" @@ -513,6 +519,10 @@ class CASTBuilder(ASTBuilderBase): from cgen import Block return Block + @property + def ast_block_scope_class(self): + return ScopingBlock + # }}} # {{{ code generation guts diff --git a/loopy/target/python.py b/loopy/target/python.py index a348cba83008794ee3d02a61ff90f6d81d1a9322..09a86665b7d949d7bf35b910cd2a6fd66109c1ec 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -33,6 +33,7 @@ from loopy.type_inference import TypeInferenceMapper from loopy.kernel.data import ValueArg from loopy.diagnostic import LoopyError # noqa from loopy.target import ASTBuilderBase +from genpy import Suite # {{{ expression to code @@ -145,6 +146,17 @@ class ExpressionToPythonMapper(StringifyMapper): # }}} +# {{{ genpy extensions + +class Collection(Suite): + def generate(self): + for item in self.contents: + for item_line in item.generate(): + yield item_line + +# }}} + + # {{{ ast builder def _numpy_single_arg_function_mangler(kernel, name, arg_dtypes): @@ -232,9 +244,15 @@ class PythonASTBuilderBase(ASTBuilderBase): @property def ast_block_class(self): - from genpy import Suite return Suite + @property + def ast_block_scope_class(self): + # Once a new version of genpy is released, switch to this: + # from genpy import Collection + # and delete the implementation above. + return Collection + def emit_sequential_loop(self, codegen_state, iname, iname_dtype, lbound, ubound, inner): ecm = codegen_state.expression_to_code_mapper