From 04414c27b7f61cee7e626de1e5ed9450cdce42c2 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Fri, 4 May 2018 15:10:31 +0100 Subject: [PATCH 1/4] rebase to master --- loopy/__init__.py | 3 +- loopy/codegen/loop.py | 4 +- loopy/kernel/data.py | 2 + loopy/symbolic.py | 7 +- loopy/target/c/codegen/expression.py | 1 - loopy/target/openmp.py | 111 +++++++++++++++++++++++++++ test/test_target.py | 21 +++++ 7 files changed, 142 insertions(+), 7 deletions(-) create mode 100644 loopy/target/openmp.py diff --git a/loopy/__init__.py b/loopy/__init__.py index 92b7fca77..a6f41cf28 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -142,6 +142,7 @@ from loopy.target import TargetBase, ASTBuilderBase from loopy.target.c import CTarget, ExecutableCTarget, generate_header from loopy.target.cuda import CudaTarget from loopy.target.opencl import OpenCLTarget +from loopy.target.openmp import OpenMPTarget from loopy.target.pyopencl import PyOpenCLTarget from loopy.target.ispc import ISPCTarget from loopy.target.numba import NumbaTarget, NumbaCudaTarget @@ -262,7 +263,7 @@ __all__ = [ "TargetBase", "CTarget", "ExecutableCTarget", "generate_header", - "CudaTarget", "OpenCLTarget", + "CudaTarget", "OpenCLTarget", "OpenMPTarget", "PyOpenCLTarget", "ISPCTarget", "NumbaTarget", "NumbaCudaTarget", "ASTBuilderBase", diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index ebddf3153..172afa189 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -258,9 +258,9 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, tag, = kernel.iname_tags_of_type(iname, UniqueTag, max_num=1, min_num=1) if isinstance(tag, GroupIndexTag): - hw_axis_expr = GroupHardwareAxisIndex(tag.axis) + hw_axis_expr = GroupHardwareAxisIndex(iname, tag.axis) elif isinstance(tag, LocalIndexTag): - hw_axis_expr = LocalHardwareAxisIndex(tag.axis) + hw_axis_expr = LocalHardwareAxisIndex(iname, tag.axis) else: raise RuntimeError("unexpected hw tag type") diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 9b746bb99..87e6f9a8f 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -217,6 +217,8 @@ def parse_tag(tag): if tag == "for": return None + if tag == "forceseq": + return ForceSequentialTag() elif tag == "ord": return InOrderSequentialSequentialTag() elif tag in ["unr"]: diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8927cd6fb..e844a48da 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -366,16 +366,17 @@ class ArrayLiteral(p.Leaf): class HardwareAxisIndex(p.Leaf): - def __init__(self, axis): + def __init__(self, iname, axis): + self.iname = iname self.axis = axis def stringifier(self): return StringifyMapper def __getinitargs__(self): - return (self.axis,) + return (self.iname, self.axis) - init_arg_names = ("axis",) + init_arg_names = ("iname", "axis") class GroupHardwareAxisIndex(HardwareAxisIndex): diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 59ed77f9c..4fcdd9588 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -791,7 +791,6 @@ class CExpressionToCodeMapper(RecursiveMapper): def map_comparison(self, expr, enclosing_prec): from pymbolic.mapper.stringifier import PREC_COMPARISON - return self.parenthesize_if_needed( "%s %s %s" % ( self.rec(expr.left, PREC_COMPARISON), diff --git a/loopy/target/openmp.py b/loopy/target/openmp.py new file mode 100644 index 000000000..180b01cc1 --- /dev/null +++ b/loopy/target/openmp.py @@ -0,0 +1,111 @@ +"""OpenMP target.""" + +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2015 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from loopy.diagnostic import LoopyError +from loopy.target.c import CTarget, CASTBuilder +from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper +from pymbolic import var + + +# {{{ expression mapper + +class ExprToOpenMPCExprMapper(ExpressionToCExpressionMapper): + + def map_group_hw_index(self, expr, type_context): + if expr.axis != 0: + raise LoopyError("OpenMP target only supports group axis=0") + return var(expr.iname) + + def map_local_hw_index(self, expr, type_context): + if expr.axis != 0: + raise LoopyError("OpenMP target only supports local axis=0") + return var(expr.iname) + +# }}} + + +# {{{ target + +class OpenMPTarget(CTarget): + """A target for the OpenMP programming model. + """ + + def __init__(self): + super(OpenMPTarget, self).__init__() + + def get_device_ast_builder(self): + return OpenMPCASTBuilder(self) + +# }}} + + +# {{{ ast builder + +class OpenMPCASTBuilder(CASTBuilder): + + # {{{ code generation guts + + def get_expression_to_c_expression_mapper(self, codegen_state): + return ExprToOpenMPCExprMapper(codegen_state) + + def emit_sequential_loop(self, codegen_state, iname, iname_dtype, + lbound, ubound, inner): + """mark loops with "l.0" tag with #pragma omp simd + """ + + from loopy.kernel.data import (filter_iname_tags_by_type, LocalIndexTag, + GroupIndexTag) + + loop = super(OpenMPCASTBuilder, self).emit_sequential_loop( + codegen_state, iname, iname_dtype, lbound, ubound, inner) + + tags = filter_iname_tags_by_type( + codegen_state.kernel.iname_to_tags[iname], + (LocalIndexTag, GroupIndexTag), 1) + if tags: + tag, = tags + from cgen import Block, Pragma + if isinstance(tag, LocalIndexTag) and tag.axis == 0: + loop = Block([Pragma("omp simd"), loop]) + elif isinstance(tag, GroupIndexTag) and tag.axis == 0: + loop = Block([Pragma("omp parallel for"), loop]) + + return loop + + def emit_barrier(self, synchronization_kind, mem_kind, comment): + if synchronization_kind == "local": + return + elif synchronization_kind == "global": + from cgen import Pragma + return Pragma("omp barrier") + else: + raise LoopyError("unknown barrier kind") + + # }}} + +# }}} + +# vim: foldmethod=marker diff --git a/test/test_target.py b/test/test_target.py index 7c0d003ee..c37b06d67 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -103,6 +103,27 @@ def test_cuda_target(): lp.preprocess_kernel(knl)))[0]) +def test_openmp_target(): + from loopy.target.openmp import OpenMPTarget + + knl = lp.make_kernel( + "{ [i]: 0 <= i < 1024 }", + "out[i] = 2*a[i]", + [ + lp.GlobalArg("out,a", np.float32, shape=lp.auto), + "..." + ], + target=OpenMPTarget()) + + knl = lp.split_iname(knl, "i", 4, inner_tag="ilp.seq") + knl = lp.tag_inames(knl, {"i_inner": "l.0", "i_outer": "g.0"}) + + print( + lp.generate_code( + lp.get_one_scheduled_kernel( + lp.preprocess_kernel(knl)))[0]) + + def test_generate_c_snippet(): from pymbolic import var I = var("I") # noqa -- GitLab From f8dcabc1dc05cec5f94725ee148795b467957ebb Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 8 May 2018 13:50:06 +0100 Subject: [PATCH 2/4] generate explicit loop for openmp target --- loopy/codegen/bounds.py | 5 +++++ loopy/codegen/control.py | 8 ++++++-- loopy/schedule/__init__.py | 11 ++++++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index c946e09a0..923a59644 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -87,6 +87,8 @@ def get_usable_inames_for_conditional(kernel, sched_index): for insn in insn_ids_for_subkernel for iname in kernel.insn_inames(insn)) + from loopy.target.openmp import OpenMPTarget + for iname in inames_for_subkernel: # Parallel inames are defined within a subkernel, BUT: # @@ -94,9 +96,12 @@ def get_usable_inames_for_conditional(kernel, sched_index): # # - ILP indices are not available in loop bounds, they only get defined # at the innermost level of nesting. + # + # - OpenMP target generates explicit loops for concurrent inames. if ( kernel.iname_tags_of_type(iname, ConcurrentTag) + and not isinstance(kernel.target, OpenMPTarget) and not (kernel.iname_tags_of_type(iname, LocalIndexTagBase) and crosses_barrier) and not kernel.iname_tags_of_type(iname, IlpBaseTag) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 966fd1e07..ab4f839e6 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -136,8 +136,8 @@ def generate_code_for_sched_index(codegen_state, sched_index): generate_vectorize_loop, generate_sequential_loop_dim_code) - from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, - ForceSequentialTag, LoopedIlpTag, VectorizeTag, + from loopy.kernel.data import (UnrolledIlpTag, HardwareConcurrentTag, + ForceSequentialTag, LoopedIlpTag, VectorizeTag, UnrollTag, InOrderSequentialSequentialTag, filter_iname_tags_by_type) if filter_iname_tags_by_type(tags, (UnrollTag, UnrolledIlpTag)): func = generate_unroll_loop @@ -146,6 +146,10 @@ def generate_code_for_sched_index(codegen_state, sched_index): elif not tags or filter_iname_tags_by_type(tags, (LoopedIlpTag, ForceSequentialTag, InOrderSequentialSequentialTag)): func = generate_sequential_loop_dim_code + elif filter_iname_tags_by_type(tags, HardwareConcurrentTag, max_num=1): + from loopy.target.openmp import OpenMPTarget + assert isinstance(kernel.target, OpenMPTarget) + func = generate_sequential_loop_dim_code else: raise RuntimeError("encountered (invalid) EnterLoop " "for '%s', tagged '%s'" diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 440ac22cb..a569938c8 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1875,7 +1875,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): for insn_id in sched_item_to_insn_id(item)) from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, - filter_iname_tags_by_type) + filter_iname_tags_by_type, HardwareConcurrentTag) ilp_inames = set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) @@ -1889,6 +1889,15 @@ def generate_loop_schedules_inner(kernel, debug_args={}): for iname, tags in six.iteritems(kernel.iname_to_tags) if filter_iname_tags_by_type(tags, ConcurrentTag)) + from loopy.target.openmp import OpenMPTarget + if isinstance(kernel.target, OpenMPTarget): + # need to generate explicit loops for OpenMP target + parallel_inames -= set( + iname for iname, tags in six.iteritems(kernel.iname_to_tags) + if filter_iname_tags_by_type(tags, HardwareConcurrentTag) + # TODO: should this be AxisTag instead? + ) + loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) sched_state = SchedulerState( -- GitLab From f46a306e3d48d60eff526e57ed18614b9813d2a9 Mon Sep 17 00:00:00 2001 From: tj-sun Date: Tue, 8 May 2018 14:12:13 +0100 Subject: [PATCH 3/4] new mechanism to map harware inames --- loopy/codegen/loop.py | 4 ++-- loopy/symbolic.py | 7 +++---- loopy/target/openmp.py | 22 +++++++++++++--------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 172afa189..ebddf3153 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -258,9 +258,9 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, tag, = kernel.iname_tags_of_type(iname, UniqueTag, max_num=1, min_num=1) if isinstance(tag, GroupIndexTag): - hw_axis_expr = GroupHardwareAxisIndex(iname, tag.axis) + hw_axis_expr = GroupHardwareAxisIndex(tag.axis) elif isinstance(tag, LocalIndexTag): - hw_axis_expr = LocalHardwareAxisIndex(iname, tag.axis) + hw_axis_expr = LocalHardwareAxisIndex(tag.axis) else: raise RuntimeError("unexpected hw tag type") diff --git a/loopy/symbolic.py b/loopy/symbolic.py index e844a48da..8927cd6fb 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -366,17 +366,16 @@ class ArrayLiteral(p.Leaf): class HardwareAxisIndex(p.Leaf): - def __init__(self, iname, axis): - self.iname = iname + def __init__(self, axis): self.axis = axis def stringifier(self): return StringifyMapper def __getinitargs__(self): - return (self.iname, self.axis) + return (self.axis,) - init_arg_names = ("iname", "axis") + init_arg_names = ("axis",) class GroupHardwareAxisIndex(HardwareAxisIndex): diff --git a/loopy/target/openmp.py b/loopy/target/openmp.py index 180b01cc1..2739c3d5b 100644 --- a/loopy/target/openmp.py +++ b/loopy/target/openmp.py @@ -34,15 +34,19 @@ from pymbolic import var class ExprToOpenMPCExprMapper(ExpressionToCExpressionMapper): - def map_group_hw_index(self, expr, type_context): - if expr.axis != 0: - raise LoopyError("OpenMP target only supports group axis=0") - return var(expr.iname) - - def map_local_hw_index(self, expr, type_context): - if expr.axis != 0: - raise LoopyError("OpenMP target only supports local axis=0") - return var(expr.iname) + def map_variable(self, expr, type_context): + if expr.name in self.codegen_state.var_subst_map: + if not self.kernel.options.annotate_inames: + hardware_index = self.codegen_state.var_subst_map[expr.name] + from loopy.symbolic import HardwareAxisIndex + if isinstance(hardware_index, HardwareAxisIndex): + if hardware_index.axis != 0: + raise LoopyError( + "Can only have axis=0 for OpenMP hardware tags.") + return var(expr.name) + + return super(ExprToOpenMPCExprMapper, self).map_variable(expr, type_context) + # }}} -- GitLab From 2c8439b8f3ca0aa09bda243c70680460f430b0ca Mon Sep 17 00:00:00 2001 From: tj-sun Date: Mon, 2 Jul 2018 17:40:16 +0100 Subject: [PATCH 4/4] updates base on feedbacks on MR --- loopy/__init__.py | 4 +- loopy/codegen/bounds.py | 7 +- loopy/codegen/control.py | 2 +- loopy/schedule/__init__.py | 5 +- loopy/target/__init__.py | 3 + loopy/target/c/__init__.py | 79 ++++++++++++++++++ loopy/target/c/codegen/expression.py | 18 +++++ loopy/target/numba.py | 6 ++ loopy/target/openmp.py | 115 --------------------------- test/test_target.py | 2 +- 10 files changed, 115 insertions(+), 126 deletions(-) delete mode 100644 loopy/target/openmp.py diff --git a/loopy/__init__.py b/loopy/__init__.py index a6f41cf28..642cad183 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -139,10 +139,10 @@ from loopy.frontend.fortran import (c_preprocess, parse_transformed_fortran, parse_fortran) from loopy.target import TargetBase, ASTBuilderBase -from loopy.target.c import CTarget, ExecutableCTarget, generate_header +from loopy.target.c import (CTarget, ExecutableCTarget, generate_header, + OpenMPTarget) from loopy.target.cuda import CudaTarget from loopy.target.opencl import OpenCLTarget -from loopy.target.openmp import OpenMPTarget from loopy.target.pyopencl import PyOpenCLTarget from loopy.target.ispc import ISPCTarget from loopy.target.numba import NumbaTarget, NumbaCudaTarget diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 923a59644..b7e7d7085 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -87,8 +87,6 @@ def get_usable_inames_for_conditional(kernel, sched_index): for insn in insn_ids_for_subkernel for iname in kernel.insn_inames(insn)) - from loopy.target.openmp import OpenMPTarget - for iname in inames_for_subkernel: # Parallel inames are defined within a subkernel, BUT: # @@ -97,11 +95,12 @@ def get_usable_inames_for_conditional(kernel, sched_index): # - ILP indices are not available in loop bounds, they only get defined # at the innermost level of nesting. # - # - OpenMP target generates explicit loops for concurrent inames. + # - Target (e.g. OpenMP) would generate explicit loops for hardware + # concurrent inames. if ( kernel.iname_tags_of_type(iname, ConcurrentTag) - and not isinstance(kernel.target, OpenMPTarget) + and not kernel.target.needs_explicit_hw_concurrent_loops() and not (kernel.iname_tags_of_type(iname, LocalIndexTagBase) and crosses_barrier) and not kernel.iname_tags_of_type(iname, IlpBaseTag) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index ab4f839e6..d49b9a407 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -147,7 +147,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): ForceSequentialTag, InOrderSequentialSequentialTag)): func = generate_sequential_loop_dim_code elif filter_iname_tags_by_type(tags, HardwareConcurrentTag, max_num=1): - from loopy.target.openmp import OpenMPTarget + from loopy.target.c import OpenMPTarget assert isinstance(kernel.target, OpenMPTarget) func = generate_sequential_loop_dim_code else: diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index a569938c8..4f1bb44ef 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1889,9 +1889,8 @@ def generate_loop_schedules_inner(kernel, debug_args={}): for iname, tags in six.iteritems(kernel.iname_to_tags) if filter_iname_tags_by_type(tags, ConcurrentTag)) - from loopy.target.openmp import OpenMPTarget - if isinstance(kernel.target, OpenMPTarget): - # need to generate explicit loops for OpenMP target + if kernel.target.needs_explicit_hw_concurrent_loops(): + # need to generate explicit loops for targets such as OpenMP parallel_inames -= set( iname for iname, tags in six.iteritems(kernel.iname_to_tags) if filter_iname_tags_by_type(tags, HardwareConcurrentTag) diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index a08b406f5..5f981c0f3 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -117,6 +117,9 @@ class TargetBase(object): def is_vector_dtype(self, dtype): raise NotImplementedError() + def needs_explicit_hw_concurrent_loops(self): + raise NotImplementedError() + def vector_dtype(self, base, count): raise NotImplementedError() diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 8e69793e8..9f4d44c54 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -288,6 +288,9 @@ class CTarget(TargetBase): def is_vector_dtype(self, dtype): return False + def needs_explicit_hw_concurrent_loops(self): + return False + def get_vector_dtype(self, base, count): raise KeyError() @@ -331,6 +334,33 @@ class ExecutableCTarget(CTarget): # }}} +# {{{ OpenMP target + +class OpenMPTarget(CTarget): + """A target for the OpenMP programming model. + """ + + def __init__(self): + super(OpenMPTarget, self).__init__() + + def needs_explicit_hw_concurrent_loops(self): + return True + + def pre_codegen_check(self, kernel): + from loopy.kernel.data import AxisTag + for i, tags in kernel.iname_to_tags.items(): + for tag in tags: + if isinstance(tag, AxisTag): + if tag.axis != 0: + raise LoopyError( + "Only axis=0 allowed for OpenMP hardware tags.") + + def get_device_ast_builder(self): + return OpenMPCASTBuilder(self) + +# }}} + + class _ConstRestrictPointer(Pointer): def get_decl_pair(self): sub_tp, sub_decl = self.subdecl.get_decl_pair() @@ -969,6 +999,55 @@ class CASTBuilder(ASTBuilderBase): return node +# {{{ OpenMP target AST builder + +class OpenMPCASTBuilder(CASTBuilder): + + # {{{ code generation guts + + def get_expression_to_c_expression_mapper(self, codegen_state): + from loopy.target.c.codegen.expression import ExprToOpenMPCExprMapper + return ExprToOpenMPCExprMapper(codegen_state) + + def emit_sequential_loop(self, codegen_state, iname, iname_dtype, + lbound, ubound, inner): + """mark inames tagged as "l.0" with #pragma omp simd, and "g.0" with + #pragma parallel for. + """ + + from loopy.kernel.data import (filter_iname_tags_by_type, LocalIndexTag, + GroupIndexTag) + + loop = super(OpenMPCASTBuilder, self).emit_sequential_loop( + codegen_state, iname, iname_dtype, lbound, ubound, inner) + + tags = filter_iname_tags_by_type( + codegen_state.kernel.iname_to_tags[iname], + (LocalIndexTag, GroupIndexTag), 1) + if tags: + tag, = tags + from cgen import Block, Pragma + if isinstance(tag, LocalIndexTag) and tag.axis == 0: + loop = Block([Pragma("omp simd"), loop]) + elif isinstance(tag, GroupIndexTag) and tag.axis == 0: + loop = Block([Pragma("omp parallel for"), loop]) + + return loop + + def emit_barrier(self, synchronization_kind, mem_kind, comment): + if synchronization_kind == "local": + return + elif synchronization_kind == "global": + from cgen import Pragma + return Pragma("omp barrier") + else: + raise LoopyError("unknown barrier kind") + + # }}} + +# }}} + + # {{{ header generation class CFunctionDeclExtractor(CASTIdentityMapper): diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 4fcdd9588..15fe526ef 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -896,4 +896,22 @@ class CExpressionToCodeMapper(RecursiveMapper): # }}} + +# {{{ expression to OpenMP expression mapper + +class ExprToOpenMPCExprMapper(ExpressionToCExpressionMapper): + + def map_variable(self, expr, type_context): + if expr.name in self.codegen_state.var_subst_map: + if not self.kernel.options.annotate_inames: + hardware_index = self.codegen_state.var_subst_map[expr.name] + from loopy.symbolic import HardwareAxisIndex + if isinstance(hardware_index, HardwareAxisIndex): + return var(expr.name) + + return super(ExprToOpenMPCExprMapper, self).map_variable(expr, type_context) + +# }}} + + # vim: fdm=marker diff --git a/loopy/target/numba.py b/loopy/target/numba.py index 6946063ee..db86a37df 100644 --- a/loopy/target/numba.py +++ b/loopy/target/numba.py @@ -121,6 +121,9 @@ class NumbaTarget(TargetBase): def get_vector_dtype(self, base, count): raise KeyError() + def needs_explicit_hw_concurrent_loops(self): + return False + def get_or_register_dtype(self, names, dtype=None): # These kind of shouldn't be here. return self.get_dtype_registry().get_or_register_dtype(names, dtype) @@ -187,6 +190,9 @@ class NumbaCudaTarget(TargetBase): def get_device_ast_builder(self): return NumbaCudaASTBuilder(self) + def needs_explicit_hw_concurrent_loops(self): + return False + # {{{ types @memoize_method diff --git a/loopy/target/openmp.py b/loopy/target/openmp.py deleted file mode 100644 index 2739c3d5b..000000000 --- a/loopy/target/openmp.py +++ /dev/null @@ -1,115 +0,0 @@ -"""OpenMP target.""" - -from __future__ import division, absolute_import - -__copyright__ = "Copyright (C) 2015 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -from loopy.diagnostic import LoopyError -from loopy.target.c import CTarget, CASTBuilder -from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper -from pymbolic import var - - -# {{{ expression mapper - -class ExprToOpenMPCExprMapper(ExpressionToCExpressionMapper): - - def map_variable(self, expr, type_context): - if expr.name in self.codegen_state.var_subst_map: - if not self.kernel.options.annotate_inames: - hardware_index = self.codegen_state.var_subst_map[expr.name] - from loopy.symbolic import HardwareAxisIndex - if isinstance(hardware_index, HardwareAxisIndex): - if hardware_index.axis != 0: - raise LoopyError( - "Can only have axis=0 for OpenMP hardware tags.") - return var(expr.name) - - return super(ExprToOpenMPCExprMapper, self).map_variable(expr, type_context) - - -# }}} - - -# {{{ target - -class OpenMPTarget(CTarget): - """A target for the OpenMP programming model. - """ - - def __init__(self): - super(OpenMPTarget, self).__init__() - - def get_device_ast_builder(self): - return OpenMPCASTBuilder(self) - -# }}} - - -# {{{ ast builder - -class OpenMPCASTBuilder(CASTBuilder): - - # {{{ code generation guts - - def get_expression_to_c_expression_mapper(self, codegen_state): - return ExprToOpenMPCExprMapper(codegen_state) - - def emit_sequential_loop(self, codegen_state, iname, iname_dtype, - lbound, ubound, inner): - """mark loops with "l.0" tag with #pragma omp simd - """ - - from loopy.kernel.data import (filter_iname_tags_by_type, LocalIndexTag, - GroupIndexTag) - - loop = super(OpenMPCASTBuilder, self).emit_sequential_loop( - codegen_state, iname, iname_dtype, lbound, ubound, inner) - - tags = filter_iname_tags_by_type( - codegen_state.kernel.iname_to_tags[iname], - (LocalIndexTag, GroupIndexTag), 1) - if tags: - tag, = tags - from cgen import Block, Pragma - if isinstance(tag, LocalIndexTag) and tag.axis == 0: - loop = Block([Pragma("omp simd"), loop]) - elif isinstance(tag, GroupIndexTag) and tag.axis == 0: - loop = Block([Pragma("omp parallel for"), loop]) - - return loop - - def emit_barrier(self, synchronization_kind, mem_kind, comment): - if synchronization_kind == "local": - return - elif synchronization_kind == "global": - from cgen import Pragma - return Pragma("omp barrier") - else: - raise LoopyError("unknown barrier kind") - - # }}} - -# }}} - -# vim: foldmethod=marker diff --git a/test/test_target.py b/test/test_target.py index c37b06d67..df6faef36 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -104,7 +104,7 @@ def test_cuda_target(): def test_openmp_target(): - from loopy.target.openmp import OpenMPTarget + from loopy.target.c import OpenMPTarget knl = lp.make_kernel( "{ [i]: 0 <= i < 1024 }", -- GitLab