diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5e2805bd9cc8fbb64f63893093e45229d6803cb3..5f7d5a6463715da6564f20827865ae48944784e1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,8 +1,8 @@ Python 2.7 AMD CPU: script: - - py_version=2.7 - - cl_dev=amd_pu - - EXTRA_INSTALL="numpy mako" + - export PY_EXE=python2.7 + - export PYOPENCL_TEST=amd:pu + - export EXTRA_INSTALL="numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -12,9 +12,10 @@ Python 2.7 AMD CPU: - tags Python 3.4 AMD CPU: script: - - py_version=3.4 - - cl_dev=amd_pu - - EXTRA_INSTALL="numpy mako" + - export PY_EXE=python3.4 + - export PYOPENCL_TEST=amd:pu + - export EXTRA_INSTALL="numpy mako" + - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: diff --git a/contrib/floopy-highlighting/floopy.vim b/contrib/floopy-highlighting/floopy.vim index 59c5b15a431e3c28072c96afd824b6937f973b56..57c09a652c0cb9141d6764d300ebb3618577b05d 100644 --- a/contrib/floopy-highlighting/floopy.vim +++ b/contrib/floopy-highlighting/floopy.vim @@ -7,7 +7,7 @@ " :set filetype=floopy " " You may also include a line -" vim: filetype=pyopencl.python +" vim: filetype=floopy.python " at the end of your file to set the file type automatically. " " Another option is to include the following in your .vimrc @@ -16,24 +16,7 @@ runtime! syntax/fortran.vim unlet b:current_syntax -try - syntax include @clCode syntax/opencl.vim -catch - syntax include @clCode syntax/c.vim -endtry - -if exists('b:current_syntax') - let s:current_syntax=b:current_syntax - " Remove current syntax definition, as some syntax files (e.g. cpp.vim) - " do nothing if b:current_syntax is defined. 
- unlet b:current_syntax -endif - syntax include @LoopyPython syntax/python.vim -try - syntax include @LoopyPython after/syntax/python.vim -catch -endtry if exists('s:current_syntax') let b:current_syntax=s:current_syntax @@ -43,6 +26,6 @@ endif syntax region textSnipLoopyPython \ matchgroup=Comment -\ start='$loopy begin transform' end='$loopy end transform' +\ start='$loopy begin' end='$loopy end' \ containedin=ALL \ contains=@LoopyPython diff --git a/doc/reference.rst b/doc/reference.rst index 6ed83d43084b5578c817964b13097b4aa78294ef..c7435bbf7f84570b6672ba1bf42eeab9d495ea56 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -356,7 +356,7 @@ TODO: Matching instruction tags .. automodule:: loopy.context_matching -.. autofunction:: parse_id_match +.. autofunction:: parse_match .. autofunction:: parse_stack_match diff --git a/doc/tutorial.rst b/doc/tutorial.rst index e566365f8d520d33f518bc95cbaa43e5af6e747c..3180b5066823481c01b43f659ec851e3951fa5e4 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -96,7 +96,7 @@ always see loopy's view of a kernel by printing it. .. doctest:: - >>> print knl + >>> print(knl) --------------------------------------------------------------------------- KERNEL: loopy_kernel --------------------------------------------------------------------------- @@ -248,7 +248,7 @@ call :func:`loopy.generate_code`: >>> typed_knl = lp.preprocess_kernel(typed_knl, device=ctx.devices[0]) >>> typed_knl = lp.get_one_scheduled_kernel(typed_knl) >>> code, _ = lp.generate_code(typed_knl) - >>> print code + >>> print(code) #define lid(N) ((int) get_local_id(N)) #define gid(N) ((int) get_group_id(N)) <BLANKLINE> @@ -316,7 +316,7 @@ that these dependencies show up there, too: .. 
doctest:: - >>> print knl + >>> print(knl) --------------------------------------------------------------------------- KERNEL: loopy_kernel --------------------------------------------------------------------------- @@ -444,8 +444,7 @@ around the *j* loop, or the other way around, in the following simple zero-fill kernel? It turns out that Loopy will typically choose a loop nesting for us, but it -does not like doing so. In this tutorial (where we've turned Loopy's warnings -into errors), we are told what is wrong in no uncertain terms:: +does not like doing so. Loo.py will react to the following code .. doctest:: @@ -455,14 +454,13 @@ into errors), we are told what is wrong in no uncertain terms:: ... a[i,j] = 0 ... """) +By saying:: - >>> knl = lp.set_options(knl, "write_cl") - >>> evt, (out,) = knl(queue, a=a_mat_dev) - Traceback (most recent call last): - ... LoopyWarning: kernel scheduling was ambiguous--more than one schedule found, ignoring -This is easily resolved: +And by picking one of the possible loop orderings at random. + +The warning (and the nondeterminism it warns about) is easily resolved: .. doctest:: @@ -476,6 +474,7 @@ ambiguous. .. doctest:: + >>> knl = lp.set_options(knl, "write_cl") >>> evt, (out,) = knl(queue, a=a_mat_dev) #define lid(N) ((int) get_local_id(N)) ... @@ -673,7 +672,7 @@ Iname implementation tags are also printed along with the entire kernel: .. doctest:: - >>> print knl + >>> print(knl) --------------------------------------------------------------------------- ... INAME IMPLEMENTATION TAGS: @@ -723,9 +722,9 @@ those for us: .. doctest:: >>> glob, loc = knl.get_grid_sizes() - >>> print glob + >>> print(glob) (Aff("[n] -> { [(floor((127 + n)/128))] }"),) - >>> print loc + >>> print(loc) (Aff("[n] -> { [(128)] }"),) Note that this functionality returns internal objects and is not really @@ -850,8 +849,8 @@ variable, as one might do in C or another programming language: ... "{ [i]: 0<=i<n }", ... """ ... 
<float32> a_temp = sin(a[i]) - ... out1[i] = a_temp - ... out2[i] = sqrt(1-a_temp*a_temp) + ... out1[i] = a_temp {id=out1} + ... out2[i] = sqrt(1-a_temp*a_temp) {dep=out1} ... """) The angle brackets ``<>`` denote the creation of a temporary. The name of @@ -862,6 +861,9 @@ the conventional :mod:`numpy` scalar types (:class:`numpy.int16`, :class:`numpy.complex128`) will work. (Yes, :mod:`loopy` supports and generates correct code for complex arithmetic.) +(If you're wondering, the dependencies above were added to make the doctest +produce predictable output.) + The generated code places this variable into what OpenCL calls 'private' memory, local to each work item. @@ -877,8 +879,8 @@ memory, local to each work item. for (int i = 0; i <= -1 + n; ++i) { a_temp = sin(a[i]); - out2[i] = sqrt(1.0f + -1.0f * a_temp * a_temp); out1[i] = a_temp; + out2[i] = sqrt(1.0f + -1.0f * a_temp * a_temp); } } @@ -921,7 +923,7 @@ Consider the following example: ... "{ [i_outer,i_inner, k]: " ... "0<= 16*i_outer + i_inner <n and 0<= i_inner,k <16}", ... """ - ... <> a_temp[i_inner] = a[16*i_outer + i_inner] + ... <> a_temp[i_inner] = a[16*i_outer + i_inner] {priority=10} ... out[16*i_outer + i_inner] = sum(k, a_temp[k]) ... """) >>> knl = lp.tag_inames(knl, dict(i_outer="g.0", i_inner="l.0")) @@ -952,6 +954,9 @@ it is written in parallel across values of the group-local iname *i_inner*. In addition, :mod:`loopy` has emitted a barrier instruction to achieve the :ref:`ordering` specified by the instruction dependencies. +(The ``priority=10`` attribute was added to make the output of the test +deterministic.) + .. 
note:: It is worth noting that it was not necessary to provide a size for the diff --git a/loopy/__init__.py b/loopy/__init__.py index dcd8e27c1eaf02a0f7aee133598b0e2fd34f70b9..58029467c7bb263203bf33430eb306fe6f16bf14 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -152,7 +152,10 @@ class _InameSplitter(RuleAwareIdentityMapper): def map_reduction(self, expr, expn_state): if (self.split_iname in expr.inames and self.split_iname not in expn_state.arg_context - and self.within(expn_state.stack)): + and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack)): new_inames = list(expr.inames) new_inames.remove(self.split_iname) new_inames.extend([self.outer_iname, self.inner_iname]) @@ -166,7 +169,10 @@ class _InameSplitter(RuleAwareIdentityMapper): def map_variable(self, expr, expn_state): if (expr.name == self.split_iname and self.split_iname not in expn_state.arg_context - and self.within(expn_state.stack)): + and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack)): return self.replacement_index else: return super(_InameSplitter, self).map_variable(expr, expn_state) @@ -318,7 +324,10 @@ class _InameJoiner(RuleAwareSubstitutionMapper): expr_inames = set(expr.inames) overlap = (self.join_inames & expr_inames - set(expn_state.arg_context)) - if overlap and self.within(expn_state.stack): + if overlap and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack): if overlap != expr_inames: raise LoopyError( "Cannot join inames '%s' if there is a reduction " @@ -520,7 +529,10 @@ class _InameDuplicator(RuleAwareIdentityMapper): def map_reduction(self, expr, expn_state): if (set(expr.inames) & self.old_inames_set - and self.within(expn_state.stack)): + and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack)): new_inames = tuple( self.old_to_new.get(iname, iname) if iname not in expn_state.arg_context @@ -538,14 +550,17 @@ class 
_InameDuplicator(RuleAwareIdentityMapper): if (new_name is None or expr.name in expn_state.arg_context - or not self.within(expn_state.stack)): + or not self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack)): return super(_InameDuplicator, self).map_variable(expr, expn_state) else: from pymbolic import var return var(new_name) - def map_instruction(self, insn): - if not self.within(((insn.id, insn.tags),)): + def map_instruction(self, kernel, insn): + if not self.within(kernel, insn, ()): return insn new_fid = frozenset( @@ -1050,7 +1065,7 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, # If the rule survived past precompute() (i.e. some accesses fell outside # the footprint), get rid of it before moving on. if rule_name in new_kernel.substitutions: - return expand_subst(new_kernel, rule_name) + return expand_subst(new_kernel, "id:"+rule_name) else: return new_kernel @@ -1060,19 +1075,19 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, # {{{ instruction processing def find_instructions(kernel, insn_match): - from loopy.context_matching import parse_id_match - match = parse_id_match(insn_match) - return [insn for insn in kernel.instructions if match(insn.id, insn.tags)] + from loopy.context_matching import parse_match + match = parse_match(insn_match) + return [insn for insn in kernel.instructions if match(kernel, insn)] def map_instructions(kernel, insn_match, f): - from loopy.context_matching import parse_id_match - match = parse_id_match(insn_match) + from loopy.context_matching import parse_match + match = parse_match(insn_match) new_insns = [] for insn in kernel.instructions: - if match(insn.id, None): + if match(kernel, insn): new_insns.append(f(insn)) else: new_insns.append(insn) @@ -1084,7 +1099,7 @@ def set_instruction_priority(kernel, insn_match, priority): """Set the priority of instructions matching *insn_match* to *priority*. 
*insn_match* may be any instruction id match understood by - :func:`loopy.context_matching.parse_id_match`. + :func:`loopy.context_matching.parse_match`. """ def set_prio(insn): @@ -1098,7 +1113,7 @@ def add_dependency(kernel, insn_match, dependency): by *insn_match*. *insn_match* may be any instruction id match understood by - :func:`loopy.context_matching.parse_id_match`. + :func:`loopy.context_matching.parse_match`. """ def add_dep(insn): @@ -1220,7 +1235,11 @@ class _ReductionSplitter(RuleAwareIdentityMapper): # FIXME raise NotImplementedError() - if self.inames <= set(expr.inames) and self.within(expn_state.stack): + if (self.inames <= set(expr.inames) + and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack)): leftover_inames = set(expr.inames) - self.inames from loopy.symbolic import Reduction @@ -1659,10 +1678,12 @@ def affine_map_inames(kernel, old_inames, new_inames, equations): var_name_gen = kernel.get_var_name_generator() from pymbolic.mapper.substitutor import make_subst_func + from loopy.context_matching import parse_stack_match + rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, var_name_gen) old_to_new = RuleAwareSubstitutionMapper(rule_mapping_context, - make_subst_func(subst_dict), within=lambda stack: True) + make_subst_func(subst_dict), within=parse_stack_match(None)) kernel = ( rule_mapping_context.finish_kernel( @@ -1792,12 +1813,12 @@ def fold_constants(kernel): # {{{ tag_instructions def tag_instructions(kernel, new_tag, within=None): - from loopy.context_matching import parse_stack_match - within = parse_stack_match(within) + from loopy.context_matching import parse_match + within = parse_match(within) new_insns = [] for insn in kernel.instructions: - if within(((insn.id, insn.tags),)): + if within(kernel, insn): new_insns.append( insn.copy(tags=insn.tags + (new_tag,))) else: diff --git a/loopy/buffer.py b/loopy/buffer.py index 
d155dba7e852e6a3978c3c92d4f4a7cba29bc4f0..fdc3774b29f64ba5ae8c465076f48b805836d40b 100644 --- a/loopy/buffer.py +++ b/loopy/buffer.py @@ -51,7 +51,10 @@ class ArrayAccessReplacer(RuleAwareIdentityMapper): def map_variable(self, expr, expn_state): result = None - if expr.name == self.var_name and self.within(expn_state): + if expr.name == self.var_name and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack): result = self.map_array_access((), expn_state) if result is None: @@ -62,7 +65,10 @@ def map_subscript(self, expr, expn_state): result = None - if expr.aggregate.name == self.var_name and self.within(expn_state): + if expr.aggregate.name == self.var_name and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack): result = self.map_array_access(expr.index, expn_state) if result is None: @@ -172,7 +178,7 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, access_descriptors = [] for insn in kernel.instructions: - if not within((insn.id, insn.tags)): + if not within(kernel, insn, ()): continue for assignee, index in insn.assignees_and_indices(): diff --git a/loopy/context_matching.py b/loopy/context_matching.py index b259a0ddd33e20d98bf628db0c993501908bcb36..cc375c43897c0ad5e138d4f020f4e48f9a463451 100644 --- a/loopy/context_matching.py +++ b/loopy/context_matching.py @@ -1,9 +1,7 @@ """Matching functionality for instruction ids and subsitution rule invocations stacks.""" -from __future__ import division -from __future__ import absolute_import -from six.moves import range +from __future__ import division, absolute_import __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -27,150 +25,328 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
""" +from six.moves import range, intern + + NoneType = type(None) +from pytools.lex import RE + + +def re_from_glob(s): + import re + from fnmatch import translate + return re.compile("^"+translate(s.strip())+"$") + +# {{{ parsing -# {{{ id match objects +# {{{ lexer data + +_and = intern("and") +_or = intern("or") +_not = intern("not") +_openpar = intern("openpar") +_closepar = intern("closepar") + +_id = intern("_id") +_tag = intern("_tag") +_writes = intern("_writes") +_reads = intern("_reads") +_iname = intern("_iname") + +_whitespace = intern("_whitespace") + +# }}} -class AllMatch(object): - def __call__(self, identifier, tag): + +_LEX_TABLE = [ + (_and, RE(r"and\b")), + (_or, RE(r"or\b")), + (_not, RE(r"not\b")), + (_openpar, RE(r"\(")), + (_closepar, RE(r"\)")), + + # TERMINALS + (_id, RE(r"id:([\w?*]+)")), + (_tag, RE(r"tag:([\w?*]+)")), + (_writes, RE(r"writes:([\w?*]+)")), + (_reads, RE(r"reads:([\w?*]+)")), + (_iname, RE(r"iname:([\w?*]+)")), + + (_whitespace, RE("[ \t]+")), + ] + + +_TERMINALS = ([_id, _tag, _writes, _reads, _iname]) + +# {{{ operator precedence + +_PREC_OR = 10 +_PREC_AND = 20 +_PREC_NOT = 30 + +# }}} + + +# {{{ match expression + +class MatchExpressionBase(object): + def __call__(self, kernel, matchable): + raise NotImplementedError + + +class AllMatchExpression(MatchExpressionBase): + def __call__(self, kernel, matchable): return True -class RegexIdentifierMatch(object): - def __init__(self, id_re, tag_re=None): - self.id_re = id_re - self.tag_re = tag_re +class AndMatchExpression(MatchExpressionBase): + def __init__(self, children): + self.children = children + + def __call__(self, kernel, matchable): + return all(ch(kernel, matchable) for ch in self.children) + + def __str__(self): + return "(%s)" % (" and ".join(str(ch) for ch in self.children)) + + +class OrMatchExpression(MatchExpressionBase): + def __init__(self, children): + self.children = children - def __call__(self, identifier, tags): - assert isinstance(tags, (tuple, 
NoneType)) + def __call__(self, kernel, matchable): + return any(ch(kernel, matchable) for ch in self.children) - if self.tag_re is None: - return self.id_re.match(identifier) is not None + def __str__(self): + return "(%s)" % (" or ".join(str(ch) for ch in self.children)) + + +class NotMatchExpression(MatchExpressionBase): + def __init__(self, child): + self.child = child + + def __call__(self, kernel, matchable): + return not self.child(kernel, matchable) + + def __str__(self): + return "(not %s)" % str(self.child) + + +class GlobMatchExpressionBase(MatchExpressionBase): + def __init__(self, glob): + self.glob = glob + + import re + from fnmatch import translate + self.re = re.compile("^"+translate(glob.strip())+"$") + + def __str__(self): + descr = type(self).__name__ + descr = descr[:descr.find("Match")] + return descr.lower() + ":" + self.glob + + +class IdMatchExpression(GlobMatchExpressionBase): + def __call__(self, kernel, matchable): + return self.re.match(matchable.id) + + +class TagMatchExpression(GlobMatchExpressionBase): + def __call__(self, kernel, matchable): + if matchable.tags: + return any(self.re.match(tag) for tag in matchable.tags) else: - if not tags: - tags = ("",) + return False + - return ( - self.id_re.match(identifier) is not None - and any( - self.tag_re.match(tag) is not None - for tag in tags)) +class WritesMatchExpression(GlobMatchExpressionBase): + def __call__(self, kernel, matchable): + return any(self.re.match(name) + for name in matchable.write_dependency_names()) -class AlternativeMatch(object): - def __init__(self, matches): - self.matches = matches +class ReadsMatchExpression(GlobMatchExpressionBase): + def __call__(self, kernel, matchable): + return any(self.re.match(name) + for name in matchable.read_dependency_names()) - def __call__(self, identifier, tags): - from pytools import any - return any( - mtch(identifier, tags) for mtch in self.matches) + +class InameMatchExpression(GlobMatchExpressionBase): + def __call__(self, 
kernel, matchable): + return any(self.re.match(name) + for name in matchable.inames(kernel)) # }}} -# {{{ single id match parsing +# {{{ parser -def parse_id_match(id_matches): +def parse_match(expr_str): """Syntax examples:: - my_insn - compute_* - fetch*$first - fetch*$first,store*$first - - Alternatively, a list of *(name_glob, tag_glob)* tuples. + * ``id:yoink and writes:a_temp`` + * ``id:yoink and (not writes:a_temp or tag:input)`` """ + if not expr_str: + return AllMatchExpression() + + def parse_terminal(pstate): + next_tag = pstate.next_tag() + if next_tag is _id: + result = IdMatchExpression(pstate.next_match_obj().group(1)) + pstate.advance() + return result + elif next_tag is _tag: + result = TagMatchExpression(pstate.next_match_obj().group(1)) + pstate.advance() + return result + elif next_tag is _writes: + result = WritesMatchExpression(pstate.next_match_obj().group(1)) + pstate.advance() + return result + elif next_tag is _reads: + result = ReadsMatchExpression(pstate.next_match_obj().group(1)) + pstate.advance() + return result + elif next_tag is _iname: + result = InameMatchExpression(pstate.next_match_obj().group(1)) + pstate.advance() + return result + else: + pstate.expected("terminal") + + def inner_parse(pstate, min_precedence=0): + pstate.expect_not_end() + + if pstate.is_next(_not): + pstate.advance() + left_query = NotMatchExpression(inner_parse(pstate, _PREC_NOT)) + elif pstate.is_next(_openpar): + pstate.advance() + left_query = inner_parse(pstate) + pstate.expect(_closepar) + pstate.advance() + else: + left_query = parse_terminal(pstate) - if id_matches is None: - return AllMatch() + did_something = True + while did_something: + did_something = False + if pstate.is_at_end(): + return left_query - if isinstance(id_matches, str): - id_matches = id_matches.strip() - id_matches = id_matches.split(",") + next_tag = pstate.next_tag() - if len(id_matches) > 1: - return AlternativeMatch([ - parse_id_match(im) for im in id_matches]) + if 
next_tag is _and and _PREC_AND > min_precedence: + pstate.advance() + left_query = AndMatchExpression( + (left_query, inner_parse(pstate, _PREC_AND))) + did_something = True + elif next_tag is _or and _PREC_OR > min_precedence: + pstate.advance() + left_query = OrMatchExpression( + (left_query, inner_parse(pstate, _PREC_OR))) + did_something = True - if len(id_matches) == 0: - return AllMatch() + return left_query - id_match, = id_matches - del id_matches + from pytools.lex import LexIterator, lex + pstate = LexIterator( + [(tag, s, idx, matchobj) + for (tag, s, idx, matchobj) in lex(_LEX_TABLE, expr_str, match_objects=True) + if tag is not _whitespace], expr_str) - def re_from_glob(s): - import re - from fnmatch import translate - return re.compile("^"+translate(s.strip())+"$") + if pstate.is_at_end(): + pstate.raise_parse_error("unexpected end of input") - if not isinstance(id_match, tuple): - components = id_match.split("$") + result = inner_parse(pstate) + if not pstate.is_at_end(): + pstate.raise_parse_error("leftover input after completed parse") - if len(components) == 1: - return RegexIdentifierMatch(re_from_glob(components[0])) - elif len(components) == 2: - return RegexIdentifierMatch( - re_from_glob(components[0]), - re_from_glob(components[1])) - else: - raise RuntimeError("too many (%d) $-separated components in id match" - % len(components)) + return result # }}} +# }}} -# {{{ stack match objects -# these match from the tail of the stack +# {{{ stack match objects -class StackMatchBase(object): +class StackMatchComponent(object): pass -class AllStackMatch(StackMatchBase): - def __call__(self, stack): +class StackAllMatchComponent(StackMatchComponent): + def __call__(self, kernel, stack): return True -class StackIdMatch(StackMatchBase): - def __init__(self, id_match, up_match): - self.id_match = id_match - self.up_match = up_match +class StackBottomMatchComponent(StackMatchComponent): + def __call__(self, kernel, stack): + return not stack + + +class 
StackItemMatchComponent(StackMatchComponent): + def __init__(self, match_expr, inner_match): + self.match_expr = match_expr + self.inner_match = inner_match - def __call__(self, stack): + def __call__(self, kernel, stack): if not stack: return False - last = stack[-1] - if not self.id_match(*last): + outer = stack[0] + if not self.match_expr(kernel, outer): return False - if self.up_match is None: - return True - else: - return self.up_match(stack[:-1]) + return self.inner_match(kernel, stack[1:]) -class StackWildcardMatch(StackMatchBase): - def __init__(self, up_match): - self.up_match = up_match +class StackWildcardMatchComponent(StackMatchComponent): + def __init__(self, inner_match): + self.inner_match = inner_match - def __call__(self, stack): - if self.up_match is None: - return True + def __call__(self, kernel, stack): + for i in range(0, len(stack)): + if self.inner_match(kernel, stack[i:]): + return True - n = len(stack) + return False - if self.up_match(stack): - return True +# }}} - for i in range(1, n): - if self.up_match(stack[:-i]): - return True - return False +# {{{ stack matcher + +class RuleInvocationMatchable(object): + def __init__(self, id, tags): + self.id = id + self.tags = tags + + def write_dependency_names(self): + raise TypeError("writes: query may not be applied to rule invocations") + + def read_dependency_names(self): + raise TypeError("reads: query may not be applied to rule invocations") + + def inames(self, kernel): + raise TypeError("inames: query may not be applied to rule invocations") + + +class StackMatch(object): + def __init__(self, root_component): + self.root_component = root_component + + def __call__(self, kernel, insn, rule_stack): + """ + :arg rule_stack: a tuple of (name, tags) rule invocation, outermost first + """ + stack_of_matchables = [insn] + for id, tags in rule_stack: + stack_of_matchables.append(RuleInvocationMatchable(id, tags)) + + return self.root_component(kernel, stack_of_matchables) # }}} @@ -180,34 
+356,41 @@ class StackWildcardMatch(StackMatchBase): def parse_stack_match(smatch): """Syntax example:: - lowest < next < ... < highest + ... > outer > ... > next > innermost $ + insn > next + insn > ... > next > innermost $ - where `lowest` is necessarily the bottom of the stack. ``...`` matches an - arbitrary number of intervening stack levels. There is currently no way to - match the top of the stack. + ``...`` matches an arbitrary number of intervening stack levels. - Each of the entries is an identifier match as understood by - :func:`parse_id_match`. + Each of the entries is a match expression as understood by + :func:`parse_match`. """ - if isinstance(smatch, StackMatchBase): + if isinstance(smatch, StackMatch): return smatch - match = AllStackMatch() - if smatch is None: - return match + return StackMatch(StackAllMatchComponent()) + + smatch = smatch.strip() + + match = StackAllMatchComponent() + if smatch[-1] == "$": + match = StackBottomMatchComponent() + smatch = smatch[:-1] + + smatch = smatch.strip() - components = smatch.split("<") + components = smatch.split(">") for comp in components[::-1]: comp = comp.strip() if comp == "...": - match = StackWildcardMatch(match) + match = StackWildcardMatchComponent(match) else: - match = StackIdMatch(parse_id_match(comp), match) + match = StackItemMatchComponent(parse_match(comp), match) - return match + return StackMatch(match) # }}} diff --git a/loopy/expression.py b/loopy/expression.py index 2afb803b97b0e5b9f9ff1510da92ad10a50711dc..d912d528a7cc6a567db5aca684103eb736737030 100644 --- a/loopy/expression.py +++ b/loopy/expression.py @@ -1,4 +1,4 @@ -from __future__ import division, absolute_import +from __future__ import division, absolute_import, print_function __copyright__ = "Copyright (C) 2012-15 Andreas Kloeckner" @@ -79,9 +79,9 @@ class TypeInferenceMapper(CombineMapper): while dtypes: other = dtypes.pop() - if result.isbuiltin and other.isbuiltin: + if result.fields is None and other.fields is None: if 
(result, other) in [ - (np.int32, np.float32), (np.int32, np.float32)]: + (np.int32, np.float32), (np.float32, np.int32)]: # numpy makes this a double. I disagree. result = np.dtype(np.float32) else: @@ -89,11 +89,14 @@ class TypeInferenceMapper(CombineMapper): np.empty(0, dtype=result) + np.empty(0, dtype=other) ).dtype - elif result.isbuiltin and not other.isbuiltin: + + elif result.fields is None and other.fields is not None: # assume the non-native type takes over + # (This is used for vector types.) result = other - elif not result.isbuiltin and other.isbuiltin: + elif result.fields is not None and other.fields is None: # assume the non-native type takes over + # (This is used for vector types.) pass else: if result is not other: diff --git a/loopy/frontend/fortran/__init__.py b/loopy/frontend/fortran/__init__.py index 1c3d1283541c33ce3944e358f4abd451e2c881ee..aad2328b0bfbf4b37c6c6453358668d14fd39388 100644 --- a/loopy/frontend/fortran/__init__.py +++ b/loopy/frontend/fortran/__init__.py @@ -215,6 +215,13 @@ def parse_fortran(source, filename="<floopy code>", free_form=True, strict=True) """ :returns: a list of :class:`loopy.LoopKernel` objects """ + import logging + console = logging.StreamHandler() + console.setLevel(logging.INFO) + formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') + console.setFormatter(formatter) + logging.getLogger('fparser').addHandler(console) + from fparser import api tree = api.parse(source, isfree=free_form, isstrict=strict, analyze=False, ignore_comments=False) diff --git a/loopy/fusion.py b/loopy/fusion.py index 4431c2c7f56009b8e205cb310060b96cfa210eae..c14d936afb4ff063bad9e9ff7e1189daadf15a5c 100644 --- a/loopy/fusion.py +++ b/loopy/fusion.py @@ -170,11 +170,13 @@ def _fuse_two_kernels(knla, knlb): SubstitutionRuleMappingContext, RuleAwareSubstitutionMapper) from pymbolic.mapper.substitutor import make_subst_func + from loopy.context_matching import parse_stack_match srmc = SubstitutionRuleMappingContext( 
knlb.substitutions, knlb.get_var_name_generator()) subst_map = RuleAwareSubstitutionMapper( - srmc, make_subst_func(b_var_renames), within=lambda stack: True) + srmc, make_subst_func(b_var_renames), + within=parse_stack_match(None)) knlb = subst_map.map_kernel(knlb) # }}} diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 1f64e59e6c9e8ab6d599ea0cc61937df5a60ffcb..f930df60c93b1be4df80bcfd2789c60dca1e3654 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -902,43 +902,54 @@ class LoopKernel(RecordWithoutPickling): def __str__(self): lines = [] + from loopy.preprocess import add_default_dependencies + kernel = add_default_dependencies(self) + sep = 75*"-" lines.append(sep) - lines.append("KERNEL: " + self.name) + lines.append("KERNEL: " + kernel.name) lines.append(sep) lines.append("ARGUMENTS:") - for arg_name in sorted(self.arg_dict): - lines.append(str(self.arg_dict[arg_name])) + for arg_name in sorted(kernel.arg_dict): + lines.append(str(kernel.arg_dict[arg_name])) lines.append(sep) lines.append("DOMAINS:") - for dom, parents in zip(self.domains, self.all_parents_per_domain()): + for dom, parents in zip(kernel.domains, kernel.all_parents_per_domain()): lines.append(len(parents)*" " + str(dom)) lines.append(sep) lines.append("INAME IMPLEMENTATION TAGS:") - for iname in sorted(self.all_inames()): - line = "%s: %s" % (iname, self.iname_to_tag.get(iname)) + for iname in sorted(kernel.all_inames()): + line = "%s: %s" % (iname, kernel.iname_to_tag.get(iname)) lines.append(line) - if self.temporary_variables: + if kernel.temporary_variables: lines.append(sep) lines.append("TEMPORARIES:") - for tv in sorted(six.itervalues(self.temporary_variables), + for tv in sorted(six.itervalues(kernel.temporary_variables), key=lambda tv: tv.name): lines.append(str(tv)) - if self.substitutions: + if kernel.substitutions: lines.append(sep) lines.append("SUBSTIUTION RULES:") - for rule_name in sorted(six.iterkeys(self.substitutions)): - 
lines.append(str(self.substitutions[rule_name])) + for rule_name in sorted(six.iterkeys(kernel.substitutions)): + lines.append(str(kernel.substitutions[rule_name])) lines.append(sep) lines.append("INSTRUCTIONS:") loop_list_width = 35 - import loopy as lp - for insn in self.instructions: + printed_insn_ids = set() + + def print_insn(insn): + if insn.id in printed_insn_ids: + return + printed_insn_ids.add(insn.id) + + for dep_id in insn.insn_deps: + print_insn(kernel.id_to_insn[dep_id]) + if isinstance(insn, lp.ExpressionInstruction): lhs = str(insn.assignee) rhs = str(insn.expression) @@ -952,7 +963,7 @@ class LoopKernel(RecordWithoutPickling): trailing = [" "+l for l in insn.code.split("\n")] - loop_list = ",".join(sorted(self.insn_inames(insn))) + loop_list = ",".join(sorted(kernel.insn_inames(insn))) options = [insn.id] if insn.priority: @@ -975,8 +986,12 @@ class LoopKernel(RecordWithoutPickling): if insn.predicates: lines.append(10*" " + "if (%s)" % " && ".join(insn.predicates)) + import loopy as lp + for insn in kernel.instructions: + print_insn(insn) + dep_lines = [] - for insn in self.instructions: + for insn in kernel.instructions: if insn.insn_deps: dep_lines.append("%s : %s" % (insn.id, ",".join(insn.insn_deps))) if dep_lines: @@ -987,10 +1002,10 @@ class LoopKernel(RecordWithoutPickling): lines.append(sep) - if self.schedule is not None: + if kernel.schedule is not None: lines.append("SCHEDULE:") from loopy.schedule import dump_schedule - lines.append(dump_schedule(self.schedule)) + lines.append(dump_schedule(kernel.schedule)) lines.append(sep) return "\n".join(lines) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 7edf43e43bceb6ee3622f3645ad37c4375a703e0..1daa041c54a17214b6531751cf206878e6b1e676 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1,10 +1,6 @@ """UI for kernel creation.""" -from __future__ import division -from __future__ import absolute_import -import six -from six.moves import range -from 
six.moves import zip +from __future__ import division, absolute_import, print_function __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -36,6 +32,9 @@ from loopy.kernel.data import ( import islpy as isl from islpy import dim_type +import six +from six.moves import range, zip + import re import sys @@ -904,9 +903,10 @@ def guess_arg_shape_if_requested(kernel, default_order): armap.access_range, i) + 1, constants_only=False))) except: - print>>sys.stderr, "While trying to find shape axis %d of "\ - "argument '%s', the following " \ - "exception occurred:" % (i, arg.name) + print("While trying to find shape axis %d of " + "argument '%s', the following " + "exception occurred:" % (i, arg.name), + file=sys.stderr) raise shape = tuple(shape) diff --git a/loopy/precompute.py b/loopy/precompute.py index 935d6d44040cf56b3ca3bdbbec26e22ec750a05d..1227082a9b304ad920940e2339e643ad2da65daa 100644 --- a/loopy/precompute.py +++ b/loopy/precompute.py @@ -81,7 +81,10 @@ class RuleInvocationGatherer(RuleAwareIdentityMapper): if self.subst_tag is not None and self.subst_tag != tag: process_me = False - process_me = process_me and self.within(expn_state.stack) + process_me = process_me and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack) if not process_me: return super(RuleInvocationGatherer, self).map_substitution( @@ -132,7 +135,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): access_descriptors, array_base_map, storage_axis_names, storage_axis_sources, non1_storage_axis_names, - target_var_name): + temporary_name): super(RuleInvocationReplacer, self).__init__(rule_mapping_context) self.subst_name = subst_name @@ -146,12 +149,15 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): self.storage_axis_sources = storage_axis_sources self.non1_storage_axis_names = non1_storage_axis_names - self.target_var_name = target_var_name + self.temporary_name = temporary_name def map_substitution(self, name, tag, arguments, expn_state): if 
not ( name == self.subst_name - and self.within(expn_state.stack) + and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack) and (self.subst_tag is None or self.subst_tag == tag)): return super(RuleInvocationReplacer, self).map_substitution( name, tag, arguments, expn_state) @@ -196,7 +202,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): ax_index = simplify_via_aff(ax_index - sax_base_idx) stor_subscript.append(ax_index) - new_outer_expr = var(self.target_var_name) + new_outer_expr = var(self.temporary_name) if stor_subscript: new_outer_expr = new_outer_expr.index(tuple(stor_subscript)) @@ -210,9 +216,9 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): def precompute(kernel, subst_use, sweep_inames=[], within=None, - storage_axes=None, precompute_inames=None, storage_axis_to_tag={}, - default_tag="l.auto", dtype=None, fetch_bounding_box=False, - temporary_is_local=None): + storage_axes=None, temporary_name=None, precompute_inames=None, + storage_axis_to_tag={}, default_tag="l.auto", dtype=None, + fetch_bounding_box=False, temporary_is_local=None): """Precompute the expression described in the substitution rule determined by *subst_use* and store it in a temporary array. A precomputation needs two things to operate, a list of *sweep_inames* (order irrelevant) and an @@ -263,6 +269,11 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, May also equivalently be a comma-separated string. :arg within: a stack match as understood by :func:`loopy.context_matching.parse_stack_match`. + :arg temporary_name: + The temporary variable name to use for storing the precomputed data. + If it does not exist, it will be created. If it does exist, its properties + (such as size, type) are checked (and updated, if possible) to match + its use. :arg precompute_inames: If the specified inames do not already exist, they will be created. 
If they do already exist, their loop domain is verified @@ -382,8 +393,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, import loopy as lp for insn in kernel.instructions: if isinstance(insn, lp.ExpressionInstruction): - invg(insn.assignee, insn.id, insn.tags) - invg(insn.expression, insn.id, insn.tags) + invg(insn.assignee, kernel, insn) + invg(insn.expression, kernel, insn) access_descriptors = invg.access_descriptors if not access_descriptors: @@ -584,8 +595,10 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # {{{ set up compute insn - target_var_name = var_name_gen(based_on=c_subst_name) - assignee = var(target_var_name) + if temporary_name is None: + temporary_name = var_name_gen(based_on=c_subst_name) + + assignee = var(temporary_name) if non1_storage_axis_names: assignee = assignee.index( @@ -607,13 +620,13 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) - from loopy.context_matching import AllStackMatch + from loopy.context_matching import parse_stack_match expr_subst_map = RuleAwareSubstitutionMapper( rule_mapping_context, make_subst_func(storage_axis_subst_dict), - within=AllStackMatch()) + within=parse_stack_match(None)) - compute_expression = expr_subst_map(subst.expression, None, None) + compute_expression = expr_subst_map(subst.expression, kernel, None) # }}} @@ -633,7 +646,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, access_descriptors, abm, storage_axis_names, storage_axis_sources, non1_storage_axis_names, - target_var_name) + temporary_name) kernel = invr.map_kernel(kernel) kernel = kernel.copy( @@ -655,15 +668,69 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, if temporary_is_local is None: temporary_is_local = lp.auto + new_temp_shape = tuple(abm.non1_storage_shape) + new_temporary_variables = kernel.temporary_variables.copy() - temp_var = 
lp.TemporaryVariable( - name=target_var_name, - dtype=dtype, - base_indices=(0,)*len(abm.non1_storage_shape), - shape=tuple(abm.non1_storage_shape), - is_local=temporary_is_local) - - new_temporary_variables[target_var_name] = temp_var + if temporary_name not in new_temporary_variables: + temp_var = lp.TemporaryVariable( + name=temporary_name, + dtype=dtype, + base_indices=(0,)*len(new_temp_shape), + shape=tuple(abm.non1_storage_shape), + is_local=temporary_is_local) + + else: + temp_var = new_temporary_variables[temporary_name] + + # {{{ check and adapt existing temporary + + if temp_var.dtype is lp.auto: + pass + elif temp_var.dtype is not lp.auto and dtype is lp.auto: + dtype = temp_var.dtype + elif temp_var.dtype is not lp.auto and dtype is not lp.auto: + if temp_var.dtype != dtype: + raise LoopyError("Existing and new dtype of temporary '%s' " + "do not match (existing: %s, new: %s)" + % (temporary_name, temp_var.dtype, dtype)) + + temp_var = temp_var.copy(dtype=dtype) + + if len(temp_var.shape) != len(new_temp_shape): + raise LoopyError("Existing and new temporary '%s' do not " + "have matching number of dimensions " + % (temporary_name, + len(temp_var.shape), len(new_temp_shape))) + + if temp_var.base_indices != (0,) * len(new_temp_shape): + raise LoopyError("Existing and new temporary '%s' do not " + "have matching number of dimensions " + % (temporary_name, + len(temp_var.shape), len(new_temp_shape))) + + new_temp_shape = tuple( + max(i, ex_i) + for i, ex_i in zip(new_temp_shape, temp_var.shape)) + + temp_var = temp_var.copy(shape=new_temp_shape) + + if temporary_is_local == temp_var.is_local: + pass + elif temporary_is_local is lp.auto: + temporary_is_local = temp_var.is_local + elif temp_var.is_local is lp.auto: + pass + else: + raise LoopyError("Existing and new temporary '%s' do not " + "have matching values of 'is_local'" + % (temporary_name, + temp_var.is_local, temporary_is_local)) + + temp_var = temp_var.copy(is_local=temporary_is_local) + + # }}} 
+ + new_temporary_variables[temporary_name] = temp_var kernel = kernel.copy( temporary_variables=new_temporary_variables) diff --git a/loopy/statistics.py b/loopy/statistics.py index efe2f1d0eabb1133dc29d2a2b7592b95f3a260be..2fc5b37fa49acbfe3ff5f1672937369b8b8884a5 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1,4 +1,4 @@ -from __future__ import division, absolute_import +from __future__ import division, absolute_import, print_function __copyright__ = "Copyright (C) 2015 James Stevens" @@ -73,9 +73,6 @@ class TypeToOpCountMap: except KeyError: return isl.PwQPolynomial('{ 0 }') - def __setitem__(self, index, value): - self.dict[index] = value - def __str__(self): return str(self.dict) diff --git a/loopy/subst.py b/loopy/subst.py index 3b112a4fd44880f6cd66019aff1023ee426ae125..a0a031718962df3053b80058818b2f2a4b88d2c8 100644 --- a/loopy/subst.py +++ b/loopy/subst.py @@ -258,7 +258,10 @@ class TemporaryToSubstChanger(RuleAwareIdentityMapper): my_def_id = self.usage_to_definition[my_insn_id] - if not self.within(expn_state.stack): + if not self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack): self.saw_unmatched_usage_sites[my_def_id] = True return None diff --git a/loopy/symbolic.py b/loopy/symbolic.py index bad7f840f648bc72d7f505fb72a200f3df2fb0d6..3311d23165b5f34f16c0c745218fd7da3a0ad2ac 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -398,11 +398,13 @@ def parse_tagged_name(expr): class ExpansionState(Record): """ + .. attribute:: kernel + .. attribute:: instruction + .. attribute:: stack a tuple representing the current expansion stack, as a tuple - of (name, tag) pairs. At the top level, this should be initialized to a - tuple with the id of the calling instruction. + of (name, tag) pairs. .. 
attribute:: arg_context @@ -411,7 +413,7 @@ class ExpansionState(Record): @property def insn_id(self): - return self.stack[0][0] + return self.instruction.id def apply_arg_context(self, expr): from pymbolic.mapper.substitutor import make_subst_func @@ -625,16 +627,18 @@ class RuleAwareIdentityMapper(IdentityMapper): else: return sym - def __call__(self, expr, insn_id, insn_tags): - if insn_id is not None: - stack = ((insn_id, insn_tags),) - else: - stack = () + def __call__(self, expr, kernel, insn): + from loopy.kernel.data import InstructionBase + assert insn is None or isinstance(insn, InstructionBase) - return IdentityMapper.__call__(self, expr, ExpansionState( - stack=stack, arg_context={})) + return IdentityMapper.__call__(self, expr, + ExpansionState( + kernel=kernel, + instruction=insn, + stack=(), + arg_context={})) - def map_instruction(self, insn): + def map_instruction(self, kernel, insn): return insn def map_kernel(self, kernel): @@ -642,8 +646,8 @@ class RuleAwareIdentityMapper(IdentityMapper): # While subst rules are not allowed in assignees, the mapper # may perform tasks entirely unrelated to subst rules, so # we must map assignees, too. 
- self.map_instruction( - insn.with_transformed_expressions(self, insn.id, insn.tags)) + self.map_instruction(kernel, + insn.with_transformed_expressions(self, kernel, insn)) for insn in kernel.instructions] return kernel.copy(instructions=new_insns) @@ -658,7 +662,8 @@ class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper): def map_variable(self, expr, expn_state): if (expr.name in expn_state.arg_context - or not self.within(expn_state.stack)): + or not self.within( + expn_state.kernel, expn_state.instruction, expn_state.stack)): return super(RuleAwareSubstitutionMapper, self).map_variable( expr, expn_state) @@ -685,7 +690,7 @@ class RuleAwareSubstitutionRuleExpander(RuleAwareIdentityMapper): new_stack = expn_state.stack + ((name, tags),) - if self.within(new_stack): + if self.within(expn_state.kernel, expn_state.instruction, new_stack): # expand rule = self.rules[name] diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 1dc3aafcfb63789255a7f5fd79b178e657b3c74d..d54640b5d8e277be32f9314db1453a05f9097a08 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -373,17 +373,17 @@ class LoopyCCodeMapper(RecursiveMapper): # This made it through type 'guessing' above, and it # was concluded above (search for COMPLEX_GUESS_LOGIC), # that nothing was lost by using single precision. 
- cast_type = "cfloat_t" + cast_type = "cfloat" else: if dtype == np.complex128: - cast_type = "cdouble_t" + cast_type = "cdouble" elif dtype == np.complex64: - cast_type = "cfloat_t" + cast_type = "cfloat" else: raise RuntimeError("unsupported complex type in expression " "generation: %s" % type(expr)) - return "(%s) (%s, %s)" % (cast_type, repr(expr.real), repr(expr.imag)) + return "%s_new(%s, %s)" % (cast_type, repr(expr.real), repr(expr.imag)) else: if type_context == "f": return repr(float(expr))+"f" @@ -490,11 +490,20 @@ class LoopyCCodeMapper(RecursiveMapper): if 'c' == self.infer_type(child).kind] real_sum = self.join_rec(" + ", reals, PREC_SUM, type_context) - complex_sum = self.join_rec( - " + ", complexes, PREC_SUM, type_context, tgt_dtype) + + if len(complexes) == 1: + myprec = PREC_SUM + else: + myprec = PREC_NONE + + complex_sum = self.rec(complexes[0], myprec, type_context, tgt_dtype) + for child in complexes[1:]: + complex_sum = "%s_add(%s, %s)" % ( + tgt_name, complex_sum, + self.rec(child, PREC_NONE, type_context, tgt_dtype)) if real_sum: - result = "%s_fromreal(%s) + %s" % (tgt_name, real_sum, complex_sum) + result = "%s_radd(%s, %s)" % (tgt_name, real_sum, complex_sum) else: result = complex_sum @@ -545,8 +554,7 @@ class LoopyCCodeMapper(RecursiveMapper): self.rec(child, PREC_NONE, type_context, tgt_dtype)) if real_prd: - # elementwise semantics are correct - result = "%s*%s" % (real_prd, complex_prd) + result = "%s_rmul(%s, %s)" % (tgt_name, real_prd, complex_prd) else: result = complex_prd @@ -591,9 +599,10 @@ class LoopyCCodeMapper(RecursiveMapper): if not (n_complex or d_complex): return base_impl(expr, enclosing_prec, type_context) elif n_complex and not d_complex: - # elementwise semantics are correct - return base_impl(expr, enclosing_prec, type_context, - num_tgt_dtype=tgt_dtype) + return "%s_divider(%s, %s)" % ( + self.complex_type_name(tgt_dtype), + self.rec(expr.numerator, PREC_NONE, type_context, tgt_dtype), + 
self.rec(expr.denominator, PREC_NONE, type_context)) elif not n_complex and d_complex: return "%s_rdivide(%s, %s)" % ( self.complex_type_name(tgt_dtype), diff --git a/loopy/version.py b/loopy/version.py index e7d877aa136ef4ad15774388ba9c9cb37b828ed1..71911e9570f3e404c9c2fe64a479f8d97259906d 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -25,4 +25,4 @@ VERSION = (2014, 1) VERSION_STATUS = "" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS -DATA_MODEL_VERSION = "v6" +DATA_MODEL_VERSION = "v8" diff --git a/test/test_fortran.py b/test/test_fortran.py index d361b15dc3d5f280bd9a4dadd4dbd603d4470284..4117b80a27b243dee1db94b5a0bb2b83b2ec8d49 100644 --- a/test/test_fortran.py +++ b/test/test_fortran.py @@ -267,7 +267,7 @@ def test_tagged(ctx_factory): knl, = lp.parse_fortran(fortran_src) - assert sum(1 for insn in lp.find_instructions(knl, "*$input")) == 2 + assert sum(1 for insn in lp.find_instructions(knl, "tag:input")) == 2 @pytest.mark.parametrize("buffer_inames", [ @@ -408,6 +408,10 @@ def test_fuse_kernels(ctx_factory): assert len(knl.temporary_variables) == 2 + # This is needed for correctness, otherwise ordering could foul things up. + knl = lp.temporary_to_subst(knl, "prev") + knl = lp.temporary_to_subst(knl, "prev_0") + ctx = ctx_factory() lp.auto_test_vs_ref(xyderiv, ctx, knl, parameters=dict(nelements=20, ndofs=4)) diff --git a/test/test_loopy.py b/test/test_loopy.py index 2173347cadaea66a4ec53d78fb8c77abb24cc7bb..1527e4ff710871f4ee7cddde95ad2016d78969a2 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -65,7 +65,7 @@ def test_complicated_subst(ctx_factory): a[i] = h$one(i) * h$two(i) """) - knl = lp.expand_subst(knl, "g$two < h$two") + knl = lp.expand_subst(knl, "... 
> id:h and tag:two > id:g and tag:two") print(knl) diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py index 33d15f88dab634c0420474d91fdd74efbfae4ae2..a00fce1776dd8d0722e59ae544ce439568a66d5d 100644 --- a/test/test_sem_reagan.py +++ b/test/test_sem_reagan.py @@ -39,7 +39,7 @@ def test_tim2d(ctx_factory): n = 8 from pymbolic import var - K_sym = var("K") + K_sym = var("K") # noqa field_shape = (K_sym, n, n) @@ -70,8 +70,8 @@ def test_tim2d(ctx_factory): ], name="semlap2D", assumptions="K>=1") - knl = lp.duplicate_inames(knl, "o", within="ur") - knl = lp.duplicate_inames(knl, "o", within="us") + knl = lp.duplicate_inames(knl, "o", within="id:ur") + knl = lp.duplicate_inames(knl, "o", within="id:us") seq_knl = knl @@ -93,13 +93,13 @@ def test_tim2d(ctx_factory): knl = lp.tag_inames(knl, dict(o="unr")) knl = lp.tag_inames(knl, dict(m="unr")) - knl = lp.set_instruction_priority(knl, "D_fetch", 5) + knl = lp.set_instruction_priority(knl, "id:D_fetch", 5) print(knl) return knl for variant in [variant_orig]: - K = 1000 + K = 1000 # noqa lp.auto_test_vs_ref(seq_knl, ctx, variant(knl), op_count=[K*(n*n*n*2*2 + n*n*2*3 + n**3 * 2*2)/1e9], op_label=["GFlops"], diff --git a/test/test_statistics.py b/test/test_statistics.py index 29cb5f98ed273a178a0dd4cc927b952ec41f8d76..b9ec0a1c48b0e579b1029efeef81aa1877b68b76 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -26,7 +26,8 @@ import sys from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) -from loopy.statistics import * # noqa +import loopy as lp +from loopy.statistics import get_op_poly # noqa import numpy as np @@ -131,7 +132,7 @@ def test_op_counter_bitwise(): [ """ c[i, j, k] = (a[i,j,k] | 1) + (b[i,j,k] & 1) - e[i, k] = (g[i,k] ^ k)*~(h[i,k+1]) + (g[i, k] << (h[i,k] >> k)) + e[i, k] = (g[i,k] ^ k)*(~h[i,k+1]) + (g[i, k] << (h[i,k] >> k)) """ ], name="bitwise", assumptions="n,m,l >= 1") @@ -140,6 +141,7 @@ def test_op_counter_bitwise(): knl, dict( 
a=np.int32, b=np.int32, g=np.int64, h=np.int64)) + poly = get_op_poly(knl) n = 10 @@ -192,3 +194,4 @@ if __name__ == "__main__": else: from py.test.cmdline import main main([__file__]) +