From 344c1145920f5fd9870d9fddf6632b4d3d287068 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 23 Jun 2019 00:51:11 -0500 Subject: [PATCH 1/6] switch to ply parsing in query lang --- loopy/match.py | 236 ++++++++++++++++++++----------------------------- 1 file changed, 95 insertions(+), 141 deletions(-) diff --git a/loopy/match.py b/loopy/match.py index 3c047e463..c322cfb9f 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -25,13 +25,14 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -from six.moves import range, intern +from six.moves import range +from loopy.diagnostic import LoopyError +import ply.lex as lex +import ply.yacc as yacc NoneType = type(None) -from pytools.lex import RE - __doc__ = """ .. autofunction:: parse_match @@ -59,58 +60,6 @@ def re_from_glob(s): return re.compile("^"+translate(s.strip())+"$") -# {{{ parsing - -# {{{ lexer data - -_and = intern("and") -_or = intern("or") -_not = intern("not") -_openpar = intern("openpar") -_closepar = intern("closepar") - -_id = intern("_id") -_tag = intern("_tag") -_writes = intern("_writes") -_reads = intern("_reads") -_iname = intern("_iname") - -_whitespace = intern("_whitespace") - -# }}} - - -_LEX_TABLE = [ - (_and, RE(r"and\b")), - (_or, RE(r"or\b")), - (_not, RE(r"not\b")), - (_openpar, RE(r"\(")), - (_closepar, RE(r"\)")), - - # TERMINALS - (_id, RE(r"id:([\w?*]+)")), - (_tag, RE(r"tag:([\w?*]+)")), - (_writes, RE(r"writes:([\w?*]+)")), - (_reads, RE(r"reads:([\w?*]+)")), - (_iname, RE(r"iname:([\w?*]+)")), - - (_whitespace, RE("[ \t]+")), - ] - - -_TERMINALS = ([_id, _tag, _writes, _reads, _iname]) - -# {{{ operator precedence - -_PREC_OR = 10 -_PREC_AND = 20 -_PREC_NOT = 30 - -# }}} - -# }}} - - # {{{ match expression class MatchExpressionBase(object): @@ -278,94 +227,99 @@ def parse_match(expr): * ``id:yoink and writes:a_temp`` * ``id:yoink and (not writes:a_temp or tag:input)`` """ - if not expr: - return All() - - def parse_terminal(pstate): - next_tag = pstate.next_tag() - if next_tag is _id: - result = Id(pstate.next_match_obj().group(1)) - pstate.advance() - return result - elif next_tag is _tag: - result = Tagged(pstate.next_match_obj().group(1)) - pstate.advance() - return result - elif next_tag is _writes: - result = Writes(pstate.next_match_obj().group(1)) - pstate.advance() - return result - elif next_tag is _reads: - result = Reads(pstate.next_match_obj().group(1)) - pstate.advance() - return result - elif next_tag is _iname: - result = Iname(pstate.next_match_obj().group(1)) - pstate.advance() - return result + + # None -> '' + expr = expr if expr else '' + + _RESERVED_TO_TYPES = { + 'id': Id, + 'iname': Iname, + 'reads': Reads, + 'writes': Writes, + 'tag': Tagged + } + + _BINARY_OPS_TO_TYPES = { + 'and': And, + 'or': Or + } + + reserved = { + 'id': 'ID', + 'iname': 'INAME', + 'reads': 'READS', + 'writes': 'WRITES', + 'tag': 'TAG', + + 'not': 'NOT', + 'or': 'OR', + 'and': 'AND'} + + tokens = ('NAME', 'COLON', 'LPAREN', 'RPAREN',) + tuple(reserved.values()) # noqa + + precedence = ( # noqa + ('left', 'OR'), + ('left', 'AND'), + ('left', 'NOT'),) + + def t_NAME(t): + r'[a-zA-Z_*][a-zA-Z0-9_*.]*' + t.type = reserved.get(t.value, 'NAME') + return t + + t_COLON = r':' # noqa + t_LPAREN = r'\(' # noqa + t_RPAREN = r'\)' # noqa + t_ignore = ' \t' # noqa + + def t_error(t): + raise RuntimeError("Illegal character '%s'." % t.value[0]) + + def p_expr_of_binary_ops(p): + '''expression : expression AND expression + | expression OR expression''' + children = [] + if type(p[1]) == _BINARY_OPS_TO_TYPES[p[2]]: + children.extend(p[1].children) else: - pstate.expected("terminal") - - def inner_parse(pstate, min_precedence=0): - pstate.expect_not_end() - - if pstate.is_next(_not): - pstate.advance() - left_query = Not(inner_parse(pstate, _PREC_NOT)) - elif pstate.is_next(_openpar): - pstate.advance() - left_query = inner_parse(pstate) - pstate.expect(_closepar) - pstate.advance() + children.append(p[1]) + if type(p[3]) == _BINARY_OPS_TO_TYPES[p[2]]: + children.extend(p[3].children) else: - left_query = parse_terminal(pstate) - - did_something = True - while did_something: - did_something = False - if pstate.is_at_end(): - return left_query - - next_tag = pstate.next_tag() - - if next_tag is _and and _PREC_AND > min_precedence: - pstate.advance() - left_query = And( - (left_query, inner_parse(pstate, _PREC_AND))) - did_something = True - elif next_tag is _or and _PREC_OR > min_precedence: - pstate.advance() - left_query = Or( - (left_query, inner_parse(pstate, _PREC_OR))) - did_something = True - - return left_query - - if isinstance(expr, MatchExpressionBase): - return expr - - from pytools.lex import LexIterator, lex, InvalidTokenError - try: - pstate = LexIterator( - [(tag, s, idx, matchobj) - for (tag, s, idx, matchobj) in lex(_LEX_TABLE, expr, - match_objects=True) - if tag is not _whitespace], expr) - except InvalidTokenError as e: - from loopy.diagnostic import LoopyError - raise LoopyError( - "invalid match expression: '{match_expr}' ({err_type}: {err_str})" - .format( - match_expr=expr, - err_type=type(e).__name__, - err_str=str(e))) - - if pstate.is_at_end(): - pstate.raise_parse_error("unexpected end of input") - - result = inner_parse(pstate) - if not pstate.is_at_end(): - pstate.raise_parse_error("leftover input after completed parse") + children.append(p[3]) + p[0] = _BINARY_OPS_TO_TYPES[p[2]](tuple(children)) + + def p_expr_of_unary_ops(p): + 'expression : NOT expression' + p[0] = Not(p[2]) + + def p_expr_of_parens(p): + 'expression : LPAREN expression RPAREN' + p[0] = p[2] + + def p_expr_as_term(p): + 'expression : term' + p[0] = p[1] + + def p_terminal(p): + '''term : ID COLON NAME + | INAME COLON NAME + | READS COLON NAME + | WRITES COLON NAME + | TAG COLON NAME''' + p[0] = _RESERVED_TO_TYPES[p[1]](p[3]) + + def p_empty_terminal(p): + 'term :' + p[0] = All() + + def p_error(p): + raise LoopyError("Syntax error at '%s'." % str(p)) + + lexer = lex.lex() # noqa + + parser = yacc.yacc() + result = parser.parse(expr) return result -- GitLab From 0852ce8acbd3dcd9cd9cb11d69efdba3e2437df9 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 23 Jun 2019 02:42:59 -0500 Subject: [PATCH 2/6] dont parse expressions which are already MatchExpressionBase --- loopy/match.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/loopy/match.py b/loopy/match.py index c322cfb9f..6f395d058 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -231,6 +231,9 @@ def parse_match(expr): # None -> '' expr = expr if expr else '' + if isinstance(expr, MatchExpressionBase): + return expr + _RESERVED_TO_TYPES = { 'id': Id, 'iname': Iname, -- GitLab From cbcc220c3a7bdbfbdbac90e7254a419dcb614566 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 23 Jun 2019 02:51:29 -0500 Subject: [PATCH 3/6] removes duplication in property matching --- loopy/match.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/loopy/match.py b/loopy/match.py index 6f395d058..9a07f5cb6 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -305,13 +305,17 @@ def parse_match(expr): p[0] = p[1] def p_terminal(p): - '''term : ID COLON NAME - | INAME COLON NAME - | READS COLON NAME - | WRITES COLON NAME - | TAG COLON NAME''' + 'term : prop COLON NAME' p[0] = _RESERVED_TO_TYPES[p[1]](p[3]) + def p_prop(p): + '''prop : ID + | INAME + | TAG + | READS + | WRITES''' + p[0] = p[1] + def p_empty_terminal(p): 'term :' p[0] = All() -- GitLab From ec75a04e19e63651a576f8f0d287e939683b9f85 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 24 Jun 2019 14:07:39 -0500 Subject: [PATCH 4/6] stops PLY from logging info to stderr --- loopy/match.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/match.py b/loopy/match.py index 9a07f5cb6..7a627bd3f 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -325,7 +325,7 @@ def parse_match(expr): lexer = lex.lex() # noqa - parser = yacc.yacc() + parser = yacc.yacc(debug=False) result = parser.parse(expr) return result -- GitLab From 803515b78580738fb9596c9e5018dbd05a36596f Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 4 Jul 2019 09:30:57 -0500 Subject: [PATCH 5/6] changes according to review: adds tests; details noqa's --- loopy/match.py | 21 ++++++++++++--------- test/test_loopy.py | 12 ++++++++++++ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/loopy/match.py b/loopy/match.py index 7a627bd3f..6bb260a3c 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -228,12 +228,14 @@ def parse_match(expr): * ``id:yoink and (not writes:a_temp or tag:input)`` """ - # None -> '' - expr = expr if expr else '' + if expr is None: + return All() if isinstance(expr, MatchExpressionBase): return expr + assert isinstance(expr, str) + _RESERVED_TO_TYPES = { 'id': Id, 'iname': Iname, @@ -258,9 +260,10 @@ def parse_match(expr): 'or': 'OR', 'and': 'AND'} - tokens = ('NAME', 'COLON', 'LPAREN', 'RPAREN',) + tuple(reserved.values()) # noqa + tokens = ('NAME', 'COLON', 'LPAREN', 'RPAREN', # noqa: F841 + ) + tuple(reserved.values()) - precedence = ( # noqa + precedence = ( # noqa: F841 ('left', 'OR'), ('left', 'AND'), ('left', 'NOT'),) @@ -270,10 +273,10 @@ def parse_match(expr): t.type = reserved.get(t.value, 'NAME') return t - t_COLON = r':' # noqa - t_LPAREN = r'\(' # noqa - t_RPAREN = r'\)' # noqa - t_ignore = ' \t' # noqa + t_COLON = r':' # noqa: F841 + t_LPAREN = r'\(' # noqa: F841 + t_RPAREN = r'\)' # noqa: F841 + t_ignore = ' \t' # noqa: F841 def t_error(t): raise RuntimeError("Illegal character '%s'." % t.value[0]) @@ -323,7 +326,7 @@ def parse_match(expr): def p_error(p): raise LoopyError("Syntax error at '%s'." % str(p)) - lexer = lex.lex() # noqa + lexer = lex.lex() # noqa: F841 parser = yacc.yacc(debug=False) result = parser.parse(expr) diff --git a/test/test_loopy.py b/test/test_loopy.py index 89b4f5e63..ef445fa5e 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2986,6 +2986,18 @@ def test_shape_mismatch_check(ctx_factory): prg(queue, a=a, b=b) +def test_query_lang_parse(): + from loopy.match import (parse_match, Id, And, Writes, Reads, Not, Iname, + Tagged, Or) + expr1 = 'id:yoink and writes:a_temp and reads:b_temp' + expr2 = 'id:yoink and (not iname:i* or tag:input)' + + assert parse_match(expr1) == And( + (Id('yoink'), Writes('a_temp'), Reads('b_temp'))) + assert parse_match(expr2) == And(( + Id('yoink'), Or((Not(Iname('i*')), Tagged('input'))))) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From eb32deadd581324378e352ea14d821ca5eca3e6d Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 4 Jul 2019 10:42:12 -0500 Subject: [PATCH 6/6] flake8: ignore N806 --- loopy/match.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/match.py b/loopy/match.py index 6bb260a3c..8ab9fd743 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -273,9 +273,9 @@ def parse_match(expr): t.type = reserved.get(t.value, 'NAME') return t - t_COLON = r':' # noqa: F841 - t_LPAREN = r'\(' # noqa: F841 - t_RPAREN = r'\)' # noqa: F841 + t_COLON = r':' # noqa: F841,N806 + t_LPAREN = r'\(' # noqa: F841,N806 + t_RPAREN = r'\)' # noqa: F841,N806 t_ignore = ' \t' # noqa: F841 def t_error(t): -- GitLab