From 344c1145920f5fd9870d9fddf6632b4d3d287068 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sun, 23 Jun 2019 00:51:11 -0500
Subject: [PATCH 1/6] switch to ply parsing in query lang

---
 loopy/match.py | 236 ++++++++++++++++++++-----------------------------
 1 file changed, 95 insertions(+), 141 deletions(-)

diff --git a/loopy/match.py b/loopy/match.py
index 3c047e463..c322cfb9f 100644
--- a/loopy/match.py
+++ b/loopy/match.py
@@ -25,13 +25,14 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
-from six.moves import range, intern
+from six.moves import range
+from loopy.diagnostic import LoopyError
+import ply.lex as lex
+import ply.yacc as yacc
 
 
 NoneType = type(None)
 
-from pytools.lex import RE
-
 __doc__ = """
 .. autofunction:: parse_match
 
@@ -59,58 +60,6 @@ def re_from_glob(s):
     return re.compile("^"+translate(s.strip())+"$")
 
 
-# {{{ parsing
-
-# {{{ lexer data
-
-_and = intern("and")
-_or = intern("or")
-_not = intern("not")
-_openpar = intern("openpar")
-_closepar = intern("closepar")
-
-_id = intern("_id")
-_tag = intern("_tag")
-_writes = intern("_writes")
-_reads = intern("_reads")
-_iname = intern("_iname")
-
-_whitespace = intern("_whitespace")
-
-# }}}
-
-
-_LEX_TABLE = [
-    (_and, RE(r"and\b")),
-    (_or, RE(r"or\b")),
-    (_not, RE(r"not\b")),
-    (_openpar, RE(r"\(")),
-    (_closepar, RE(r"\)")),
-
-    # TERMINALS
-    (_id, RE(r"id:([\w?*]+)")),
-    (_tag, RE(r"tag:([\w?*]+)")),
-    (_writes, RE(r"writes:([\w?*]+)")),
-    (_reads, RE(r"reads:([\w?*]+)")),
-    (_iname, RE(r"iname:([\w?*]+)")),
-
-    (_whitespace, RE("[ \t]+")),
-    ]
-
-
-_TERMINALS = ([_id, _tag, _writes, _reads, _iname])
-
-# {{{ operator precedence
-
-_PREC_OR = 10
-_PREC_AND = 20
-_PREC_NOT = 30
-
-# }}}
-
-# }}}
-
-
 # {{{ match expression
 
 class MatchExpressionBase(object):
@@ -278,94 +227,99 @@ def parse_match(expr):
     * ``id:yoink and writes:a_temp``
     * ``id:yoink and (not writes:a_temp or tag:input)``
     """
-    if not expr:
-        return All()
-
-    def parse_terminal(pstate):
-        next_tag = pstate.next_tag()
-        if next_tag is _id:
-            result = Id(pstate.next_match_obj().group(1))
-            pstate.advance()
-            return result
-        elif next_tag is _tag:
-            result = Tagged(pstate.next_match_obj().group(1))
-            pstate.advance()
-            return result
-        elif next_tag is _writes:
-            result = Writes(pstate.next_match_obj().group(1))
-            pstate.advance()
-            return result
-        elif next_tag is _reads:
-            result = Reads(pstate.next_match_obj().group(1))
-            pstate.advance()
-            return result
-        elif next_tag is _iname:
-            result = Iname(pstate.next_match_obj().group(1))
-            pstate.advance()
-            return result
+
+    # None -> ''
+    expr = expr if expr else ''
+
+    _RESERVED_TO_TYPES = {
+            'id': Id,
+            'iname': Iname,
+            'reads': Reads,
+            'writes': Writes,
+            'tag': Tagged
+            }
+
+    _BINARY_OPS_TO_TYPES = {
+            'and': And,
+            'or': Or
+            }
+
+    reserved = {
+            'id':     'ID',
+            'iname':  'INAME',
+            'reads':  'READS',
+            'writes': 'WRITES',
+            'tag':    'TAG',
+
+            'not': 'NOT',
+            'or': 'OR',
+            'and': 'AND'}
+
+    tokens = ('NAME', 'COLON', 'LPAREN', 'RPAREN',) + tuple(reserved.values())  # noqa
+
+    precedence = ( # noqa
+            ('left', 'OR'),
+            ('left', 'AND'),
+            ('left', 'NOT'),)
+
+    def t_NAME(t):  # '?' is a glob wildcard the old lexer accepted
+        r'[a-zA-Z_*?][a-zA-Z0-9_*?.]*'
+        t.type = reserved.get(t.value, 'NAME')
+        return t
+
+    t_COLON = r':'  # noqa
+    t_LPAREN = r'\('  # noqa
+    t_RPAREN = r'\)'  # noqa
+    t_ignore = ' \t'  # noqa
+
+    def t_error(t):
+        raise RuntimeError("Illegal character '%s'." % t.value[0])
+
+    def p_expr_of_binary_ops(p):
+        '''expression : expression AND expression
+                      | expression OR expression'''
+        children = []
+        if type(p[1]) == _BINARY_OPS_TO_TYPES[p[2]]:
+            children.extend(p[1].children)
         else:
-            pstate.expected("terminal")
-
-    def inner_parse(pstate, min_precedence=0):
-        pstate.expect_not_end()
-
-        if pstate.is_next(_not):
-            pstate.advance()
-            left_query = Not(inner_parse(pstate, _PREC_NOT))
-        elif pstate.is_next(_openpar):
-            pstate.advance()
-            left_query = inner_parse(pstate)
-            pstate.expect(_closepar)
-            pstate.advance()
+            children.append(p[1])
+        if type(p[3]) == _BINARY_OPS_TO_TYPES[p[2]]:
+            children.extend(p[3].children)
         else:
-            left_query = parse_terminal(pstate)
-
-        did_something = True
-        while did_something:
-            did_something = False
-            if pstate.is_at_end():
-                return left_query
-
-            next_tag = pstate.next_tag()
-
-            if next_tag is _and and _PREC_AND > min_precedence:
-                pstate.advance()
-                left_query = And(
-                        (left_query, inner_parse(pstate, _PREC_AND)))
-                did_something = True
-            elif next_tag is _or and _PREC_OR > min_precedence:
-                pstate.advance()
-                left_query = Or(
-                        (left_query, inner_parse(pstate, _PREC_OR)))
-                did_something = True
-
-        return left_query
-
-    if isinstance(expr, MatchExpressionBase):
-        return expr
-
-    from pytools.lex import LexIterator, lex, InvalidTokenError
-    try:
-        pstate = LexIterator(
-            [(tag, s, idx, matchobj)
-             for (tag, s, idx, matchobj) in lex(_LEX_TABLE, expr,
-                 match_objects=True)
-             if tag is not _whitespace], expr)
-    except InvalidTokenError as e:
-        from loopy.diagnostic import LoopyError
-        raise LoopyError(
-                "invalid match expression: '{match_expr}' ({err_type}: {err_str})"
-                .format(
-                    match_expr=expr,
-                    err_type=type(e).__name__,
-                    err_str=str(e)))
-
-    if pstate.is_at_end():
-        pstate.raise_parse_error("unexpected end of input")
-
-    result = inner_parse(pstate)
-    if not pstate.is_at_end():
-        pstate.raise_parse_error("leftover input after completed parse")
+            children.append(p[3])
+        p[0] = _BINARY_OPS_TO_TYPES[p[2]](tuple(children))
+
+    def p_expr_of_unary_ops(p):
+        'expression : NOT expression'
+        p[0] = Not(p[2])
+
+    def p_expr_of_parens(p):
+        'expression : LPAREN expression RPAREN'
+        p[0] = p[2]
+
+    def p_expr_as_term(p):
+        'expression : term'
+        p[0] = p[1]
+
+    def p_terminal(p):
+        '''term : ID COLON NAME
+                | INAME COLON NAME
+                | READS COLON NAME
+                | WRITES COLON NAME
+                | TAG COLON NAME'''
+        p[0] = _RESERVED_TO_TYPES[p[1]](p[3])
+
+    def p_empty_terminal(p):
+        'term :'
+        p[0] = All()
+
+    def p_error(p):
+        raise LoopyError("Syntax error at '%s'." % str(p))
+
+    lexer = lex.lex()  # noqa
+
+    parser = yacc.yacc()
+    result = parser.parse(expr)
 
     return result
 
-- 
GitLab


From 0852ce8acbd3dcd9cd9cb11d69efdba3e2437df9 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sun, 23 Jun 2019 02:42:59 -0500
Subject: [PATCH 2/6] don't parse expressions that are already
 MatchExpressionBase instances

---
 loopy/match.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/loopy/match.py b/loopy/match.py
index c322cfb9f..6f395d058 100644
--- a/loopy/match.py
+++ b/loopy/match.py
@@ -231,6 +231,9 @@ def parse_match(expr):
     # None -> ''
     expr = expr if expr else ''
 
+    if isinstance(expr, MatchExpressionBase):
+        return expr
+
     _RESERVED_TO_TYPES = {
             'id': Id,
             'iname': Iname,
-- 
GitLab


From cbcc220c3a7bdbfbdbac90e7254a419dcb614566 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Sun, 23 Jun 2019 02:51:29 -0500
Subject: [PATCH 3/6] removes duplication in property matching

---
 loopy/match.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/loopy/match.py b/loopy/match.py
index 6f395d058..9a07f5cb6 100644
--- a/loopy/match.py
+++ b/loopy/match.py
@@ -305,13 +305,17 @@ def parse_match(expr):
         p[0] = p[1]
 
     def p_terminal(p):
-        '''term : ID COLON NAME
-                | INAME COLON NAME
-                | READS COLON NAME
-                | WRITES COLON NAME
-                | TAG COLON NAME'''
+        'term : prop COLON NAME'
         p[0] = _RESERVED_TO_TYPES[p[1]](p[3])
 
+    def p_prop(p):
+        '''prop : ID
+                | INAME
+                | TAG
+                | READS
+                | WRITES'''
+        p[0] = p[1]
+
     def p_empty_terminal(p):
         'term :'
         p[0] = All()
-- 
GitLab


From ec75a04e19e63651a576f8f0d287e939683b9f85 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Mon, 24 Jun 2019 14:07:39 -0500
Subject: [PATCH 4/6] stops PLY from logging info to stderr

---
 loopy/match.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loopy/match.py b/loopy/match.py
index 9a07f5cb6..7a627bd3f 100644
--- a/loopy/match.py
+++ b/loopy/match.py
@@ -325,7 +325,7 @@ def parse_match(expr):
 
     lexer = lex.lex()  # noqa
 
-    parser = yacc.yacc()
+    parser = yacc.yacc(debug=False)
     result = parser.parse(expr)
 
     return result
-- 
GitLab


From 803515b78580738fb9596c9e5018dbd05a36596f Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Thu, 4 Jul 2019 09:30:57 -0500
Subject: [PATCH 5/6] changes according to review: adds tests; details noqa's

---
 loopy/match.py     | 21 ++++++++++++---------
 test/test_loopy.py | 12 ++++++++++++
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/loopy/match.py b/loopy/match.py
index 7a627bd3f..6bb260a3c 100644
--- a/loopy/match.py
+++ b/loopy/match.py
@@ -228,12 +228,14 @@ def parse_match(expr):
     * ``id:yoink and (not writes:a_temp or tag:input)``
     """
 
-    # None -> ''
-    expr = expr if expr else ''
+    if expr is None:
+        return All()
 
     if isinstance(expr, MatchExpressionBase):
         return expr
 
+    assert isinstance(expr, str)
+
     _RESERVED_TO_TYPES = {
             'id': Id,
             'iname': Iname,
@@ -258,9 +260,10 @@ def parse_match(expr):
             'or': 'OR',
             'and': 'AND'}
 
-    tokens = ('NAME', 'COLON', 'LPAREN', 'RPAREN',) + tuple(reserved.values())  # noqa
+    tokens = ('NAME', 'COLON', 'LPAREN', 'RPAREN',  # noqa: F841
+            ) + tuple(reserved.values())
 
-    precedence = ( # noqa
+    precedence = (  # noqa: F841
             ('left', 'OR'),
             ('left', 'AND'),
             ('left', 'NOT'),)
@@ -270,10 +273,10 @@ def parse_match(expr):
         t.type = reserved.get(t.value, 'NAME')
         return t
 
-    t_COLON = r':'  # noqa
-    t_LPAREN = r'\('  # noqa
-    t_RPAREN = r'\)'  # noqa
-    t_ignore = ' \t'  # noqa
+    t_COLON = r':'  # noqa: F841
+    t_LPAREN = r'\('  # noqa: F841
+    t_RPAREN = r'\)'  # noqa: F841
+    t_ignore = ' \t'  # noqa: F841
 
     def t_error(t):
         raise RuntimeError("Illegal character '%s'." % t.value[0])
@@ -323,7 +326,7 @@ def parse_match(expr):
     def p_error(p):
         raise LoopyError("Syntax error at '%s'." % str(p))
 
-    lexer = lex.lex()  # noqa
+    lexer = lex.lex()  # noqa: F841
 
     parser = yacc.yacc(debug=False)
     result = parser.parse(expr)
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 89b4f5e63..ef445fa5e 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2986,6 +2986,18 @@ def test_shape_mismatch_check(ctx_factory):
         prg(queue, a=a, b=b)
 
 
+def test_query_lang_parse():
+    from loopy.match import (parse_match, Id, And, Writes, Reads, Not, Iname,
+            Tagged, Or)
+    expr1 = 'id:yoink and writes:a_temp and reads:b_temp'
+    expr2 = 'id:yoink and (not iname:i* or tag:input)'
+
+    assert parse_match(expr1) == And(
+            (Id('yoink'),  Writes('a_temp'), Reads('b_temp')))
+    assert parse_match(expr2) == And((
+            Id('yoink'), Or((Not(Iname('i*')), Tagged('input')))))
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab


From eb32deadd581324378e352ea14d821ca5eca3e6d Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Thu, 4 Jul 2019 10:42:12 -0500
Subject: [PATCH 6/6] flake8: ignore N806

---
 loopy/match.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/loopy/match.py b/loopy/match.py
index 6bb260a3c..8ab9fd743 100644
--- a/loopy/match.py
+++ b/loopy/match.py
@@ -273,9 +273,9 @@ def parse_match(expr):
         t.type = reserved.get(t.value, 'NAME')
         return t
 
-    t_COLON = r':'  # noqa: F841
-    t_LPAREN = r'\('  # noqa: F841
-    t_RPAREN = r'\)'  # noqa: F841
+    t_COLON = r':'  # noqa: F841,N806
+    t_LPAREN = r'\('  # noqa: F841,N806
+    t_RPAREN = r'\)'  # noqa: F841,N806
     t_ignore = ' \t'  # noqa: F841
 
     def t_error(t):
-- 
GitLab