From 16cc9c8784888f0480e2c76bcac4f5259f530421 Mon Sep 17 00:00:00 2001
From: Phil Elson
Date: Tue, 29 Jan 2019 13:54:30 +0000
Subject: [PATCH] Added the UDUNITS2 grammar and a graph representation of a
 unit (#140)

* Added the UDUNITS2 grammar, a graph representation of a unit and an
  extensive test suite.

* First tranche of review actions.

* Second tranche of review actions.

* Tidy up the parse rules, particularly timestamp and shift.

* Remove operand from binary graph binary ops.

* Fix lint.

* Move the tokens definition as suggested in review.
---
 cf_units/_udunits2_parser/__init__.py         | 221 ++++-
 cf_units/_udunits2_parser/compile.py          |   1 +
 cf_units/_udunits2_parser/graph.py            | 111 +++
 .../_udunits2_parser/parser/udunits2Lexer.py  | 361 ++++++-
 .../_udunits2_parser/parser/udunits2Parser.py | 933 +++++++++++++++++-
 .../parser/udunits2ParserListener.py          |  19 -
 .../parser/udunits2ParserVisitor.py           |  63 ++
 .../_udunits2_parser/udunits2Lexer.g4.jinja   | 155 ++-
 cf_units/_udunits2_parser/udunits2Parser.g4   |  71 +-
 cf_units/conftest.py                          |   8 +-
 .../tests/integration/parse/test_graph.py     |  72 ++
 .../tests/integration/parse/test_parse.py     | 327 ++++++
 12 files changed, 2285 insertions(+), 57 deletions(-)
 create mode 100644 cf_units/_udunits2_parser/graph.py
 delete mode 100644 cf_units/_udunits2_parser/parser/udunits2ParserListener.py
 create mode 100644 cf_units/_udunits2_parser/parser/udunits2ParserVisitor.py
 create mode 100644 cf_units/tests/integration/parse/test_graph.py
 create mode 100644 cf_units/tests/integration/parse/test_parse.py

diff --git a/cf_units/_udunits2_parser/__init__.py b/cf_units/_udunits2_parser/__init__.py
index 864213ac..ef871dfa 100644
--- a/cf_units/_udunits2_parser/__init__.py
+++ b/cf_units/_udunits2_parser/__init__.py
@@ -15,10 +15,221 @@
 # You should have received a copy of the GNU Lesser General Public License
 # along with cf-units. If not, see <http://www.gnu.org/licenses/>.
 
+import unicodedata
 
-def py_3_only():
-    # This function is only syntactically valid in Python 3.
-    # We want PY2 users to see this syntax error, rather than a
-    # significantly more complex SyntaxError in the generated parser.
+from antlr4 import InputStream, CommonTokenStream
+from antlr4.error.ErrorListener import ErrorListener
 
-    return f'is_py_3!'
+from .parser.udunits2Lexer import udunits2Lexer
+from .parser.udunits2Parser import udunits2Parser
+from .parser.udunits2ParserVisitor import udunits2ParserVisitor
+from . import graph
+
+
+# Dictionary mapping token rule id to token name.
+TOKEN_ID_NAMES = {getattr(udunits2Lexer, rule, None): rule
+                  for rule in udunits2Lexer.ruleNames}
+
+
+def handle_UNICODE_EXPONENT(string):
+    # Convert the unicode to its compatibility form (e.g. turning
+    # superscript digits into plain ones), and replace the unicode minus
+    # sign with an ASCII hyphen-minus so that ``int()`` can parse it.
+    normd = unicodedata.normalize('NFKC', string).replace('−', '-')
+    return int(normd)
+
+
+class UnitParseVisitor(udunits2ParserVisitor):
+    """
+    A visitor which converts the parse tree into an abstract expression graph.
+
+    """
+    #: A dictionary mapping lexer TOKEN names to the action that should be
+    #: taken on them when visited. For full context of what is allowed, see
+    #: visitTerminal.
+    TERM_HANDLERS = {
+        'CLOSE_PAREN': None,
+        'DATE': str,
+        'DIVIDE': graph.Operand('/'),  # Drop context, such as " per ".
+        'E_POWER': str,
+        'FLOAT': graph.Number,  # Preserve precision as str.
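+        # NOTE: handlers that are plain ``str`` simply keep the token's
+        # text; visitTerminal wraps such non-Node results in graph.Terminal.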
+        'HOUR_MINUTE_SECOND': str,
+        'HOUR_MINUTE': str,
+        'ID': graph.Identifier,
+        'INT': lambda c: graph.Number(int(c)),
+        'MULTIPLY': graph.Operand('*'),
+        'OPEN_PAREN': None,
+        'PERIOD': str,
+        'RAISE': graph.Operand,
+        'TIMESTAMP': graph.Timestamp,
+        'SIGNED_INT': lambda c: graph.Number(int(c)),
+        'SHIFT_OP': None,
+        'WS': None,
+        'UNICODE_EXPONENT': handle_UNICODE_EXPONENT,
+    }
+
+    def defaultResult(self):
+        # Called once per ``visitChildren`` call.
+        return []
+
+    def aggregateResult(self, aggregate, nextResult):
+        # Always return a list from visitChildren
+        # (the default behaviour is to return only the last element).
+        if nextResult is not None:
+            aggregate.append(nextResult)
+        return aggregate
+
+    def visitChildren(self, node):
+        # If there is only a single item in the list from visitChildren,
+        # return that item. The list itself has no semantics.
+        result = super().visitChildren(node)
+        while isinstance(result, list) and len(result) == 1:
+            result = result[0]
+        return result
+
+    def visitTerminal(self, ctx):
+        """
+        Return a graph.Node, or None, to represent the given lexer terminal.
+
+        """
+        content = ctx.getText()
+
+        symbol_idx = ctx.symbol.type
+        if symbol_idx == -1:
+            # EOF, and all unmatched characters (which will have
+            # already raised a SyntaxError).
+            result = None
+        else:
+            name = TOKEN_ID_NAMES[symbol_idx]
+            handler = self.TERM_HANDLERS[name]
+
+            if callable(handler):
+                result = handler(content)
+            else:
+                result = handler
+
+        if result is not None and not isinstance(result, graph.Node):
+            result = graph.Terminal(result)
+        return result
+
+    def visitProduct(self, ctx):
+        # The UDUNITS grammar makes no parse distinction between product
+        # types ('/' and '*'), so we have to do the grunt work here.
+        nodes = self.visitChildren(ctx)
+
+        op_type = graph.Multiply
+
+        if isinstance(nodes, list):
+            last = nodes[-1]
+
+            # Walk the nodes backwards (right to left), applying the
+            # appropriate binary operation to each node successively,
+            # e.g. 1*2/3*4*5 = 1*(2/(3*(4*5))).
+            for node in nodes[:-1][::-1]:
+                if isinstance(node, graph.Operand):
+                    if node.content == '/':
+                        op_type = graph.Divide
+                    else:
+                        op_type = graph.Multiply
+                else:
+                    last = op_type(node, last)
+            node = last
+        else:
+            node = nodes
+        return node
+
+    def visitTimestamp(self, ctx):
+        # For now, we simply amalgamate timestamps into a single Terminal.
+        # More work is needed to turn this into a good date/time/timezone
+        # representation.
+        return graph.Terminal(ctx.getText())
+
+    def visitPower(self, ctx):
+        node = self.visitChildren(ctx)
+        if isinstance(node, list):
+            if len(node) == 3:
+                # node[1] is the operator, so ignore it.
+                node = graph.Raise(node[0], node[2])
+            else:
+                node = graph.Raise(*node)
+        return node
+
+    def visitShift_spec(self, ctx):
+        nodes = self.visitChildren(ctx)
+        if isinstance(nodes, list):
+            nodes = graph.Shift(*nodes)
+        return nodes
+
+    def visitUnit_spec(self, ctx):
+        node = self.visitChildren(ctx)
+        if not node:
+            node = graph.Terminal('')
+        return node
+
+
+class SyntaxErrorRaiser(ErrorListener):
+    """
+    Turn any parse errors into sensible SyntaxErrors.
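+
+    The raised SyntaxError includes position information, so the caret in
+    the resulting traceback points at the offending part of the unit string.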
+
+    """
+    def __init__(self, unit_string):
+        self.unit_string = unit_string
+        super().__init__()
+
+    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
+        # https://stackoverflow.com/a/36367357/741316
+        context = ("inline", line, column+2, "'{}'".format(self.unit_string))
+        syntax_error = SyntaxError(msg, context)
+        raise syntax_error from None
+
+
+def _debug_tokens(unit_string):
+    """
+    Print the tokens that are produced for the given input string.
+    Really handy when debugging the lexer.
+
+    """
+    unit_str = unit_string.strip()
+    lexer = udunits2Lexer(InputStream(unit_str))
+    stream = CommonTokenStream(lexer)
+    parser = udunits2Parser(stream)
+
+    # Actually do the parsing so that we can go through the identified tokens.
+    parser.unit_spec()
+
+    for token in stream.tokens:
+        if token.text == '':
+            continue
+        token_type_idx = token.type
+        rule = TOKEN_ID_NAMES[token_type_idx]
+        print("%s: %s" % (token.text, rule))
+
+
+def normalize(unit_string):
+    """
+    Parse the given unit string, and return its string representation.
+
+    Neither standardisation of units nor simplification of expressions is
+    done, but some tokens and operators will be converted to their
+    canonical form.
+
+    """
+    return str(parse(unit_string))
+
+
+def parse(unit_str):
+    # The udunits2 definition (C code) says to strip the unit string
+    # first.
+    unit_str = unit_str.strip()
+    lexer = udunits2Lexer(InputStream(unit_str))
+    stream = CommonTokenStream(lexer)
+    parser = udunits2Parser(stream)
+
+    # Raise a SyntaxError if we encounter an issue when parsing.
+    parser.removeErrorListeners()
+    parser.addErrorListener(SyntaxErrorRaiser(unit_str))
+
+    # Get the top level concept.
+    tree = parser.unit_spec()
+
+    visitor = UnitParseVisitor()
+    # Return the graph representation.
+    return visitor.visit(tree)
diff --git a/cf_units/_udunits2_parser/compile.py b/cf_units/_udunits2_parser/compile.py
index 12ab7bc5..9e73ee1f 100644
--- a/cf_units/_udunits2_parser/compile.py
+++ b/cf_units/_udunits2_parser/compile.py
@@ -94,6 +94,7 @@ def main():
     print('Compiling parser...')
     subprocess.run(
         ['java', '-jar', str(JAR), '-Dlanguage=Python3',
+         '-no-listener', '-visitor',
          str(PARSER), '-o', 'parser'],
         check=True)
diff --git a/cf_units/_udunits2_parser/graph.py b/cf_units/_udunits2_parser/graph.py
new file mode 100644
index 00000000..9be3c11c
--- /dev/null
+++ b/cf_units/_udunits2_parser/graph.py
@@ -0,0 +1,111 @@
+# (C) British Crown Copyright 2019, Met Office
+#
+# This file is part of cf-units.
+#
+# cf-units is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# cf-units is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with cf-units. If not, see <http://www.gnu.org/licenses/>.
+
+
+class Node:
+    """
+    Represents a node in an expression graph.
+
+    """
+    def __init__(self, **kwargs):
+        self._attrs = kwargs
+
+    def children(self):
+        """
+        Return the children of this node.
+
+        """
+        # Since this is Python >= 3.6, the order of the kwargs is
+        # well defined.
+        return list(self._attrs.values())
+
+    def __getattr__(self, name):
+        # Allow the dictionary to raise KeyError if the key doesn't exist.
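+        # (For example, ``node.lhs`` on a BinaryOp is resolved by looking
+        # up self._attrs['lhs'].)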
+ return self._attrs[name] + + def _repr_ctx(self): + # Return a dictionary that is useful for passing to string.format. + kwargs = ', '.join( + '{}={!r}'.format(key, value) + for key, value in self._attrs.items()) + return dict(cls_name=self.__class__.__name__, kwargs=kwargs) + + def __repr__(self): + return '{cls_name}({kwargs})'.format(**self._repr_ctx()) + + +class Terminal(Node): + """ + A generic terminal node in an expression graph. + + """ + def __init__(self, content): + super().__init__(content=content) + + def children(self): + return [] + + def __str__(self): + return '{}'.format(self.content) + + +class Operand(Terminal): + pass + + +class Number(Terminal): + pass + + +class Identifier(Terminal): + """The unit itself (e.g. meters, m, km and π)""" + pass + + +class BinaryOp(Node): + def __init__(self, lhs, rhs): + super().__init__(lhs=lhs, rhs=rhs) + + +class Raise(BinaryOp): + def __str__(self): + return f'{self.lhs}^{self.rhs}' + + +class Multiply(BinaryOp): + def __str__(self): + return f'{self.lhs}·{self.rhs}' + + +class Divide(BinaryOp): + def __str__(self): + return f'{self.lhs}/{self.rhs}' + + +class Shift(Node): + def __init__(self, unit, shift_from): + # The product unit to be shifted. + super().__init__(unit=unit, shift_from=shift_from) + + def __str__(self): + return f'({self.unit} @ {self.shift_from})' + + +class Timestamp(Terminal): + # Currently we do not try to interpret the timestamp. + # This is likely to change in the future, but there are some + # gnarly test cases, and should not be undertaken lightly. + pass diff --git a/cf_units/_udunits2_parser/parser/udunits2Lexer.py b/cf_units/_udunits2_parser/parser/udunits2Lexer.py index 5c9f1ebf..ce86bd13 100644 --- a/cf_units/_udunits2_parser/parser/udunits2Lexer.py +++ b/cf_units/_udunits2_parser/parser/udunits2Lexer.py @@ -1,4 +1,4 @@ -# Generated from udunits2Lexer.g4 by ANTLR 4.7.2 +# Generated from /Users/pelson/dev/scitools/cf-units/cf_units/_udunits2_parser/parser/udunits2Lexer.g4 by ANTLR 4.7.2 from antlr4 import * from io import StringIO from typing.io import TextIO @@ -8,9 +8,299 @@ def serializedATN(): with StringIO() as buf: - buf.write("\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\2\3") - buf.write("\7\b\1\4\2\t\2\3\2\3\2\2\2\3\3\3\3\2\2\2\6\2\3\3\2\2\2") - buf.write("\3\5\3\2\2\2\5\6\13\2\2\2\6\4\3\2\2\2\3\2\2") + buf.write("\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\2\35") + buf.write("\u026b\b\1\b\1\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6") + buf.write("\t\6\4\7\t\7\4\b\t\b\4\t\t\t\4\n\t\n\4\13\t\13\4\f\t\f") + buf.write("\4\r\t\r\4\16\t\16\4\17\t\17\4\20\t\20\4\21\t\21\4\22") + buf.write("\t\22\4\23\t\23\4\24\t\24\4\25\t\25\4\26\t\26\4\27\t\27") + buf.write("\4\30\t\30\4\31\t\31\4\32\t\32\4\33\t\33\4\34\t\34\4\35") + buf.write("\t\35\4\36\t\36\4\37\t\37\4 \t \4!\t!\4\"\t\"\4#\t#\4") + buf.write("$\t$\4%\t%\4&\t&\4\'\t\'\4(\t(\4)\t)\4*\t*\4+\t+\4,\t") + buf.write(",\4-\t-\4.\t.\4/\t/\4\60\t\60\4\61\t\61\4\62\t\62\4\63") + buf.write("\t\63\4\64\t\64\4\65\t\65\4\66\t\66\4\67\t\67\48\t8\4") + buf.write("9\t9\4:\t:\4;\t;\4<\t<\4=\t=\4>\t>\4?\t?\4@\t@\4A\tA\4") + buf.write("B\tB\4C\tC\4D\tD\4E\tE\4F\tF\4G\tG\4H\tH\4I\tI\4J\tJ\4") + buf.write("K\tK\4L\tL\4M\tM\4N\tN\4O\tO\3\2\3\2\5\2\u00a4\n\2\3\2") + buf.write("\3\2\3\3\3\3\3\4\3\4\3\5\3\5\5\5\u00ae\n\5\3\6\7\6\u00b1") + buf.write("\n\6\f\6\16\6\u00b4\13\6\3\6\3\6\3\6\3\6\3\6\3\6\3\6\3") + buf.write("\6\3\6\3\6\3\6\5\6\u00c1\n\6\3\6\7\6\u00c4\n\6\f\6\16") + buf.write("\6\u00c7\13\6\3\7\3\7\3\b\3\b\3\t\3\t\3\n\3\n\3\13\6\13") + 
buf.write("\u00d2\n\13\r\13\16\13\u00d3\3\f\3\f\5\f\u00d8\n\f\3\r") + buf.write("\3\r\3\r\3\16\3\16\3\16\5\16\u00e0\n\16\3\16\5\16\u00e3") + buf.write("\n\16\3\16\3\16\3\16\5\16\u00e8\n\16\3\16\5\16\u00eb\n") + buf.write("\16\3\16\3\16\3\16\5\16\u00f0\n\16\3\17\3\17\3\17\3\17") + buf.write("\3\17\3\17\3\17\3\17\3\17\3\17\3\17\3\17\3\17\3\17\3\17") + buf.write("\3\17\3\17\3\17\5\17\u0104\n\17\3\17\3\17\3\20\6\20\u0109") + buf.write("\n\20\r\20\16\20\u010a\3\21\3\21\3\21\5\21\u0110\n\21") + buf.write("\3\22\3\22\3\22\3\22\3\22\3\22\3\22\3\22\3\22\5\22\u011b") + buf.write("\n\22\3\23\3\23\7\23\u011f\n\23\f\23\16\23\u0122\13\23") + buf.write("\3\23\3\23\3\23\3\23\5\23\u0128\n\23\3\23\5\23\u012b\n") + buf.write("\23\3\23\7\23\u012e\n\23\f\23\16\23\u0131\13\23\3\24\3") + buf.write("\24\6\24\u0135\n\24\r\24\16\24\u0136\3\24\3\24\3\25\3") + buf.write("\25\3\26\3\26\3\27\3\27\3\30\3\30\3\30\3\30\3\30\3\30") + buf.write("\3\30\5\30\u0148\n\30\3\31\3\31\5\31\u014c\n\31\3\32\5") + buf.write("\32\u014f\n\32\3\32\5\32\u0152\n\32\3\32\3\32\3\32\5\32") + buf.write("\u0157\n\32\3\33\5\33\u015a\n\33\3\33\3\33\3\34\3\34\3") + buf.write("\34\5\34\u0161\n\34\3\34\3\34\7\34\u0165\n\34\f\34\16") + buf.write("\34\u0168\13\34\5\34\u016a\n\34\3\35\5\35\u016d\n\35\3") + buf.write("\35\3\35\3\35\5\35\u0172\n\35\3\36\5\36\u0175\n\36\3\36") + buf.write("\3\36\3\36\3\36\3\36\3\36\3\36\5\36\u017e\n\36\3\37\3") + buf.write("\37\5\37\u0182\n\37\3\37\5\37\u0185\n\37\3\37\5\37\u0188") + buf.write("\n\37\3 \3 \3 \3 \3 \3 \3!\3!\3!\3!\3\"\3\"\3#\3#\3#\3") + buf.write("#\3#\3#\5#\u019c\n#\3$\3$\3$\5$\u01a1\n$\5$\u01a3\n$\3") + buf.write("%\3%\3%\5%\u01a8\n%\5%\u01aa\n%\3%\3%\3%\3&\3&\3&\3&\3") + buf.write("\'\3\'\3\'\3\'\3(\3(\3(\3(\3)\3)\3)\3)\3*\3*\3*\3*\3+") + buf.write("\3+\3+\3+\3,\3,\3,\3,\3-\3-\3-\3-\3.\3.\3.\3.\3/\3/\3") + buf.write("/\3/\3\60\3\60\3\60\3\60\3\61\3\61\3\61\3\61\3\62\3\62") + buf.write("\3\62\3\62\3\63\3\63\3\63\3\63\3\64\3\64\3\64\3\64\3\65") + buf.write("\3\65\3\65\3\65\3\66\3\66\3\66\3\66\3\67\3\67\3\67\3\67") + buf.write("\38\38\38\38\39\39\39\39\3:\3:\3:\3:\3;\3;\3;\3;\3;\3") + buf.write("<\3<\3<\3<\3<\3=\3=\3=\3=\3=\3>\3>\3>\3>\3>\3?\3?\3?\3") + buf.write("?\3?\3@\3@\3@\3@\3@\3A\3A\3A\3A\3A\3B\3B\3B\3B\3B\3C\3") + buf.write("C\3C\3C\3C\3D\3D\3D\3D\3D\3E\3E\3E\3E\3E\3F\3F\3F\3F\3") + buf.write("F\3G\3G\3G\3G\3G\3H\3H\3H\3H\3H\3I\3I\3I\3I\3I\3J\3J\3") + buf.write("J\3J\3J\3K\3K\3K\3K\3K\3L\3L\3L\3L\3L\3M\3M\3M\3M\3M\3") + buf.write("N\3N\3N\3N\3N\3O\3O\3O\3O\3O\2\2P\5\3\7\4\t\2\13\5\r\6") + buf.write("\17\7\21\b\23\t\25\n\27\13\31\2\33\f\35\r\37\16!\17#\20") + buf.write("%\21\'\22)\23+\24-\25/\26\61\27\63\2\65\2\67\29\2;\2=") + buf.write("\2?\2A\30C\31E\32G\33I\2K\34M\35O\2Q\2S\2U\2W\2Y\2[\2") + buf.write("]\2_\2a\2c\2e\2g\2i\2k\2m\2o\2q\2s\2u\2w\2y\2{\2}\2\177") + buf.write("\2\u0081\2\u0083\2\u0085\2\u0087\2\u0089\2\u008b\2\u008d") + buf.write("\2\u008f\2\u0091\2\u0093\2\u0095\2\u0097\2\u0099\2\u009b") + buf.write("\2\u009d\2\u009f\2\5\2\3\4\b\4\2,,\u00b9\u00b9\4\2GGg") + buf.write("g\6\2\u00b4\u00b5\u00bb\u00bb\u2072\u2072\u2076\u207d") + buf.write("\5\2C\\aac|\13\2\u0082\u0082\u00af\u00af\u00b2\u00b2\u00b7") + buf.write("\u00b7\u00c2\u00d8\u00da\u00f8\u00fa\u0101\u03ab\u03ab") + buf.write("\u03c2\u03c2\3\2\"\"\2\u0292\2\5\3\2\2\2\2\7\3\2\2\2\2") + buf.write("\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2\2\2\2\21\3\2\2\2\2\23") + buf.write("\3\2\2\2\2\25\3\2\2\2\2\27\3\2\2\2\2\33\3\2\2\2\2\35\3") + buf.write("\2\2\2\2\37\3\2\2\2\2!\3\2\2\2\2#\3\2\2\2\2%\3\2\2\2\2") + 
buf.write("\'\3\2\2\2\2)\3\2\2\2\2+\3\2\2\2\2-\3\2\2\2\2/\3\2\2\2") + buf.write("\3\61\3\2\2\2\3A\3\2\2\2\3C\3\2\2\2\3E\3\2\2\2\3G\3\2") + buf.write("\2\2\3K\3\2\2\2\3M\3\2\2\2\3O\3\2\2\2\3Q\3\2\2\2\3S\3") + buf.write("\2\2\2\3U\3\2\2\2\3W\3\2\2\2\3Y\3\2\2\2\3[\3\2\2\2\3]") + buf.write("\3\2\2\2\3_\3\2\2\2\3a\3\2\2\2\3c\3\2\2\2\3e\3\2\2\2\3") + buf.write("g\3\2\2\2\3i\3\2\2\2\3k\3\2\2\2\3m\3\2\2\2\3o\3\2\2\2") + buf.write("\3q\3\2\2\2\3s\3\2\2\2\3u\3\2\2\2\4w\3\2\2\2\4y\3\2\2") + buf.write("\2\4{\3\2\2\2\4}\3\2\2\2\4\177\3\2\2\2\4\u0081\3\2\2\2") + buf.write("\4\u0083\3\2\2\2\4\u0085\3\2\2\2\4\u0087\3\2\2\2\4\u0089") + buf.write("\3\2\2\2\4\u008b\3\2\2\2\4\u008d\3\2\2\2\4\u008f\3\2\2") + buf.write("\2\4\u0091\3\2\2\2\4\u0093\3\2\2\2\4\u0095\3\2\2\2\4\u0097") + buf.write("\3\2\2\2\4\u0099\3\2\2\2\4\u009b\3\2\2\2\4\u009d\3\2\2") + buf.write("\2\4\u009f\3\2\2\2\5\u00a3\3\2\2\2\7\u00a7\3\2\2\2\t\u00a9") + buf.write("\3\2\2\2\13\u00ad\3\2\2\2\r\u00b2\3\2\2\2\17\u00c8\3\2") + buf.write("\2\2\21\u00ca\3\2\2\2\23\u00cc\3\2\2\2\25\u00ce\3\2\2") + buf.write("\2\27\u00d1\3\2\2\2\31\u00d7\3\2\2\2\33\u00d9\3\2\2\2") + buf.write("\35\u00ef\3\2\2\2\37\u0103\3\2\2\2!\u0108\3\2\2\2#\u010f") + buf.write("\3\2\2\2%\u011a\3\2\2\2\'\u011c\3\2\2\2)\u0134\3\2\2\2") + buf.write("+\u013a\3\2\2\2-\u013c\3\2\2\2/\u013e\3\2\2\2\61\u0147") + buf.write("\3\2\2\2\63\u014b\3\2\2\2\65\u0156\3\2\2\2\67\u0159\3") + buf.write("\2\2\29\u0160\3\2\2\2;\u0171\3\2\2\2=\u017d\3\2\2\2?\u017f") + buf.write("\3\2\2\2A\u0189\3\2\2\2C\u018f\3\2\2\2E\u0193\3\2\2\2") + buf.write("G\u0195\3\2\2\2I\u019d\3\2\2\2K\u01a4\3\2\2\2M\u01ae\3") + buf.write("\2\2\2O\u01b2\3\2\2\2Q\u01b6\3\2\2\2S\u01ba\3\2\2\2U\u01be") + buf.write("\3\2\2\2W\u01c2\3\2\2\2Y\u01c6\3\2\2\2[\u01ca\3\2\2\2") + buf.write("]\u01ce\3\2\2\2_\u01d2\3\2\2\2a\u01d6\3\2\2\2c\u01da\3") + buf.write("\2\2\2e\u01de\3\2\2\2g\u01e2\3\2\2\2i\u01e6\3\2\2\2k\u01ea") + buf.write("\3\2\2\2m\u01ee\3\2\2\2o\u01f2\3\2\2\2q\u01f6\3\2\2\2") + buf.write("s\u01fa\3\2\2\2u\u01fe\3\2\2\2w\u0202\3\2\2\2y\u0207\3") + buf.write("\2\2\2{\u020c\3\2\2\2}\u0211\3\2\2\2\177\u0216\3\2\2\2") + buf.write("\u0081\u021b\3\2\2\2\u0083\u0220\3\2\2\2\u0085\u0225\3") + buf.write("\2\2\2\u0087\u022a\3\2\2\2\u0089\u022f\3\2\2\2\u008b\u0234") + buf.write("\3\2\2\2\u008d\u0239\3\2\2\2\u008f\u023e\3\2\2\2\u0091") + buf.write("\u0243\3\2\2\2\u0093\u0248\3\2\2\2\u0095\u024d\3\2\2\2") + buf.write("\u0097\u0252\3\2\2\2\u0099\u0257\3\2\2\2\u009b\u025c\3") + buf.write("\2\2\2\u009d\u0261\3\2\2\2\u009f\u0266\3\2\2\2\u00a1\u00a4") + buf.write("\5\t\4\2\u00a2\u00a4\5\7\3\2\u00a3\u00a1\3\2\2\2\u00a3") + buf.write("\u00a2\3\2\2\2\u00a4\u00a5\3\2\2\2\u00a5\u00a6\5\27\13") + buf.write("\2\u00a6\6\3\2\2\2\u00a7\u00a8\7-\2\2\u00a8\b\3\2\2\2") + buf.write("\u00a9\u00aa\7/\2\2\u00aa\n\3\2\2\2\u00ab\u00ae\t\2\2") + buf.write("\2\u00ac\u00ae\5\t\4\2\u00ad\u00ab\3\2\2\2\u00ad\u00ac") + buf.write("\3\2\2\2\u00ae\f\3\2\2\2\u00af\u00b1\5-\26\2\u00b0\u00af") + buf.write("\3\2\2\2\u00b1\u00b4\3\2\2\2\u00b2\u00b0\3\2\2\2\u00b2") + buf.write("\u00b3\3\2\2\2\u00b3\u00c0\3\2\2\2\u00b4\u00b2\3\2\2\2") + buf.write("\u00b5\u00c1\7\61\2\2\u00b6\u00b7\7\"\2\2\u00b7\u00b8") + buf.write("\7R\2\2\u00b8\u00b9\7G\2\2\u00b9\u00ba\7T\2\2\u00ba\u00c1") + buf.write("\7\"\2\2\u00bb\u00bc\7\"\2\2\u00bc\u00bd\7r\2\2\u00bd") + buf.write("\u00be\7g\2\2\u00be\u00bf\7t\2\2\u00bf\u00c1\7\"\2\2\u00c0") + buf.write("\u00b5\3\2\2\2\u00c0\u00b6\3\2\2\2\u00c0\u00bb\3\2\2\2") + buf.write("\u00c1\u00c5\3\2\2\2\u00c2\u00c4\5-\26\2\u00c3\u00c2\3") + 
buf.write("\2\2\2\u00c4\u00c7\3\2\2\2\u00c5\u00c3\3\2\2\2\u00c5\u00c6") + buf.write("\3\2\2\2\u00c6\16\3\2\2\2\u00c7\u00c5\3\2\2\2\u00c8\u00c9") + buf.write("\7\60\2\2\u00c9\20\3\2\2\2\u00ca\u00cb\7*\2\2\u00cb\22") + buf.write("\3\2\2\2\u00cc\u00cd\7+\2\2\u00cd\24\3\2\2\2\u00ce\u00cf") + buf.write("\7<\2\2\u00cf\26\3\2\2\2\u00d0\u00d2\4\62;\2\u00d1\u00d0") + buf.write("\3\2\2\2\u00d2\u00d3\3\2\2\2\u00d3\u00d1\3\2\2\2\u00d3") + buf.write("\u00d4\3\2\2\2\u00d4\30\3\2\2\2\u00d5\u00d8\5\27\13\2") + buf.write("\u00d6\u00d8\5\5\2\2\u00d7\u00d5\3\2\2\2\u00d7\u00d6\3") + buf.write("\2\2\2\u00d8\32\3\2\2\2\u00d9\u00da\t\3\2\2\u00da\u00db") + buf.write("\5\31\f\2\u00db\34\3\2\2\2\u00dc\u00dd\5\31\f\2\u00dd") + buf.write("\u00df\5\17\7\2\u00de\u00e0\5\27\13\2\u00df\u00de\3\2") + buf.write("\2\2\u00df\u00e0\3\2\2\2\u00e0\u00e8\3\2\2\2\u00e1\u00e3") + buf.write("\5\31\f\2\u00e2\u00e1\3\2\2\2\u00e2\u00e3\3\2\2\2\u00e3") + buf.write("\u00e4\3\2\2\2\u00e4\u00e5\5\17\7\2\u00e5\u00e6\5\27\13") + buf.write("\2\u00e6\u00e8\3\2\2\2\u00e7\u00dc\3\2\2\2\u00e7\u00e2") + buf.write("\3\2\2\2\u00e8\u00ea\3\2\2\2\u00e9\u00eb\5\33\r\2\u00ea") + buf.write("\u00e9\3\2\2\2\u00ea\u00eb\3\2\2\2\u00eb\u00f0\3\2\2\2") + buf.write("\u00ec\u00ed\5\31\f\2\u00ed\u00ee\5\33\r\2\u00ee\u00f0") + buf.write("\3\2\2\2\u00ef\u00e7\3\2\2\2\u00ef\u00ec\3\2\2\2\u00f0") + buf.write("\36\3\2\2\2\u00f1\u0104\7B\2\2\u00f2\u00f3\7c\2\2\u00f3") + buf.write("\u00f4\7h\2\2\u00f4\u00f5\7v\2\2\u00f5\u00f6\7g\2\2\u00f6") + buf.write("\u0104\7t\2\2\u00f7\u00f8\7h\2\2\u00f8\u00f9\7t\2\2\u00f9") + buf.write("\u00fa\7q\2\2\u00fa\u0104\7o\2\2\u00fb\u00fc\7u\2\2\u00fc") + buf.write("\u00fd\7k\2\2\u00fd\u00fe\7p\2\2\u00fe\u00ff\7e\2\2\u00ff") + buf.write("\u0104\7g\2\2\u0100\u0101\7t\2\2\u0101\u0102\7g\2\2\u0102") + buf.write("\u0104\7h\2\2\u0103\u00f1\3\2\2\2\u0103\u00f2\3\2\2\2") + buf.write("\u0103\u00f7\3\2\2\2\u0103\u00fb\3\2\2\2\u0103\u0100\3") + buf.write("\2\2\2\u0104\u0105\3\2\2\2\u0105\u0106\b\17\2\2\u0106") + buf.write(" \3\2\2\2\u0107\u0109\t\4\2\2\u0108\u0107\3\2\2\2\u0109") + buf.write("\u010a\3\2\2\2\u010a\u0108\3\2\2\2\u010a\u010b\3\2\2\2") + buf.write("\u010b\"\3\2\2\2\u010c\u0110\7`\2\2\u010d\u010e\7,\2\2") + buf.write("\u010e\u0110\7,\2\2\u010f\u010c\3\2\2\2\u010f\u010d\3") + buf.write("\2\2\2\u0110$\3\2\2\2\u0111\u0112\7n\2\2\u0112\u0113\7") + buf.write("q\2\2\u0113\u011b\7i\2\2\u0114\u0115\7n\2\2\u0115\u011b") + buf.write("\7i\2\2\u0116\u0117\7n\2\2\u0117\u011b\7p\2\2\u0118\u0119") + buf.write("\7n\2\2\u0119\u011b\7d\2\2\u011a\u0111\3\2\2\2\u011a\u0114") + buf.write("\3\2\2\2\u011a\u0116\3\2\2\2\u011a\u0118\3\2\2\2\u011b") + buf.write("&\3\2\2\2\u011c\u0120\7*\2\2\u011d\u011f\5-\26\2\u011e") + buf.write("\u011d\3\2\2\2\u011f\u0122\3\2\2\2\u0120\u011e\3\2\2\2") + buf.write("\u0120\u0121\3\2\2\2\u0121\u0127\3\2\2\2\u0122\u0120\3") + buf.write("\2\2\2\u0123\u0124\7T\2\2\u0124\u0128\7G\2\2\u0125\u0126") + buf.write("\7t\2\2\u0126\u0128\7g\2\2\u0127\u0123\3\2\2\2\u0127\u0125") + buf.write("\3\2\2\2\u0128\u012a\3\2\2\2\u0129\u012b\7<\2\2\u012a") + buf.write("\u0129\3\2\2\2\u012a\u012b\3\2\2\2\u012b\u012f\3\2\2\2") + buf.write("\u012c\u012e\5-\26\2\u012d\u012c\3\2\2\2\u012e\u0131\3") + buf.write("\2\2\2\u012f\u012d\3\2\2\2\u012f\u0130\3\2\2\2\u0130(") + buf.write("\3\2\2\2\u0131\u012f\3\2\2\2\u0132\u0135\t\5\2\2\u0133") + buf.write("\u0135\5+\25\2\u0134\u0132\3\2\2\2\u0134\u0133\3\2\2\2") + buf.write("\u0135\u0136\3\2\2\2\u0136\u0134\3\2\2\2\u0136\u0137\3") + buf.write("\2\2\2\u0137\u0138\3\2\2\2\u0138\u0139\b\24\3\2\u0139") + 
buf.write("*\3\2\2\2\u013a\u013b\t\6\2\2\u013b,\3\2\2\2\u013c\u013d") + buf.write("\t\7\2\2\u013d.\3\2\2\2\u013e\u013f\13\2\2\2\u013f\60") + buf.write("\3\2\2\2\u0140\u0141\7W\2\2\u0141\u0142\7V\2\2\u0142\u0148") + buf.write("\7E\2\2\u0143\u0148\7\\\2\2\u0144\u0145\7I\2\2\u0145\u0146") + buf.write("\7O\2\2\u0146\u0148\7V\2\2\u0147\u0140\3\2\2\2\u0147\u0143") + buf.write("\3\2\2\2\u0147\u0144\3\2\2\2\u0148\62\3\2\2\2\u0149\u014c") + buf.write("\5\7\3\2\u014a\u014c\5\t\4\2\u014b\u0149\3\2\2\2\u014b") + buf.write("\u014a\3\2\2\2\u014c\64\3\2\2\2\u014d\u014f\5\63\31\2") + buf.write("\u014e\u014d\3\2\2\2\u014e\u014f\3\2\2\2\u014f\u0151\3") + buf.write("\2\2\2\u0150\u0152\4\62\63\2\u0151\u0150\3\2\2\2\u0151") + buf.write("\u0152\3\2\2\2\u0152\u0153\3\2\2\2\u0153\u0157\4\62;\2") + buf.write("\u0154\u0155\7\64\2\2\u0155\u0157\4\62\65\2\u0156\u014e") + buf.write("\3\2\2\2\u0156\u0154\3\2\2\2\u0157\66\3\2\2\2\u0158\u015a") + buf.write("\4\62\67\2\u0159\u0158\3\2\2\2\u0159\u015a\3\2\2\2\u015a") + buf.write("\u015b\3\2\2\2\u015b\u015c\4\62;\2\u015c8\3\2\2\2\u015d") + buf.write("\u0161\5\67\33\2\u015e\u015f\78\2\2\u015f\u0161\7\62\2") + buf.write("\2\u0160\u015d\3\2\2\2\u0160\u015e\3\2\2\2\u0161\u0169") + buf.write("\3\2\2\2\u0162\u0166\5\17\7\2\u0163\u0165\4\62;\2\u0164") + buf.write("\u0163\3\2\2\2\u0165\u0168\3\2\2\2\u0166\u0164\3\2\2\2") + buf.write("\u0166\u0167\3\2\2\2\u0167\u016a\3\2\2\2\u0168\u0166\3") + buf.write("\2\2\2\u0169\u0162\3\2\2\2\u0169\u016a\3\2\2\2\u016a:") + buf.write("\3\2\2\2\u016b\u016d\7\62\2\2\u016c\u016b\3\2\2\2\u016c") + buf.write("\u016d\3\2\2\2\u016d\u016e\3\2\2\2\u016e\u0172\4\63;\2") + buf.write("\u016f\u0170\7\63\2\2\u0170\u0172\4\62\64\2\u0171\u016c") + buf.write("\3\2\2\2\u0171\u016f\3\2\2\2\u0172<\3\2\2\2\u0173\u0175") + buf.write("\7\62\2\2\u0174\u0173\3\2\2\2\u0174\u0175\3\2\2\2\u0175") + buf.write("\u0176\3\2\2\2\u0176\u017e\4\63;\2\u0177\u0178\4\63\64") + buf.write("\2\u0178\u017e\4\62;\2\u0179\u017a\7\65\2\2\u017a\u017e") + buf.write("\7\62\2\2\u017b\u017c\7\65\2\2\u017c\u017e\7\63\2\2\u017d") + buf.write("\u0174\3\2\2\2\u017d\u0177\3\2\2\2\u017d\u0179\3\2\2\2") + buf.write("\u017d\u017b\3\2\2\2\u017e>\3\2\2\2\u017f\u0181\5\31\f") + buf.write("\2\u0180\u0182\5\27\13\2\u0181\u0180\3\2\2\2\u0181\u0182") + buf.write("\3\2\2\2\u0182\u0184\3\2\2\2\u0183\u0185\5\27\13\2\u0184") + buf.write("\u0183\3\2\2\2\u0184\u0185\3\2\2\2\u0185\u0187\3\2\2\2") + buf.write("\u0186\u0188\5\27\13\2\u0187\u0186\3\2\2\2\u0187\u0188") + buf.write("\3\2\2\2\u0188@\3\2\2\2\u0189\u018a\5\65\32\2\u018a\u018b") + buf.write("\7<\2\2\u018b\u018c\5\67\33\2\u018c\u018d\7<\2\2\u018d") + buf.write("\u018e\59\34\2\u018eB\3\2\2\2\u018f\u0190\5\65\32\2\u0190") + buf.write("\u0191\7<\2\2\u0191\u0192\5\67\33\2\u0192D\3\2\2\2\u0193") + buf.write("\u0194\7/\2\2\u0194F\3\2\2\2\u0195\u0196\5?\37\2\u0196") + buf.write("\u0197\5\t\4\2\u0197\u019b\5;\35\2\u0198\u0199\5\t\4\2") + buf.write("\u0199\u019a\5=\36\2\u019a\u019c\3\2\2\2\u019b\u0198\3") + buf.write("\2\2\2\u019b\u019c\3\2\2\2\u019cH\3\2\2\2\u019d\u01a2") + buf.write("\5\65\32\2\u019e\u01a0\5\67\33\2\u019f\u01a1\59\34\2\u01a0") + buf.write("\u019f\3\2\2\2\u01a0\u01a1\3\2\2\2\u01a1\u01a3\3\2\2\2") + buf.write("\u01a2\u019e\3\2\2\2\u01a2\u01a3\3\2\2\2\u01a3J\3\2\2") + buf.write("\2\u01a4\u01a9\5?\37\2\u01a5\u01a7\5;\35\2\u01a6\u01a8") + buf.write("\5=\36\2\u01a7\u01a6\3\2\2\2\u01a7\u01a8\3\2\2\2\u01a8") + buf.write("\u01aa\3\2\2\2\u01a9\u01a5\3\2\2\2\u01a9\u01aa\3\2\2\2") + buf.write("\u01aa\u01ab\3\2\2\2\u01ab\u01ac\7V\2\2\u01ac\u01ad\5") + 
buf.write("I$\2\u01adL\3\2\2\2\u01ae\u01af\5G#\2\u01af\u01b0\7V\2") + buf.write("\2\u01b0\u01b1\5I$\2\u01b1N\3\2\2\2\u01b2\u01b3\5\5\2") + buf.write("\2\u01b3\u01b4\3\2\2\2\u01b4\u01b5\b\'\4\2\u01b5P\3\2") + buf.write("\2\2\u01b6\u01b7\5\7\3\2\u01b7\u01b8\3\2\2\2\u01b8\u01b9") + buf.write("\b(\5\2\u01b9R\3\2\2\2\u01ba\u01bb\5\13\5\2\u01bb\u01bc") + buf.write("\3\2\2\2\u01bc\u01bd\b)\6\2\u01bdT\3\2\2\2\u01be\u01bf") + buf.write("\5\r\6\2\u01bf\u01c0\3\2\2\2\u01c0\u01c1\b*\7\2\u01c1") + buf.write("V\3\2\2\2\u01c2\u01c3\5\17\7\2\u01c3\u01c4\3\2\2\2\u01c4") + buf.write("\u01c5\b+\b\2\u01c5X\3\2\2\2\u01c6\u01c7\5\21\b\2\u01c7") + buf.write("\u01c8\3\2\2\2\u01c8\u01c9\b,\t\2\u01c9Z\3\2\2\2\u01ca") + buf.write("\u01cb\5\23\t\2\u01cb\u01cc\3\2\2\2\u01cc\u01cd\b-\n\2") + buf.write("\u01cd\\\3\2\2\2\u01ce\u01cf\5\25\n\2\u01cf\u01d0\3\2") + buf.write("\2\2\u01d0\u01d1\b.\13\2\u01d1^\3\2\2\2\u01d2\u01d3\5") + buf.write("\27\13\2\u01d3\u01d4\3\2\2\2\u01d4\u01d5\b/\f\2\u01d5") + buf.write("`\3\2\2\2\u01d6\u01d7\5\33\r\2\u01d7\u01d8\3\2\2\2\u01d8") + buf.write("\u01d9\b\60\r\2\u01d9b\3\2\2\2\u01da\u01db\5\35\16\2\u01db") + buf.write("\u01dc\3\2\2\2\u01dc\u01dd\b\61\16\2\u01ddd\3\2\2\2\u01de") + buf.write("\u01df\5\37\17\2\u01df\u01e0\3\2\2\2\u01e0\u01e1\b\62") + buf.write("\17\2\u01e1f\3\2\2\2\u01e2\u01e3\5!\20\2\u01e3\u01e4\3") + buf.write("\2\2\2\u01e4\u01e5\b\63\20\2\u01e5h\3\2\2\2\u01e6\u01e7") + buf.write("\5#\21\2\u01e7\u01e8\3\2\2\2\u01e8\u01e9\b\64\21\2\u01e9") + buf.write("j\3\2\2\2\u01ea\u01eb\5%\22\2\u01eb\u01ec\3\2\2\2\u01ec") + buf.write("\u01ed\b\65\22\2\u01edl\3\2\2\2\u01ee\u01ef\5\'\23\2\u01ef") + buf.write("\u01f0\3\2\2\2\u01f0\u01f1\b\66\23\2\u01f1n\3\2\2\2\u01f2") + buf.write("\u01f3\5)\24\2\u01f3\u01f4\3\2\2\2\u01f4\u01f5\b\67\24") + buf.write("\2\u01f5p\3\2\2\2\u01f6\u01f7\5+\25\2\u01f7\u01f8\3\2") + buf.write("\2\2\u01f8\u01f9\b8\25\2\u01f9r\3\2\2\2\u01fa\u01fb\5") + buf.write("-\26\2\u01fb\u01fc\3\2\2\2\u01fc\u01fd\b9\26\2\u01fdt") + buf.write("\3\2\2\2\u01fe\u01ff\5/\27\2\u01ff\u0200\3\2\2\2\u0200") + buf.write("\u0201\b:\27\2\u0201v\3\2\2\2\u0202\u0203\5\5\2\2\u0203") + buf.write("\u0204\3\2\2\2\u0204\u0205\b;\4\2\u0205\u0206\b;\30\2") + buf.write("\u0206x\3\2\2\2\u0207\u0208\4/\60\2\u0208\u0209\3\2\2") + buf.write("\2\u0209\u020a\b<\6\2\u020a\u020b\b<\30\2\u020bz\3\2\2") + buf.write("\2\u020c\u020d\5\5\2\2\u020d\u020e\3\2\2\2\u020e\u020f") + buf.write("\b=\4\2\u020f\u0210\b=\30\2\u0210|\3\2\2\2\u0211\u0212") + buf.write("\5\7\3\2\u0212\u0213\3\2\2\2\u0213\u0214\b>\5\2\u0214") + buf.write("\u0215\b>\30\2\u0215~\3\2\2\2\u0216\u0217\5\13\5\2\u0217") + buf.write("\u0218\3\2\2\2\u0218\u0219\b?\6\2\u0219\u021a\b?\30\2") + buf.write("\u021a\u0080\3\2\2\2\u021b\u021c\5\r\6\2\u021c\u021d\3") + buf.write("\2\2\2\u021d\u021e\b@\7\2\u021e\u021f\b@\30\2\u021f\u0082") + buf.write("\3\2\2\2\u0220\u0221\5\17\7\2\u0221\u0222\3\2\2\2\u0222") + buf.write("\u0223\bA\b\2\u0223\u0224\bA\30\2\u0224\u0084\3\2\2\2") + buf.write("\u0225\u0226\5\21\b\2\u0226\u0227\3\2\2\2\u0227\u0228") + buf.write("\bB\t\2\u0228\u0229\bB\30\2\u0229\u0086\3\2\2\2\u022a") + buf.write("\u022b\5\23\t\2\u022b\u022c\3\2\2\2\u022c\u022d\bC\n\2") + buf.write("\u022d\u022e\bC\30\2\u022e\u0088\3\2\2\2\u022f\u0230\5") + buf.write("\25\n\2\u0230\u0231\3\2\2\2\u0231\u0232\bD\13\2\u0232") + buf.write("\u0233\bD\30\2\u0233\u008a\3\2\2\2\u0234\u0235\5\27\13") + buf.write("\2\u0235\u0236\3\2\2\2\u0236\u0237\bE\f\2\u0237\u0238") + buf.write("\bE\30\2\u0238\u008c\3\2\2\2\u0239\u023a\5\33\r\2\u023a") + 
buf.write("\u023b\3\2\2\2\u023b\u023c\bF\r\2\u023c\u023d\bF\30\2") + buf.write("\u023d\u008e\3\2\2\2\u023e\u023f\5\37\17\2\u023f\u0240") + buf.write("\3\2\2\2\u0240\u0241\bG\17\2\u0241\u0242\bG\30\2\u0242") + buf.write("\u0090\3\2\2\2\u0243\u0244\5!\20\2\u0244\u0245\3\2\2\2") + buf.write("\u0245\u0246\bH\20\2\u0246\u0247\bH\30\2\u0247\u0092\3") + buf.write("\2\2\2\u0248\u0249\5#\21\2\u0249\u024a\3\2\2\2\u024a\u024b") + buf.write("\bI\21\2\u024b\u024c\bI\30\2\u024c\u0094\3\2\2\2\u024d") + buf.write("\u024e\5%\22\2\u024e\u024f\3\2\2\2\u024f\u0250\bJ\22\2") + buf.write("\u0250\u0251\bJ\30\2\u0251\u0096\3\2\2\2\u0252\u0253\5") + buf.write("\'\23\2\u0253\u0254\3\2\2\2\u0254\u0255\bK\23\2\u0255") + buf.write("\u0256\bK\30\2\u0256\u0098\3\2\2\2\u0257\u0258\5)\24\2") + buf.write("\u0258\u0259\3\2\2\2\u0259\u025a\bL\24\2\u025a\u025b\b") + buf.write("L\30\2\u025b\u009a\3\2\2\2\u025c\u025d\5+\25\2\u025d\u025e") + buf.write("\3\2\2\2\u025e\u025f\bM\25\2\u025f\u0260\bM\30\2\u0260") + buf.write("\u009c\3\2\2\2\u0261\u0262\5-\26\2\u0262\u0263\3\2\2\2") + buf.write("\u0263\u0264\bN\26\2\u0264\u0265\bN\30\2\u0265\u009e\3") + buf.write("\2\2\2\u0266\u0267\5/\27\2\u0267\u0268\3\2\2\2\u0268\u0269") + buf.write("\bO\27\2\u0269\u026a\bO\30\2\u026a\u00a0\3\2\2\2\60\2") + buf.write("\3\4\u00a3\u00ad\u00b2\u00c0\u00c5\u00d3\u00d7\u00df\u00e2") + buf.write("\u00e7\u00ea\u00ef\u0103\u010a\u010f\u011a\u0120\u0127") + buf.write("\u012a\u012f\u0134\u0136\u0147\u014b\u014e\u0151\u0156") + buf.write("\u0159\u0160\u0166\u0169\u016c\u0171\u0174\u017d\u0181") + buf.write("\u0184\u0187\u019b\u01a0\u01a2\u01a7\u01a9\31\7\3\2\4") + buf.write("\4\2\t\3\2\t\4\2\t\5\2\t\6\2\t\7\2\t\b\2\t\t\2\t\n\2\t") + buf.write("\13\2\t\f\2\t\r\2\t\16\2\t\17\2\t\20\2\t\21\2\t\22\2\t") + buf.write("\23\2\t\24\2\t\25\2\t\26\2\4\2\2") return buf.getvalue() @@ -20,19 +310,72 @@ class udunits2Lexer(Lexer): decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] - ALL = 1 + SHIFT_MODE = 1 + ID_SEEN = 2 + + SIGNED_INT = 1 + PLUS = 2 + MULTIPLY = 3 + DIVIDE = 4 + PERIOD = 5 + OPEN_PAREN = 6 + CLOSE_PAREN = 7 + SEMICOLON = 8 + INT = 9 + E_POWER = 10 + FLOAT = 11 + SHIFT_OP = 12 + UNICODE_EXPONENT = 13 + RAISE = 14 + LOG = 15 + LOGREF = 16 + ID = 17 + LATIN_SUBSET = 18 + WS = 19 + ERRORCHARACTER = 20 + TIMEZONE = 21 + HOUR_MINUTE_SECOND = 22 + HOUR_MINUTE = 23 + M_MINUS = 24 + DATE = 25 + TIMESTAMP = 26 + DT_T_CLOCK = 27 channelNames = [ u"DEFAULT_TOKEN_CHANNEL", u"HIDDEN" ] - modeNames = [ "DEFAULT_MODE" ] + modeNames = [ "DEFAULT_MODE", "SHIFT_MODE", "ID_SEEN" ] literalNames = [ "", - ] + "'+'", "'.'", "'('", "')'", "':'", "'-'" ] symbolicNames = [ "", - "ALL" ] + "SIGNED_INT", "PLUS", "MULTIPLY", "DIVIDE", "PERIOD", "OPEN_PAREN", + "CLOSE_PAREN", "SEMICOLON", "INT", "E_POWER", "FLOAT", "SHIFT_OP", + "UNICODE_EXPONENT", "RAISE", "LOG", "LOGREF", "ID", "LATIN_SUBSET", + "WS", "ERRORCHARACTER", "TIMEZONE", "HOUR_MINUTE_SECOND", "HOUR_MINUTE", + "M_MINUS", "DATE", "TIMESTAMP", "DT_T_CLOCK" ] - ruleNames = [ "ALL" ] + ruleNames = [ "SIGNED_INT", "PLUS", "MINUS", "MULTIPLY", "DIVIDE", "PERIOD", + "OPEN_PAREN", "CLOSE_PAREN", "SEMICOLON", "INT", "ANY_INT", + "E_POWER", "FLOAT", "SHIFT_OP", "UNICODE_EXPONENT", "RAISE", + "LOG", "LOGREF", "ID", "LATIN_SUBSET", "WS", "ERRORCHARACTER", + "TIMEZONE", "SIGN", "HOUR", "MINUTE", "SECOND", "MONTH", + "DAY", "YEAR", "HOUR_MINUTE_SECOND", "HOUR_MINUTE", "M_MINUS", + "DATE", "CLOCK", "TIMESTAMP", "DT_T_CLOCK", "SHIFT_MODE_SIGNED_INT", + "SHIFT_MODE_PLUS", "SHIFT_MODE_MULTIPLY", "SHIFT_MODE_DIVIDE", + 
"SHIFT_MODE_PERIOD", "SHIFT_MODE_OPEN_PAREN", "SHIFT_MODE_CLOSE_PAREN", + "SHIFT_MODE_SEMICOLON", "SHIFT_MODE_INT", "SHIFT_MODE_E_POWER", + "SHIFT_MODE_FLOAT", "SHIFT_MODE_SHIFT_OP", "SHIFT_MODE_UNICODE_EXPONENT", + "SHIFT_MODE_RAISE", "SHIFT_MODE_LOG", "SHIFT_MODE_LOGREF", + "SHIFT_MODE_ID", "SHIFT_MODE_LATIN_SUBSET", "SHIFT_MODE_WS", + "SHIFT_MODE_ERRORCHARACTER", "ID_SEEN_SIGNED_INT", "EXTRA_MULTIPLY", + "ID_SEEN_AUTO_SIGNED_INT", "ID_SEEN_AUTO_PLUS", "ID_SEEN_AUTO_MULTIPLY", + "ID_SEEN_AUTO_DIVIDE", "ID_SEEN_AUTO_PERIOD", "ID_SEEN_AUTO_OPEN_PAREN", + "ID_SEEN_AUTO_CLOSE_PAREN", "ID_SEEN_AUTO_SEMICOLON", + "ID_SEEN_AUTO_INT", "ID_SEEN_AUTO_E_POWER", "ID_SEEN_AUTO_SHIFT_OP", + "ID_SEEN_AUTO_UNICODE_EXPONENT", "ID_SEEN_AUTO_RAISE", + "ID_SEEN_AUTO_LOG", "ID_SEEN_AUTO_LOGREF", "ID_SEEN_AUTO_ID", + "ID_SEEN_AUTO_LATIN_SUBSET", "ID_SEEN_AUTO_WS", "ID_SEEN_AUTO_ERRORCHARACTER" ] grammarFileName = "udunits2Lexer.g4" diff --git a/cf_units/_udunits2_parser/parser/udunits2Parser.py b/cf_units/_udunits2_parser/parser/udunits2Parser.py index 98e3dce4..47e1a8ea 100644 --- a/cf_units/_udunits2_parser/parser/udunits2Parser.py +++ b/cf_units/_udunits2_parser/parser/udunits2Parser.py @@ -1,4 +1,4 @@ -# Generated from udunits2Parser.g4 by ANTLR 4.7.2 +# Generated from /Users/pelson/dev/scitools/cf-units/cf_units/_udunits2_parser/udunits2Parser.g4 by ANTLR 4.7.2 # encoding: utf-8 from antlr4 import * from io import StringIO @@ -8,9 +8,51 @@ def serializedATN(): with StringIO() as buf: - buf.write("\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\3\3") - buf.write("\7\4\2\t\2\3\2\3\2\3\2\2\2\3\2\2\2\2\5\2\4\3\2\2\2\4\5") - buf.write("\7\3\2\2\5\3\3\2\2\2\2") + buf.write("\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\3\35") + buf.write("\u0084\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7") + buf.write("\4\b\t\b\4\t\t\t\4\n\t\n\4\13\t\13\3\2\5\2\30\n\2\3\2") + buf.write("\3\2\3\3\3\3\3\3\5\3\37\n\3\3\3\3\3\5\3#\n\3\3\3\3\3\3") + buf.write("\3\3\3\5\3)\n\3\3\3\3\3\5\3-\n\3\3\3\3\3\5\3\61\n\3\3") + buf.write("\4\3\4\3\4\3\4\3\4\3\4\3\4\3\4\3\4\3\4\3\4\3\4\3\4\6\4") + buf.write("@\n\4\r\4\16\4A\3\4\7\4E\n\4\f\4\16\4H\13\4\3\5\3\5\3") + buf.write("\5\3\5\3\5\3\5\3\5\3\5\3\5\3\5\3\5\5\5U\n\5\3\6\3\6\3") + buf.write("\6\3\6\3\6\3\6\5\6]\n\6\3\7\3\7\3\b\3\b\5\bc\n\b\3\t\3") + buf.write("\t\3\t\5\th\n\t\3\t\3\t\5\tl\n\t\3\t\5\to\n\t\3\t\3\t") + buf.write("\3\t\5\tt\n\t\3\t\5\tw\n\t\5\ty\n\t\3\n\3\n\3\n\5\n~\n") + buf.write("\n\3\13\3\13\5\13\u0082\n\13\3\13\2\3\6\f\2\4\6\b\n\f") + buf.write("\16\20\22\24\2\4\4\2\3\3\13\13\4\2\13\13\33\33\2\u0096") + buf.write("\2\27\3\2\2\2\4\60\3\2\2\2\6\62\3\2\2\2\bT\3\2\2\2\n\\") + buf.write("\3\2\2\2\f^\3\2\2\2\16b\3\2\2\2\20x\3\2\2\2\22}\3\2\2") + buf.write("\2\24\u0081\3\2\2\2\26\30\5\4\3\2\27\26\3\2\2\2\27\30") + buf.write("\3\2\2\2\30\31\3\2\2\2\31\32\7\2\2\3\32\3\3\2\2\2\33\61") + buf.write("\5\6\4\2\34\36\5\6\4\2\35\37\7\25\2\2\36\35\3\2\2\2\36") + buf.write("\37\3\2\2\2\37 \3\2\2\2 \"\7\16\2\2!#\7\25\2\2\"!\3\2") + buf.write("\2\2\"#\3\2\2\2#$\3\2\2\2$%\5\16\b\2%\61\3\2\2\2&(\5\6") + buf.write("\4\2\')\7\25\2\2(\'\3\2\2\2()\3\2\2\2)*\3\2\2\2*,\7\16") + buf.write("\2\2+-\7\25\2\2,+\3\2\2\2,-\3\2\2\2-.\3\2\2\2./\5\20\t") + buf.write("\2/\61\3\2\2\2\60\33\3\2\2\2\60\34\3\2\2\2\60&\3\2\2\2") + buf.write("\61\5\3\2\2\2\62\63\b\4\1\2\63\64\5\b\5\2\64F\3\2\2\2") + buf.write("\65\66\f\6\2\2\66E\5\b\5\2\678\f\5\2\289\7\5\2\29E\5\b") + buf.write("\5\2:;\f\4\2\2;<\7\6\2\2@\7\25\2") + buf.write("\2?>\3\2\2\2@A\3\2\2\2A?\3\2\2\2AB\3\2\2\2BC\3\2\2\2C") + 
buf.write("E\5\b\5\2D\65\3\2\2\2D\67\3\2\2\2D:\3\2\2\2D=\3\2\2\2") + buf.write("EH\3\2\2\2FD\3\2\2\2FG\3\2\2\2G\7\3\2\2\2HF\3\2\2\2IJ") + buf.write("\5\n\6\2JK\5\f\7\2KU\3\2\2\2LU\5\n\6\2MN\5\n\6\2NO\7\20") + buf.write("\2\2OP\5\f\7\2PU\3\2\2\2QR\5\n\6\2RS\7\17\2\2SU\3\2\2") + buf.write("\2TI\3\2\2\2TL\3\2\2\2TM\3\2\2\2TQ\3\2\2\2U\t\3\2\2\2") + buf.write("V]\7\23\2\2WX\7\b\2\2XY\5\4\3\2YZ\7\t\2\2Z]\3\2\2\2[]") + buf.write("\5\16\b\2\\V\3\2\2\2\\W\3\2\2\2\\[\3\2\2\2]\13\3\2\2\2") + buf.write("^_\t\2\2\2_\r\3\2\2\2`c\5\f\7\2ac\7\r\2\2b`\3\2\2\2ba") + buf.write("\3\2\2\2c\17\3\2\2\2dy\t\3\2\2eg\t\3\2\2fh\7\25\2\2gf") + buf.write("\3\2\2\2gh\3\2\2\2hi\3\2\2\2in\5\22\n\2jl\7\25\2\2kj\3") + buf.write("\2\2\2kl\3\2\2\2lm\3\2\2\2mo\5\24\13\2nk\3\2\2\2no\3\2") + buf.write("\2\2oy\3\2\2\2py\7\35\2\2qs\7\34\2\2rt\7\25\2\2sr\3\2") + buf.write("\2\2st\3\2\2\2tv\3\2\2\2uw\5\24\13\2vu\3\2\2\2vw\3\2\2") + buf.write("\2wy\3\2\2\2xd\3\2\2\2xe\3\2\2\2xp\3\2\2\2xq\3\2\2\2y") + buf.write("\21\3\2\2\2z~\7\30\2\2{~\7\31\2\2|~\5\f\7\2}z\3\2\2\2") + buf.write("}{\3\2\2\2}|\3\2\2\2~\23\3\2\2\2\177\u0082\7\31\2\2\u0080") + buf.write("\u0082\5\f\7\2\u0081\177\3\2\2\2\u0081\u0080\3\2\2\2\u0082") + buf.write("\25\3\2\2\2\26\27\36\"(,\60ADFT\\bgknsvx}\u0081") return buf.getvalue() @@ -24,16 +66,62 @@ class udunits2Parser ( Parser ): sharedContextCache = PredictionContextCache() - literalNames = [ ] + literalNames = [ "", "", "'+'", "", "", + "'.'", "'('", "')'", "':'", "", "", + "", "", "", "", + "", "", "", "", + "", "", "", "", + "", "'-'" ] - symbolicNames = [ "", "ALL" ] + symbolicNames = [ "", "SIGNED_INT", "PLUS", "MULTIPLY", "DIVIDE", + "PERIOD", "OPEN_PAREN", "CLOSE_PAREN", "SEMICOLON", + "INT", "E_POWER", "FLOAT", "SHIFT_OP", "UNICODE_EXPONENT", + "RAISE", "LOG", "LOGREF", "ID", "LATIN_SUBSET", "WS", + "ERRORCHARACTER", "TIMEZONE", "HOUR_MINUTE_SECOND", + "HOUR_MINUTE", "M_MINUS", "DATE", "TIMESTAMP", "DT_T_CLOCK" ] RULE_unit_spec = 0 - - ruleNames = [ "unit_spec" ] + RULE_shift_spec = 1 + RULE_product = 2 + RULE_power = 3 + RULE_basic_spec = 4 + RULE_integer = 5 + RULE_number = 6 + RULE_timestamp = 7 + RULE_signed_clock = 8 + RULE_timezone_offset = 9 + + ruleNames = [ "unit_spec", "shift_spec", "product", "power", "basic_spec", + "integer", "number", "timestamp", "signed_clock", "timezone_offset" ] EOF = Token.EOF - ALL=1 + SIGNED_INT=1 + PLUS=2 + MULTIPLY=3 + DIVIDE=4 + PERIOD=5 + OPEN_PAREN=6 + CLOSE_PAREN=7 + SEMICOLON=8 + INT=9 + E_POWER=10 + FLOAT=11 + SHIFT_OP=12 + UNICODE_EXPONENT=13 + RAISE=14 + LOG=15 + LOGREF=16 + ID=17 + LATIN_SUBSET=18 + WS=19 + ERRORCHARACTER=20 + TIMEZONE=21 + HOUR_MINUTE_SECOND=22 + HOUR_MINUTE=23 + M_MINUS=24 + DATE=25 + TIMESTAMP=26 + DT_T_CLOCK=27 def __init__(self, input:TokenStream, output:TextIO = sys.stdout): super().__init__(input, output) @@ -50,19 +138,21 @@ def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): super().__init__(parent, invokingState) self.parser = parser - def ALL(self): - return self.getToken(udunits2Parser.ALL, 0) + def EOF(self): + return self.getToken(udunits2Parser.EOF, 0) + + def shift_spec(self): + return self.getTypedRuleContext(udunits2Parser.Shift_specContext,0) + def getRuleIndex(self): return udunits2Parser.RULE_unit_spec - def enterRule(self, listener:ParseTreeListener): - if hasattr( listener, "enterUnit_spec" ): - listener.enterUnit_spec(self) - - def exitRule(self, listener:ParseTreeListener): - if hasattr( listener, "exitUnit_spec" ): - listener.exitUnit_spec(self) + def accept(self, 
visitor:ParseTreeVisitor): + if hasattr( visitor, "visitUnit_spec" ): + return visitor.visitUnit_spec(self) + else: + return visitor.visitChildren(self) @@ -71,10 +161,135 @@ def unit_spec(self): localctx = udunits2Parser.Unit_specContext(self, self._ctx, self.state) self.enterRule(localctx, 0, self.RULE_unit_spec) + self._la = 0 # Token type try: self.enterOuterAlt(localctx, 1) - self.state = 2 - self.match(udunits2Parser.ALL) + self.state = 21 + self._errHandler.sync(self) + _la = self._input.LA(1) + if (((_la) & ~0x3f) == 0 and ((1 << _la) & ((1 << udunits2Parser.SIGNED_INT) | (1 << udunits2Parser.OPEN_PAREN) | (1 << udunits2Parser.INT) | (1 << udunits2Parser.FLOAT) | (1 << udunits2Parser.ID))) != 0): + self.state = 20 + self.shift_spec() + + + self.state = 23 + self.match(udunits2Parser.EOF) + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class Shift_specContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def product(self): + return self.getTypedRuleContext(udunits2Parser.ProductContext,0) + + + def SHIFT_OP(self): + return self.getToken(udunits2Parser.SHIFT_OP, 0) + + def number(self): + return self.getTypedRuleContext(udunits2Parser.NumberContext,0) + + + def WS(self, i:int=None): + if i is None: + return self.getTokens(udunits2Parser.WS) + else: + return self.getToken(udunits2Parser.WS, i) + + def timestamp(self): + return self.getTypedRuleContext(udunits2Parser.TimestampContext,0) + + + def getRuleIndex(self): + return udunits2Parser.RULE_shift_spec + + def accept(self, visitor:ParseTreeVisitor): + if hasattr( visitor, "visitShift_spec" ): + return visitor.visitShift_spec(self) + else: + return visitor.visitChildren(self) + + + + + def shift_spec(self): + + localctx = udunits2Parser.Shift_specContext(self, self._ctx, self.state) + self.enterRule(localctx, 2, self.RULE_shift_spec) + self._la = 0 # Token type + try: + self.state = 46 + self._errHandler.sync(self) + la_ = self._interp.adaptivePredict(self._input,5,self._ctx) + if la_ == 1: + self.enterOuterAlt(localctx, 1) + self.state = 25 + self.product(0) + pass + + elif la_ == 2: + self.enterOuterAlt(localctx, 2) + self.state = 26 + self.product(0) + self.state = 28 + self._errHandler.sync(self) + _la = self._input.LA(1) + if _la==udunits2Parser.WS: + self.state = 27 + self.match(udunits2Parser.WS) + + + self.state = 30 + self.match(udunits2Parser.SHIFT_OP) + self.state = 32 + self._errHandler.sync(self) + _la = self._input.LA(1) + if _la==udunits2Parser.WS: + self.state = 31 + self.match(udunits2Parser.WS) + + + self.state = 34 + self.number() + pass + + elif la_ == 3: + self.enterOuterAlt(localctx, 3) + self.state = 36 + self.product(0) + self.state = 38 + self._errHandler.sync(self) + _la = self._input.LA(1) + if _la==udunits2Parser.WS: + self.state = 37 + self.match(udunits2Parser.WS) + + + self.state = 40 + self.match(udunits2Parser.SHIFT_OP) + self.state = 42 + self._errHandler.sync(self) + _la = self._input.LA(1) + if _la==udunits2Parser.WS: + self.state = 41 + self.match(udunits2Parser.WS) + + + self.state = 44 + self.timestamp() + pass + + except RecognitionException as re: localctx.exception = re self._errHandler.reportError(self, re) @@ -84,6 +299,684 @@ def unit_spec(self): return localctx + class ProductContext(ParserRuleContext): + + def 
__init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def power(self): + return self.getTypedRuleContext(udunits2Parser.PowerContext,0) + + + def product(self): + return self.getTypedRuleContext(udunits2Parser.ProductContext,0) + + + def MULTIPLY(self): + return self.getToken(udunits2Parser.MULTIPLY, 0) + + def DIVIDE(self): + return self.getToken(udunits2Parser.DIVIDE, 0) + + def WS(self, i:int=None): + if i is None: + return self.getTokens(udunits2Parser.WS) + else: + return self.getToken(udunits2Parser.WS, i) + + def getRuleIndex(self): + return udunits2Parser.RULE_product + + def accept(self, visitor:ParseTreeVisitor): + if hasattr( visitor, "visitProduct" ): + return visitor.visitProduct(self) + else: + return visitor.visitChildren(self) + + + + def product(self, _p:int=0): + _parentctx = self._ctx + _parentState = self.state + localctx = udunits2Parser.ProductContext(self, self._ctx, _parentState) + _prevctx = localctx + _startState = 4 + self.enterRecursionRule(localctx, 4, self.RULE_product, _p) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 49 + self.power() + self._ctx.stop = self._input.LT(-1) + self.state = 68 + self._errHandler.sync(self) + _alt = self._interp.adaptivePredict(self._input,8,self._ctx) + while _alt!=2 and _alt!=ATN.INVALID_ALT_NUMBER: + if _alt==1: + if self._parseListeners is not None: + self.triggerExitRuleEvent() + _prevctx = localctx + self.state = 66 + self._errHandler.sync(self) + la_ = self._interp.adaptivePredict(self._input,7,self._ctx) + if la_ == 1: + localctx = udunits2Parser.ProductContext(self, _parentctx, _parentState) + self.pushNewRecursionContext(localctx, _startState, self.RULE_product) + self.state = 51 + if not self.precpred(self._ctx, 4): + from antlr4.error.Errors import FailedPredicateException + raise FailedPredicateException(self, "self.precpred(self._ctx, 4)") + self.state = 52 + self.power() + pass + + elif la_ == 2: + localctx = udunits2Parser.ProductContext(self, _parentctx, _parentState) + self.pushNewRecursionContext(localctx, _startState, self.RULE_product) + self.state = 53 + if not self.precpred(self._ctx, 3): + from antlr4.error.Errors import FailedPredicateException + raise FailedPredicateException(self, "self.precpred(self._ctx, 3)") + self.state = 54 + self.match(udunits2Parser.MULTIPLY) + self.state = 55 + self.power() + pass + + elif la_ == 3: + localctx = udunits2Parser.ProductContext(self, _parentctx, _parentState) + self.pushNewRecursionContext(localctx, _startState, self.RULE_product) + self.state = 56 + if not self.precpred(self._ctx, 2): + from antlr4.error.Errors import FailedPredicateException + raise FailedPredicateException(self, "self.precpred(self._ctx, 2)") + self.state = 57 + self.match(udunits2Parser.DIVIDE) + self.state = 58 + self.power() + pass + + elif la_ == 4: + localctx = udunits2Parser.ProductContext(self, _parentctx, _parentState) + self.pushNewRecursionContext(localctx, _startState, self.RULE_product) + self.state = 59 + if not self.precpred(self._ctx, 1): + from antlr4.error.Errors import FailedPredicateException + raise FailedPredicateException(self, "self.precpred(self._ctx, 1)") + self.state = 61 + self._errHandler.sync(self) + _la = self._input.LA(1) + while True: + self.state = 60 + self.match(udunits2Parser.WS) + self.state = 63 + self._errHandler.sync(self) + _la = self._input.LA(1) + if not (_la==udunits2Parser.WS): + break + + self.state = 65 + self.power() + 
pass + + + self.state = 70 + self._errHandler.sync(self) + _alt = self._interp.adaptivePredict(self._input,8,self._ctx) + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.unrollRecursionContexts(_parentctx) + return localctx + + + class PowerContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def basic_spec(self): + return self.getTypedRuleContext(udunits2Parser.Basic_specContext,0) + + + def integer(self): + return self.getTypedRuleContext(udunits2Parser.IntegerContext,0) + + + def RAISE(self): + return self.getToken(udunits2Parser.RAISE, 0) + + def UNICODE_EXPONENT(self): + return self.getToken(udunits2Parser.UNICODE_EXPONENT, 0) + + def getRuleIndex(self): + return udunits2Parser.RULE_power + + def accept(self, visitor:ParseTreeVisitor): + if hasattr( visitor, "visitPower" ): + return visitor.visitPower(self) + else: + return visitor.visitChildren(self) + + + + + def power(self): + + localctx = udunits2Parser.PowerContext(self, self._ctx, self.state) + self.enterRule(localctx, 6, self.RULE_power) + try: + self.state = 82 + self._errHandler.sync(self) + la_ = self._interp.adaptivePredict(self._input,9,self._ctx) + if la_ == 1: + self.enterOuterAlt(localctx, 1) + self.state = 71 + self.basic_spec() + self.state = 72 + self.integer() + pass + + elif la_ == 2: + self.enterOuterAlt(localctx, 2) + self.state = 74 + self.basic_spec() + pass + + elif la_ == 3: + self.enterOuterAlt(localctx, 3) + self.state = 75 + self.basic_spec() + self.state = 76 + self.match(udunits2Parser.RAISE) + self.state = 77 + self.integer() + pass + + elif la_ == 4: + self.enterOuterAlt(localctx, 4) + self.state = 79 + self.basic_spec() + self.state = 80 + self.match(udunits2Parser.UNICODE_EXPONENT) + pass + + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class Basic_specContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def ID(self): + return self.getToken(udunits2Parser.ID, 0) + + def OPEN_PAREN(self): + return self.getToken(udunits2Parser.OPEN_PAREN, 0) + + def shift_spec(self): + return self.getTypedRuleContext(udunits2Parser.Shift_specContext,0) + + + def CLOSE_PAREN(self): + return self.getToken(udunits2Parser.CLOSE_PAREN, 0) + + def number(self): + return self.getTypedRuleContext(udunits2Parser.NumberContext,0) + + + def getRuleIndex(self): + return udunits2Parser.RULE_basic_spec + + def accept(self, visitor:ParseTreeVisitor): + if hasattr( visitor, "visitBasic_spec" ): + return visitor.visitBasic_spec(self) + else: + return visitor.visitChildren(self) + + + + + def basic_spec(self): + + localctx = udunits2Parser.Basic_specContext(self, self._ctx, self.state) + self.enterRule(localctx, 8, self.RULE_basic_spec) + try: + self.state = 90 + self._errHandler.sync(self) + token = self._input.LA(1) + if token in [udunits2Parser.ID]: + self.enterOuterAlt(localctx, 1) + self.state = 84 + self.match(udunits2Parser.ID) + pass + elif token in [udunits2Parser.OPEN_PAREN]: + self.enterOuterAlt(localctx, 2) + self.state = 85 + self.match(udunits2Parser.OPEN_PAREN) + self.state = 86 + self.shift_spec() + self.state = 
87 + self.match(udunits2Parser.CLOSE_PAREN) + pass + elif token in [udunits2Parser.SIGNED_INT, udunits2Parser.INT, udunits2Parser.FLOAT]: + self.enterOuterAlt(localctx, 3) + self.state = 89 + self.number() + pass + else: + raise NoViableAltException(self) + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class IntegerContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def INT(self): + return self.getToken(udunits2Parser.INT, 0) + + def SIGNED_INT(self): + return self.getToken(udunits2Parser.SIGNED_INT, 0) + + def getRuleIndex(self): + return udunits2Parser.RULE_integer + + def accept(self, visitor:ParseTreeVisitor): + if hasattr( visitor, "visitInteger" ): + return visitor.visitInteger(self) + else: + return visitor.visitChildren(self) + + + + + def integer(self): + + localctx = udunits2Parser.IntegerContext(self, self._ctx, self.state) + self.enterRule(localctx, 10, self.RULE_integer) + self._la = 0 # Token type + try: + self.enterOuterAlt(localctx, 1) + self.state = 92 + _la = self._input.LA(1) + if not(_la==udunits2Parser.SIGNED_INT or _la==udunits2Parser.INT): + self._errHandler.recoverInline(self) + else: + self._errHandler.reportMatch(self) + self.consume() + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class NumberContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def integer(self): + return self.getTypedRuleContext(udunits2Parser.IntegerContext,0) + + + def FLOAT(self): + return self.getToken(udunits2Parser.FLOAT, 0) + + def getRuleIndex(self): + return udunits2Parser.RULE_number + + def accept(self, visitor:ParseTreeVisitor): + if hasattr( visitor, "visitNumber" ): + return visitor.visitNumber(self) + else: + return visitor.visitChildren(self) + + + + + def number(self): + + localctx = udunits2Parser.NumberContext(self, self._ctx, self.state) + self.enterRule(localctx, 12, self.RULE_number) + try: + self.state = 96 + self._errHandler.sync(self) + token = self._input.LA(1) + if token in [udunits2Parser.SIGNED_INT, udunits2Parser.INT]: + self.enterOuterAlt(localctx, 1) + self.state = 94 + self.integer() + pass + elif token in [udunits2Parser.FLOAT]: + self.enterOuterAlt(localctx, 2) + self.state = 95 + self.match(udunits2Parser.FLOAT) + pass + else: + raise NoViableAltException(self) + + except RecognitionException as re: + localctx.exception = re + self._errHandler.reportError(self, re) + self._errHandler.recover(self, re) + finally: + self.exitRule() + return localctx + + + class TimestampContext(ParserRuleContext): + + def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1): + super().__init__(parent, invokingState) + self.parser = parser + + def DATE(self): + return self.getToken(udunits2Parser.DATE, 0) + + def INT(self): + return self.getToken(udunits2Parser.INT, 0) + + def signed_clock(self): + return self.getTypedRuleContext(udunits2Parser.Signed_clockContext,0) + + + def WS(self, i:int=None): + if i is None: + return self.getTokens(udunits2Parser.WS) + else: + return self.getToken(udunits2Parser.WS, i) + + 
def timezone_offset(self):
+            return self.getTypedRuleContext(udunits2Parser.Timezone_offsetContext,0)
+
+
+        def DT_T_CLOCK(self):
+            return self.getToken(udunits2Parser.DT_T_CLOCK, 0)
+
+        def TIMESTAMP(self):
+            return self.getToken(udunits2Parser.TIMESTAMP, 0)
+
+        def getRuleIndex(self):
+            return udunits2Parser.RULE_timestamp
+
+        def accept(self, visitor:ParseTreeVisitor):
+            if hasattr( visitor, "visitTimestamp" ):
+                return visitor.visitTimestamp(self)
+            else:
+                return visitor.visitChildren(self)
+
+
+
+
+    def timestamp(self):
+
+        localctx = udunits2Parser.TimestampContext(self, self._ctx, self.state)
+        self.enterRule(localctx, 14, self.RULE_timestamp)
+        self._la = 0 # Token type
+        try:
+            self.state = 118
+            self._errHandler.sync(self)
+            la_ = self._interp.adaptivePredict(self._input,17,self._ctx)
+            if la_ == 1:
+                self.enterOuterAlt(localctx, 1)
+                self.state = 98
+                _la = self._input.LA(1)
+                if not(_la==udunits2Parser.INT or _la==udunits2Parser.DATE):
+                    self._errHandler.recoverInline(self)
+                else:
+                    self._errHandler.reportMatch(self)
+                    self.consume()
+                pass
+
+            elif la_ == 2:
+                self.enterOuterAlt(localctx, 2)
+                self.state = 99
+                _la = self._input.LA(1)
+                if not(_la==udunits2Parser.INT or _la==udunits2Parser.DATE):
+                    self._errHandler.recoverInline(self)
+                else:
+                    self._errHandler.reportMatch(self)
+                    self.consume()
+                self.state = 101
+                self._errHandler.sync(self)
+                _la = self._input.LA(1)
+                if _la==udunits2Parser.WS:
+                    self.state = 100
+                    self.match(udunits2Parser.WS)
+
+
+                self.state = 103
+                self.signed_clock()
+                self.state = 108
+                self._errHandler.sync(self)
+                _la = self._input.LA(1)
+                if (((_la) & ~0x3f) == 0 and ((1 << _la) & ((1 << udunits2Parser.SIGNED_INT) | (1 << udunits2Parser.INT) | (1 << udunits2Parser.WS) | (1 << udunits2Parser.HOUR_MINUTE))) != 0):
+                    self.state = 105
+                    self._errHandler.sync(self)
+                    _la = self._input.LA(1)
+                    if _la==udunits2Parser.WS:
+                        self.state = 104
+                        self.match(udunits2Parser.WS)
+
+
+                    self.state = 107
+                    self.timezone_offset()
+
+
+                pass
+
+            elif la_ == 3:
+                self.enterOuterAlt(localctx, 3)
+                self.state = 110
+                self.match(udunits2Parser.DT_T_CLOCK)
+                pass
+
+            elif la_ == 4:
+                self.enterOuterAlt(localctx, 4)
+                self.state = 111
+                self.match(udunits2Parser.TIMESTAMP)
+                self.state = 113
+                self._errHandler.sync(self)
+                _la = self._input.LA(1)
+                if _la==udunits2Parser.WS:
+                    self.state = 112
+                    self.match(udunits2Parser.WS)
+
+
+                self.state = 116
+                self._errHandler.sync(self)
+                _la = self._input.LA(1)
+                if (((_la) & ~0x3f) == 0 and ((1 << _la) & ((1 << udunits2Parser.SIGNED_INT) | (1 << udunits2Parser.INT) | (1 << udunits2Parser.HOUR_MINUTE))) != 0):
+                    self.state = 115
+                    self.timezone_offset()
+
+
+                pass
+
+
+        except RecognitionException as re:
+            localctx.exception = re
+            self._errHandler.reportError(self, re)
+            self._errHandler.recover(self, re)
+        finally:
+            self.exitRule()
+        return localctx
+
+
+    class Signed_clockContext(ParserRuleContext):
+
+        def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1):
+            super().__init__(parent, invokingState)
+            self.parser = parser
+
+        def HOUR_MINUTE_SECOND(self):
+            return self.getToken(udunits2Parser.HOUR_MINUTE_SECOND, 0)
+
+        def HOUR_MINUTE(self):
+            return self.getToken(udunits2Parser.HOUR_MINUTE, 0)
+
+        def integer(self):
+            return self.getTypedRuleContext(udunits2Parser.IntegerContext,0)
+
+
+        def getRuleIndex(self):
+            return udunits2Parser.RULE_signed_clock
+
+        def accept(self, visitor:ParseTreeVisitor):
+            if hasattr( visitor, "visitSigned_clock" ):
+                return visitor.visitSigned_clock(self)
+            else:
+                return visitor.visitChildren(self)
+
+
+
+
+    def signed_clock(self):
+
+        localctx = udunits2Parser.Signed_clockContext(self, self._ctx, self.state)
+        self.enterRule(localctx, 16, self.RULE_signed_clock)
+        try:
+            self.state = 123
+            self._errHandler.sync(self)
+            token = self._input.LA(1)
+            if token in [udunits2Parser.HOUR_MINUTE_SECOND]:
+                self.enterOuterAlt(localctx, 1)
+                self.state = 120
+                self.match(udunits2Parser.HOUR_MINUTE_SECOND)
+                pass
+            elif token in [udunits2Parser.HOUR_MINUTE]:
+                self.enterOuterAlt(localctx, 2)
+                self.state = 121
+                self.match(udunits2Parser.HOUR_MINUTE)
+                pass
+            elif token in [udunits2Parser.SIGNED_INT, udunits2Parser.INT]:
+                self.enterOuterAlt(localctx, 3)
+                self.state = 122
+                self.integer()
+                pass
+            else:
+                raise NoViableAltException(self)
+
+        except RecognitionException as re:
+            localctx.exception = re
+            self._errHandler.reportError(self, re)
+            self._errHandler.recover(self, re)
+        finally:
+            self.exitRule()
+        return localctx
+
+
+    class Timezone_offsetContext(ParserRuleContext):
+
+        def __init__(self, parser, parent:ParserRuleContext=None, invokingState:int=-1):
+            super().__init__(parent, invokingState)
+            self.parser = parser
+
+        def HOUR_MINUTE(self):
+            return self.getToken(udunits2Parser.HOUR_MINUTE, 0)
+
+        def integer(self):
+            return self.getTypedRuleContext(udunits2Parser.IntegerContext,0)
+
+
+        def getRuleIndex(self):
+            return udunits2Parser.RULE_timezone_offset
+
+        def accept(self, visitor:ParseTreeVisitor):
+            if hasattr( visitor, "visitTimezone_offset" ):
+                return visitor.visitTimezone_offset(self)
+            else:
+                return visitor.visitChildren(self)
+
+
+
+
+    def timezone_offset(self):
+
+        localctx = udunits2Parser.Timezone_offsetContext(self, self._ctx, self.state)
+        self.enterRule(localctx, 18, self.RULE_timezone_offset)
+        try:
+            self.state = 127
+            self._errHandler.sync(self)
+            token = self._input.LA(1)
+            if token in [udunits2Parser.HOUR_MINUTE]:
+                self.enterOuterAlt(localctx, 1)
+                self.state = 125
+                self.match(udunits2Parser.HOUR_MINUTE)
+                pass
+            elif token in [udunits2Parser.SIGNED_INT, udunits2Parser.INT]:
+                self.enterOuterAlt(localctx, 2)
+                self.state = 126
+                self.integer()
+                pass
+            else:
+                raise NoViableAltException(self)
+
+        except RecognitionException as re:
+            localctx.exception = re
+            self._errHandler.reportError(self, re)
+            self._errHandler.recover(self, re)
+        finally:
+            self.exitRule()
+        return localctx
+
+
+
+    def sempred(self, localctx:RuleContext, ruleIndex:int, predIndex:int):
+        if self._predicates == None:
+            self._predicates = dict()
+        self._predicates[2] = self.product_sempred
+        pred = self._predicates.get(ruleIndex, None)
+        if pred is None:
+            raise Exception("No predicate with index:" + str(ruleIndex))
+        else:
+            return pred(localctx, predIndex)
+
+    def product_sempred(self, localctx:ProductContext, predIndex:int):
+            if predIndex == 0:
+                return self.precpred(self._ctx, 4)
+
+
+            if predIndex == 1:
+                return self.precpred(self._ctx, 3)
+
+
+            if predIndex == 2:
+                return self.precpred(self._ctx, 2)
+
+
+            if predIndex == 3:
+                return self.precpred(self._ctx, 1)
+
+
diff --git a/cf_units/_udunits2_parser/parser/udunits2ParserListener.py b/cf_units/_udunits2_parser/parser/udunits2ParserListener.py
deleted file mode 100644
index 6e699a30..00000000
--- a/cf_units/_udunits2_parser/parser/udunits2ParserListener.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# Generated from udunits2Parser.g4 by ANTLR 4.7.2
-from antlr4 import *
-if __name__ is not None and "." in __name__:
-    from .udunits2Parser import udunits2Parser
-else:
-    from udunits2Parser import udunits2Parser
-
-# This class defines a complete listener for a parse tree produced by udunits2Parser.
-class udunits2ParserListener(ParseTreeListener):
-
-    # Enter a parse tree produced by udunits2Parser#unit_spec.
-    def enterUnit_spec(self, ctx:udunits2Parser.Unit_specContext):
-        pass
-
-    # Exit a parse tree produced by udunits2Parser#unit_spec.
-    def exitUnit_spec(self, ctx:udunits2Parser.Unit_specContext):
-        pass
-
-
diff --git a/cf_units/_udunits2_parser/parser/udunits2ParserVisitor.py b/cf_units/_udunits2_parser/parser/udunits2ParserVisitor.py
new file mode 100644
index 00000000..338b9a07
--- /dev/null
+++ b/cf_units/_udunits2_parser/parser/udunits2ParserVisitor.py
@@ -0,0 +1,63 @@
+# Generated from /Users/pelson/dev/scitools/cf-units/cf_units/_udunits2_parser/udunits2Parser.g4 by ANTLR 4.7.2
+from antlr4 import *
+if __name__ is not None and "." in __name__:
+    from .udunits2Parser import udunits2Parser
+else:
+    from udunits2Parser import udunits2Parser
+
+# This class defines a complete generic visitor for a parse tree produced by udunits2Parser.
+
+class udunits2ParserVisitor(ParseTreeVisitor):
+
+    # Visit a parse tree produced by udunits2Parser#unit_spec.
+    def visitUnit_spec(self, ctx:udunits2Parser.Unit_specContext):
+        return self.visitChildren(ctx)
+
+
+    # Visit a parse tree produced by udunits2Parser#shift_spec.
+    def visitShift_spec(self, ctx:udunits2Parser.Shift_specContext):
+        return self.visitChildren(ctx)
+
+
+    # Visit a parse tree produced by udunits2Parser#product.
+    def visitProduct(self, ctx:udunits2Parser.ProductContext):
+        return self.visitChildren(ctx)
+
+
+    # Visit a parse tree produced by udunits2Parser#power.
+    def visitPower(self, ctx:udunits2Parser.PowerContext):
+        return self.visitChildren(ctx)
+
+
+    # Visit a parse tree produced by udunits2Parser#basic_spec.
+    def visitBasic_spec(self, ctx:udunits2Parser.Basic_specContext):
+        return self.visitChildren(ctx)
+
+
+    # Visit a parse tree produced by udunits2Parser#integer.
+    def visitInteger(self, ctx:udunits2Parser.IntegerContext):
+        return self.visitChildren(ctx)
+
+
+    # Visit a parse tree produced by udunits2Parser#number.
+    def visitNumber(self, ctx:udunits2Parser.NumberContext):
+        return self.visitChildren(ctx)
+
+
+    # Visit a parse tree produced by udunits2Parser#timestamp.
+    def visitTimestamp(self, ctx:udunits2Parser.TimestampContext):
+        return self.visitChildren(ctx)
+
+
+    # Visit a parse tree produced by udunits2Parser#signed_clock.
+    def visitSigned_clock(self, ctx:udunits2Parser.Signed_clockContext):
+        return self.visitChildren(ctx)
+
+
+    # Visit a parse tree produced by udunits2Parser#timezone_offset.
+    def visitTimezone_offset(self, ctx:udunits2Parser.Timezone_offsetContext):
+        return self.visitChildren(ctx)
+
+
+
+del udunits2Parser
\ No newline at end of file
diff --git a/cf_units/_udunits2_parser/udunits2Lexer.g4.jinja b/cf_units/_udunits2_parser/udunits2Lexer.g4.jinja
index a15f500b..58ad2a3e 100644
--- a/cf_units/_udunits2_parser/udunits2Lexer.g4.jinja
+++ b/cf_units/_udunits2_parser/udunits2Lexer.g4.jinja
@@ -1,3 +1,156 @@
+// Derived from https://www.unidata.ucar.edu/software/udunits/udunits-2.0.4/udunits2lib.html#Grammar
+
 lexer grammar udunits2Lexer;
 
-ALL: .;
+// Whitespace is significant, and is not ignored.
+// For example: "m 2" == "m*2", yet "m2" == "m^2".
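+//
+// A quick Python illustration of that distinction (a sketch using only the
+// public cf_units API; the example units here are ours, not from the grammar):
+//
+//     >>> import cf_units
+//     >>> cf_units.Unit('m 2') == cf_units.Unit('2 m')   # whitespace multiplies
+//     True
+//     >>> cf_units.Unit('m2') == cf_units.Unit('m^2')    # juxtaposition exponentiates
+//     True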
+
+
+SIGNED_INT : (MINUS|PLUS) INT ;
+
+PLUS: '+' ;
+fragment MINUS: '-' ;
+MULTIPLY: ('*' | '·' | MINUS) ;
+DIVIDE: WS* ('/' | ' PER ' | ' per ') WS*;
+PERIOD: '.' ;
+
+OPEN_PAREN: '(' ;
+CLOSE_PAREN: ')' ;
+SEMICOLON: ':' ;
+
+INT : '0'..'9'+ ;
+
+
+fragment ANY_INT: INT | SIGNED_INT ;
+E_POWER: ('E' | 'e') ANY_INT ;
+FLOAT:
+    // NOTE: floats themselves are context sensitive. (e.g. m2.3 === m^2 * 3 in udunits2)
+    // For this reason, FLOATS are parsed *after* MULTIPLY (which contains '.').
+    // This behaviour is reversed immediately after seeing an ID token.
+    // Example: -2.e5
+    (((ANY_INT PERIOD INT?)
+      |(ANY_INT? PERIOD INT)
+     ) E_POWER?)  // 1.2e-5, 1e2, 2.e4
+    | (ANY_INT E_POWER)
+;
+
+
+SHIFT_OP:
+    ( '@'
+    | 'after'
+    | 'from'
+    | 'since'
+    | 'ref'
+    ) -> pushMode(SHIFT_MODE)
+;
+
+
+UNICODE_EXPONENT:
+    // One or more Unicode superscript ("exponent") characters.
+    ('⁻' | '⁺' | '¹' | '²' | '³' | '⁴' | '⁵' | '⁶' | '⁷' | '⁸' | '⁹' | '⁰')+
+;
+
+RAISE :
+    ( '^'
+    | '**'
+    )
+;
+
+LOG: 'log' | 'lg' | 'ln' | 'lb';
+
+LOGREF: '(' WS* ('RE' | 're') ':'? WS*;
+
+//ID: one of
+//
+//    '%'
+//    "'"
+//    "\""
+//    degree sign
+//    greek mu character
+//
+
+ID: ([A-Za-z_] | LATIN_SUBSET)+ -> mode(ID_SEEN);
+
+LATIN_SUBSET:
+    '\u00C0'..'\u00D6'    // UDUNITS implementation "\xc3([\x80-\x96])"
+    | '\u00D8'..'\u00F6'  // UDUNITS implementation "\xc3([\x98-\xB6])"
+    | '\u00F8'..'\u00FF'  // UDUNITS implementation "\xc3([\xB8-\xBF])"
+    | '\u0080' | '\u00AD' // U+0080 (a C1 control) and U+00AD (soft hyphen). Why are these in UDUNITS?!
+    | '\u00B0'            // Degree symbol °
+    | '\u00B5'            // Mu µ
+    | 'π' | 'Ω'           // NOTE: Other symbols are allowed, as long as they are defined in the XML.
+;
+
+WS : [ ] ;
+
+// Any characters which fail to match should raise an error.
+ERRORCHARACTER : . ;
+
+
+mode SHIFT_MODE;
+// This mode is only enabled after seeing a SHIFT_OP token.
+// However, it is worth noting that the simplest form is just a number (e.g. m@10).
+
+// S_WS: ' ' -> skip;
+
+
+TIMEZONE: 'UTC' | 'Z' | 'GMT';
+
+fragment SIGN:
+    PLUS | MINUS
+;
+
+fragment HOUR:
+    // NOTE: -19 is fine, -20 is not (in the current udunits-2 implementation).
+    (SIGN? ('0'..'1')? ('0'..'9')) | (('2' ('0'..'3')))
+;
+
+fragment MINUTE:
+    ('0'..'5')? ('0'..'9')
+;
+
+fragment SECOND:
+    (MINUTE | '60') (PERIOD ('0'..'9')*)?
+;
+
+fragment MONTH:
+    ('0'? ('1'..'9')) | ('1' ('0'..'2'))
+;
+
+fragment DAY:
+    ('0'? ('1'..'9')) | (('1'..'2')('0'..'9')) | '30' | '31'
+;
+
+fragment YEAR:
+    ANY_INT INT? INT? INT?  // e.g. 9, 0001, 150, +2001 (=year 200 in UDUNITS2 implementation)
+;
+
+HOUR_MINUTE_SECOND: HOUR ':' MINUTE ':' SECOND;
+HOUR_MINUTE: HOUR ':' MINUTE;
+M_MINUS: '-';
+DATE: (YEAR MINUS MONTH (MINUS DAY)?);
+
+fragment CLOCK: HOUR (MINUTE SECOND?)?;
+
+TIMESTAMP: (YEAR (MONTH DAY?)? 'T' CLOCK);
+
+DT_T_CLOCK: DATE 'T' CLOCK;  // UNDOCUMENTED
+
+{% for TOKEN in tokens['DEFAULT_MODE'] -%}
+    SHIFT_MODE_{{ TOKEN }}: {{TOKEN}} -> type({{TOKEN}});
+{% endfor %}
+
+
+mode ID_SEEN;
+
+ID_SEEN_SIGNED_INT: SIGNED_INT -> type(SIGNED_INT), mode(DEFAULT_MODE);
+
+EXTRA_MULTIPLY: (
+    '.'    // m.m, m.2 (=2m)
+    | '-'  // m-m (=m^2), m--2 (=-2m)
+) -> type(MULTIPLY), mode(DEFAULT_MODE);
+
+{% for TOKEN in tokens['DEFAULT_MODE'] if TOKEN not in ['FLOAT'] -%}
+    ID_SEEN_AUTO_{{ TOKEN }}: {{TOKEN}} -> type({{TOKEN}}), mode(DEFAULT_MODE);
+{% endfor %}
+// inherit // from DEFAULT_MODE: *;
diff --git a/cf_units/_udunits2_parser/udunits2Parser.g4 b/cf_units/_udunits2_parser/udunits2Parser.g4
index 5a86002d..014581bf 100644
--- a/cf_units/_udunits2_parser/udunits2Parser.g4
+++ b/cf_units/_udunits2_parser/udunits2Parser.g4
@@ -1,6 +1,73 @@
+// Derived from https://www.unidata.ucar.edu/software/udunits/udunits-2.0.4/udunits2lib.html#Grammar
+
 parser grammar udunits2Parser;
-
+
+// Use tokens from our UDUNITS2 lex rules.
 options { tokenVocab=udunits2Lexer; }
 
-unit_spec: ALL;
+unit_spec:
+    shift_spec? EOF  // Zero or one "shift_spec", followed by the end of the input.
+;
+
+shift_spec:
+    product
+    | product WS? SHIFT_OP WS? number     // e.g. Kelvin @ 273.15
+    | product WS? SHIFT_OP WS? timestamp  // e.g. hours since 2001-12-31 23:59:59.999 +6
+;
+
+product:
+    power
+    | product power           // e.g. m2s (s*m^2)
+    | product MULTIPLY power  // e.g. m2*s
+    | product DIVIDE power    // e.g. m2/2
+    | product WS+ power       // e.g. "m2 s"
+;
+
+power:
+    basic_spec integer  // e.g. m+2, m2. Note that this occurs *before* basic_spec,
+                        // as m2 should be matched before m so that the precedence
+                        // of power is greater than that of multiplication
+                        // (e.g. m2==m^2, not m*2).
+    | basic_spec
+    | basic_spec RAISE integer     // e.g. m^2
+    | basic_spec UNICODE_EXPONENT  // e.g. m²
+;
+
+basic_spec:
+    ID
+    | '(' shift_spec ')'
+// Log not yet implemented, but it is supported in UDUNITS2.
+//    | LOGREF product_spec ')'
+    | number
+;
+
+integer:
+    INT | SIGNED_INT
+;
+
+number:
+    integer | FLOAT
+;
+
+
+timestamp:
+    (DATE | INT)  // e.g. "s since 1990", "s since 1990-01[-02]"
+
+    | ((DATE | INT) WS? signed_clock (WS? timezone_offset)?)  // e.g. "s since 1990-01-01 12:21 +6"
+
+    | DT_T_CLOCK  // e.g. "s since 1990-1-2T1900"
+    | (TIMESTAMP WS? timezone_offset?)  // e.g. "s since 19900101T190030"
+;
+
+signed_clock:
+    HOUR_MINUTE_SECOND  // e.g. 10:11:12
+    | HOUR_MINUTE       // e.g. 10:11
+    | integer           // e.g. +101112
+;
+
+timezone_offset:
+    HOUR_MINUTE  // e.g. 10:11
+    | integer    // e.g. 1011
+    // NOTE: UDUNITS2 also supports named timezones, but these aren't documented
+    // in the grammar, and aren't yet implemented here.
+;
diff --git a/cf_units/conftest.py b/cf_units/conftest.py
index 2c86725d..f8adaba4 100644
--- a/cf_units/conftest.py
+++ b/cf_units/conftest.py
@@ -36,7 +36,13 @@
     all_compiled_parse_py = glob.glob(
         os.path.join(here, '_udunits2_parser', '*.py'))
 
+    # Files under cf_units/tests/integration/parse are python3 *only*.
+    parse_test_files = glob.glob(
+        os.path.join(here, 'tests', 'integration', 'parse', '*.py'))
+
     # collect_ignore is the special variable that pytest reads to
     # indicate which files should be ignored (and not even imported).
     # See also https://docs.pytest.org/en/latest/example/pythoncollection.html
-    collect_ignore = list(all_parse_py) + list(all_compiled_parse_py)
+    collect_ignore = (list(all_parse_py) +
+                      list(all_compiled_parse_py) +
+                      list(parse_test_files))
diff --git a/cf_units/tests/integration/parse/test_graph.py b/cf_units/tests/integration/parse/test_graph.py
new file mode 100644
index 00000000..2c8a6e7a
--- /dev/null
+++ b/cf_units/tests/integration/parse/test_graph.py
@@ -0,0 +1,72 @@
+# (C) British Crown Copyright 2019, Met Office
+#
+# This file is part of cf-units.
+#
+# cf-units is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# cf-units is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with cf-units. If not, see <http://www.gnu.org/licenses/>.
+
+from cf_units._udunits2_parser import parse
+import cf_units._udunits2_parser.graph as g
+
+
+def test_Node_attributes():
+    n = g.Node(a=1, kwarg='two', arbitrary_kwargs=3)
+
+    assert n.a == 1
+    assert n.kwarg == 'two'
+    assert n.arbitrary_kwargs == 3
+
+
+def test_Node_str():
+    n = g.Node(a=1, kwarg='two', arbitrary_kwargs=3)
+    assert str(n) == "Node(a=1, kwarg='two', arbitrary_kwargs=3)"
+
+
+def test_Node_children():
+    n = g.Node(a=1, kwarg='two', arbitrary_kwargs=3)
+    # Ordered, and consistent.
+    assert n.children() == [1, 'two', 3]
+
+
+def test_large_graph():
+    graph = parse('m2/4.1.2π per second @ 10')
+    assert isinstance(graph, g.Shift)
+
+    unit, shift_from = graph.children()
+    assert isinstance(shift_from, g.Number)
+    assert str(shift_from) == '10'
+
+    assert isinstance(unit, g.Divide)
+    lhs, rhs = unit.children()
+    assert str(lhs) == 'm^2/4.1·.2·π'
+    assert str(rhs) == 'second'
+
+    assert isinstance(lhs, g.Multiply)
+    lhs, rhs = lhs.children()
+    assert str(lhs) == 'm^2/4.1·.2'
+    assert str(rhs) == 'π'
+
+    assert isinstance(lhs, g.Multiply)
+    lhs, rhs = lhs.children()
+    assert str(lhs) == 'm^2/4.1'
+    assert str(rhs) == '.2'
+
+    assert isinstance(lhs, g.Divide)
+    lhs, rhs = lhs.children()
+    assert str(lhs) == 'm^2'
+    assert str(rhs) == '4.1'
+
+    assert isinstance(lhs, g.Raise)
+    lhs, rhs = lhs.children()
+    assert str(lhs) == 'm'
+    assert str(rhs) == '2'
diff --git a/cf_units/tests/integration/parse/test_parse.py b/cf_units/tests/integration/parse/test_parse.py
new file mode 100644
index 00000000..c46ac31e
--- /dev/null
+++ b/cf_units/tests/integration/parse/test_parse.py
@@ -0,0 +1,327 @@
+# (C) British Crown Copyright 2019, Met Office
+#
+# This file is part of cf-units.
+#
+# cf-units is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# cf-units is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with cf-units. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+
+import pytest
+
+import cf_units
+from cf_units._udunits2_parser import normalize
+
+
+testdata = [
+    '',
+    '1',
+    '12',
+    '1.2',
+    '+1',
+    '+1.2',
+    '-1',
+    '-1.2',
+    '-1.2e0',
+    '2e6',
+    '2e-6',
+    '2.e-6',
+    '.1e2',
+    '.1e2.2',
+    '2e',  # <- TODO: Assert this isn't 2e1, but is in fact the unit e * 2.
+    'm',
+    'meter',
+
+    # Multiplication
+    '1 2 3',
+    '1 -2 -3',
+    '1m',
+    '1*m',
+    'm·m',
+    '1 m',
+    '1 m',
+    'm -1',
+    'm -1.2',
+    'm 1',
+    'm 1.2',
+    'm-+2',
+    'm--4',
+    'm*1*2',
+    'm--2--3',
+
+    # Brackets. (TODO: add more tests with brackets.)
+    'm(2.3)',
+    'm(2.3m)',
+    '(1.2)(2.4)',
+    '(5m(6s-1))',
+    '2*3*4/5m/6*7*8',
+
+
+    'm/2',
+    'm1',
+    'm m',
+    'm2',
+    'm+2',
+    'm¹',
+    'm²',
+    'm³',
+    '2⁴',  # NOTE: Udunits can't do m⁴ for some reason. Bug?
+    '2⁵',
+    '2⁴²',
+    '3⁻²',
+    'm2 s2',
+    'm^2*s^2',
+
+    '1-2',
+    '1-2-3',  # nb. looks a bit like a date, but it isn't!
+    'm-1',
+    'm^2',
+    'm^+2',
+    'm^-1',
+    'm.2',     # This is 2*m
+    'm.+2',    # 2*m
+    'm.2.4',   # This is 2.4 * m
+    'm0.2',    # But this is 2 m^0
+    'm2.5',    # And this is 5m^2
+    'm2.3.4',  # 0.4 * m^2
+    'm--1',
+
+    # Division
+    'm per 2',
+    'm per s',
+    'm / 2',
+
+    # Shift
+    'm@10',
+    'm @10',
+    'm @ 10',
+    'm@ 10',
+    'm from2',
+    'm from2e-1',
+    '(m @ 10) (s @ 10)',
+
+    # Date shift
+    's from 1990',
+    'minutes since 1990',
+    'hour@1990',
+    'hours from 1990-1',
+    'hours from 1990-1-1',
+    'hours from 1990-1-1 0',
+    'hours from 1990-1-1 0:1:1',
+    'hours from 1990-1-1 0:0:1 +2',
+    's since 1990-1-2+5:2:2',
+    's since 1990-1-2+5:2',
+    's since 1990-1-2 5 6:0',  # Undocumented packed_clock format?
+    's since 19900102T5',      # Packed format (undocumented?)
+    's since 19900101T190030 +2',
+    's since 199022T1',        # UGLY! (bug?).
+
+    's since 1990 +2:0:2.9',
+    's since 1990-2T1',
+    'hours from 1990-1-1 -19:4:2',
+    'hours from 1990-1-1 3+1',
+
+    'seconds from 1990-1-1 0:0:0 +2550',
+    's since 1990-1-2+5:2:2',
+    'hours from 1990-1-1 0:1:60',
+    'hours from 1990-1-1 0:1:62',
+
+    '(hours since 1900) (s since 1980)',  # Really fruity behaviour.
+
+    # Unicode / constants
+    'π',
+    'e',
+    '°C',
+]
+
+invalid = [
+    '1 * m',
+    'm--m',
+    '-m',
+    '.1e2.',
+    'm+-1',
+    '--1',
+    '+-1',
+    '--3.1',
+    '$',
+    '£',
+    'hours from 1990-0-0 0:0:0',
+    'hours since 1900-1 10:12 10:0 1',
+    's since 1990:01:02T1900 +1',
+]
+
+
+@pytest.mark.parametrize("_, unit_str", enumerate(testdata))
+def test_normed_units_equivalent(_, unit_str):
+    # nb: The "_" argument makes it easier to see which test was being run.
+
+    # Get the udunits symbolic form for the raw unit.
+    raw_symbol = cf_units.Unit(unit_str).symbol
+
+    # Now get the parsed form of the unit, and then convert that to
+    # symbolic form. The two should match.
+    unit_expr = normalize(unit_str)
+    parsed_expr_symbol = cf_units.Unit(unit_expr).symbol
+
+    # Whilst the symbolic form from udunits is ugly, it *is* accurate,
+    # so check that the two represent the same unit.
+    assert raw_symbol == parsed_expr_symbol
+
+
+udunits_bugs = [
+    '2¹²³⁴⁵⁶⁷⁸⁹⁰',
+    'm⁻²'
+]
+
+
+@pytest.mark.parametrize("_, unit_str", enumerate(invalid))
+def test_invalid_units(_, unit_str):
+    # Confirm that invalid udunits-2 units are also invalid in our grammar.
+
+    try:
+        cf_units.Unit(unit_str)
+        cf_valid = True
+    except ValueError:
+        cf_valid = False
+
+    # Double check that udunits2 can't parse this.
+    assert cf_valid is False, \
+        'Unit {!r} is unexpectedly valid in UDUNITS2'.format(unit_str)
+
+    try:
+        normalize(unit_str)
+        can_parse = True
+    except SyntaxError:
+        can_parse = False
+
+    # Now confirm that we couldn't parse this either.
+    msg = 'Parser unexpectedly able to deal with {}'.format(unit_str)
+    assert can_parse is False, msg
+
+
+def multi_enumerate(items):
+    # Like enumerate, but flattens out the resulting index and items.
+    return [[i, *item] for i, item in enumerate(items)]
+
+
+not_udunits = [
+    ['foo', 'foo'],
+    ['mfrom1', 'mfrom^1'],
+    ['m⁴', 'm^4'],  # udunits bug.
+    ['2¹²³⁴⁵⁶⁷⁸⁹⁰', '2^1234567890'],
+
+    # Unicode (subset of the subset).
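+    # nb. each "['x'] * 2" below is simply ['x', 'x'] -- i.e. the unit is
+    # expected to normalize to itself.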
+    ['À'] * 2,
+    ['Á'] * 2,
+    ['Ö'] * 2,
+    ['Ø'] * 2,
+    ['ö'] * 2,
+    ['ø'] * 2,
+    ['ÿ'] * 2,
+    ['µ'] * 2,
+    ['µ°F·Ω⁻¹', 'µ°F·Ω^-1'],
+]
+
+
+@pytest.mark.parametrize("_, unit_str, expected", multi_enumerate(not_udunits))
+def test_invalid_in_udunits_but_still_parses(_, unit_str, expected):
+    # Some units read fine in our grammar, but not in UDUNITS.
+
+    try:
+        cf_units.Unit(unit_str)
+        cf_valid = True
+    except ValueError:
+        cf_valid = False
+
+    # Double check that udunits2 can't parse this.
+    assert cf_valid is False
+
+    unit_expr = normalize(unit_str)
+    assert unit_expr == expected
+
+
+known_issues = [
+    # Disabled due to crazy results from UDUNITS.
+    ['s since +1990 +2:0:2.9', SyntaxError],
+    ['s since -1990 +2:0:2.9', SyntaxError],
+
+    # The following are not yet implemented.
+    ['hours since 2001-12-31 23:59:59.999UTC', SyntaxError],
+    ['hours since 2001-12-31 23:59:59.999 Z', SyntaxError],
+    ['hours since 2001-12-31 23:59:59.999 GMT', SyntaxError],
+    ['0.1 lg(re 1 mW)', SyntaxError],
+]
+
+
+@pytest.mark.parametrize("_, unit_str, expected",
+                         multi_enumerate(known_issues))
+def test_known_issues(_, unit_str, expected):
+    # Unfortunately the grammar is not perfect.
+    # These are the cases that don't work yet but which do work with udunits.
+
+    # Make sure udunits can read it.
+    cf_units.Unit(unit_str).symbol
+
+    if isinstance(expected, type) and issubclass(expected, Exception):
+        with pytest.raises(expected):
+            normalize(unit_str)
+    else:
+        unit_expr = normalize(unit_str)
+        assert unit_expr != expected
+
+
+def test_syntax_parse_error_quality():
+    # Check that the syntax error is giving us good context.
+
+    msg = re.escape(r"no viable alternative at input 'm^m' (inline, line 1)")
+    with pytest.raises(SyntaxError, match=msg) as err:
+        normalize('m^m 2s')
+    # The problem is with the m after "^", so make sure the exception is
+    # pointing at it (including the leading speechmark).
+    assert err.value.offset == 4
+
+
+def test_unknown_symbol_error():
+    msg = re.escape(r"mismatched input '×' expecting ")
+    with pytest.raises(SyntaxError, match=msg) as err:
+        # The × character is explicitly excluded in the UDUNITS2
+        # implementation. It would make some sense to support it in the
+        # future though.
+        normalize('Thing×Another')
+    # The 7th character (including the speechmark) is the problem; check
+    # that the exception points at the correct location:
+    #     File "inline", line 1
+    #       'Thing×Another'
+    #             ^
+    assert err.value.offset == 7
+
+
+not_allowed = [
+    'hours from 1990-1-1 -20:4:18 +2',
+    'm++2',
+    'm s^(-1)',
+    'm per /s',
+]
+
+
+@pytest.mark.parametrize("_, unit_str", enumerate(not_allowed))
+def test_invalid_syntax_units(_, unit_str):
+    # Check that units that aren't allowed with UDUNITS-2 are also not
+    # allowed with our grammar.
+
+    with pytest.raises(ValueError):
+        cf_units.Unit(unit_str).symbol
+
+    with pytest.raises(SyntaxError):
+        normalize(unit_str)
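+
+
+def test_normalize_roundtrip_sketch():
+    # An illustrative sketch, not part of the suite above (the test name and
+    # the example unit are ours): whatever canonical spelling `normalize`
+    # produces, it should denote the same unit as the raw input, per
+    # test_normed_units_equivalent.
+    assert cf_units.Unit(normalize('m2/s')) == cf_units.Unit('m^2 per s')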