# Source code for finesse.script.parser

"""Kat parser to convert tokens to productions.

This is a recursive descent parser, providing unlimited lookahead capabilities to allow
arbitrary context-free grammars to be parsed. The implementation of memoization makes it
a *packrat* parser which runs in linear time at the expense of potentially unlimited
memory use. In practice memory use is limited by the simplicity of typical kat scripts
(e.g. expressions don't tend to have many subexpressions).

This is inspired by Python's PEG parser, used as of 3.9. Some introductory information
can be found in `this series of blog posts
<https://medium.com/@gvanrossum_83706/peg-parsing-series-de5d41b2ed60>`__.

Sean Leavey <sean.leavey@ligo.org>
"""

# NOTE: do not run black on this file!

import logging
from io import StringIO
from .containers import (
    KatScript,
    KatElement,
    KatFunction,
    KatKwarg,
    KatExpression,
    KatGroupedExpression,
    KatArray,
    KatNumericalArray
)
from .tokenizer import KatTokenizer
from .memoize import memoize, memoize_left_rec
from .exceptions import KatSyntaxError, KatMissingAfterDirective

LOGGER = logging.getLogger(__name__)


class KatParser:
    """Kat script parser.

    This uses so-called *packrat* parsing to reduce, via productions, tokens yielded
    from a token stream generated from an input file or string to
    :class:`.KatScriptItem` objects containing the associated
    :class:`tokens <.KatToken>`.
    """
[docs] def __init__(self): self.tokens = None self.pos = None self.memos = None self._tokenizer = None self._token_stream = None
@property
def script(self):
    """The script held by the tokenizer of the current parse."""
    tokenizer = self._tokenizer
    return tokenizer.script
def parse(self, string):
    """Parse the contents of `string`.

    The string is wrapped in an in-memory text buffer and handed to
    :meth:`parse_file`.

    Parameters
    ----------
    string : :class:`str`
        The string to parse kat script from.

    Returns
    -------
    :class:`.KatScript`
        The parsed kat script.
    """
    buffer = StringIO(string)
    return self.parse_file(buffer)
def parse_file(self, fobj):
    """Parse the contents of the specified file.

    Parameters
    ----------
    fobj : :class:`io.FileIO`
        The file object to parse kat script from. This should be opened in text
        mode.

    Returns
    -------
    :class:`.KatScript`
        The parsed kat script.
    """
    # Start every parse from a clean slate.
    self.tokens, self.pos, self.memos = [], 0, {}
    self._log_stack = []

    # Tokens are pulled lazily from this stream as the parser asks for them.
    tokenizer = KatTokenizer()
    self._tokenizer = tokenizer
    self._token_stream = tokenizer.tokenize_file(fobj)

    script = self.expect_production("start")
    if script is None:
        # Nothing matched the top-level production; report where parsing stopped.
        self._diagnose_error()
    return script
def _diagnose_error(self):
    """Raise a generic syntax error for a failed parse.

    The error points at the last token pulled from the token stream, or at the
    first token of the input if none has been pulled yet.
    """
    error_token = self.tokens[-1] if self.tokens else self.peek_token()
    raise KatSyntaxError("syntax error", self.script, error_token)
def mark(self):
    """Return the current token position, for a later :meth:`reset`."""
    position = self.pos
    return position
def reset(self, pos):
    """Move the parser to token position `pos` (no-op if it is already there)."""
    if pos != self.pos:
        self.pos = pos
def get_token(self):
    """Return the token at the current position and advance past it."""
    current = self.peek_token()
    self.pos = self.pos + 1
    return current
def peek_token(self):
    """Return the token at the current position without consuming it.

    If the position is one past the buffered tokens, the next token is first pulled
    from the token stream and appended to the buffer.
    """
    buffered = self.tokens
    if len(buffered) == self.pos:
        buffered.append(next(self._token_stream))
    return buffered[self.pos]
def positive_lookahead(self, token_type):
    """Return whether the next token has type `token_type`, consuming nothing."""
    start = self.mark()
    matches = self.peek_token().type == token_type
    self.reset(start)
    return matches
def negative_lookahead(self, token_type):
    """Return whether the next token does *not* have type `token_type`."""
    if self.positive_lookahead(token_type):
        return False
    return True
def maybe_whitespace(self, multiline, whitespace):
    """Match zero or more ``empty`` productions if `whitespace` is set, else []."""
    if whitespace:
        return self.loop("empty", False, multiline)
    return []
def maybe_trailing_comma(self, multiline):
    """Match an optional ``empty* ','`` sequence.

    Returns the matched items (empties followed by the comma), or an empty list
    with the position restored when there is no trailing comma.
    """
    start = self.mark()

    leading = self.loop("empty", False, multiline)
    if leading is not None:
        comma = self.expect_token("COMMA")
        if comma is not None:
            return [*leading, comma]

    self.reset(start)
    return []
@memoize
def expect_token(self, arg):
    """Consume and return the next token if its type is `arg`.

    Parameters
    ----------
    arg : :class:`str`
        The token type to match.

    Returns
    -------
    The consumed token, or ``None`` (with nothing consumed) if the next token has a
    different type.
    """
    self._log_stack.append(arg)
    try:
        # Only build the "a->b->c" path and format messages when debug logging is
        # actually enabled; this method is called for every token attempt.
        debug = LOGGER.isEnabledFor(logging.DEBUG)
        if debug:
            path = "->".join(self._log_stack)
            LOGGER.debug("%s?", path)

        token = self.peek_token()
        if token.type == arg:
            if debug:
                LOGGER.debug("%s = %r!", path, token)
            return self.get_token()

        LOGGER.debug("%s not found", arg)
        return None
    finally:
        # Keep the log stack balanced even if fetching a token raises.
        self._log_stack.pop()
def expect_production(self, production, *args, **kwargs):
    """Attempt the production method named `production`.

    Parameters
    ----------
    production : :class:`str`
        Name of the method on this parser implementing the production.
    *args, **kwargs
        Passed through to the production method.

    Returns
    -------
    Whatever the production returned; ``None`` means it did not match.
    """
    self._log_stack.append(production)
    try:
        # Only build the "a->b->c" path and the repr of the result when debug
        # logging is actually enabled; this runs for every production attempt.
        debug = LOGGER.isEnabledFor(logging.DEBUG)
        if debug:
            path = "->".join(self._log_stack)
            LOGGER.debug("%s?", path)

        result = getattr(self, production)(*args, **kwargs)

        if result is None:
            LOGGER.debug("%s not found", production)
        elif debug:
            LOGGER.debug("%s = %r!", path, result)
        return result
    finally:
        # Productions may raise syntax errors; keep the log stack balanced anyway.
        self._log_stack.pop()
def loop(self, production, nonempty, *args, **kwargs):
    """Match `production` repeatedly until it fails.

    Returns the list of matches. If fewer than `nonempty` matches were made (i.e.
    none when `nonempty` is true), the position is restored and ``None`` is
    returned instead.
    """
    start = self.mark()
    matched = []

    while True:
        item = self.expect_production(production, *args, **kwargs)
        if item is None:
            break
        matched.append(item)

    if len(matched) < nonempty:
        self.reset(start)
        return None
    return matched
def start(self):
    """Top-level production: one or more script lines, or an empty file.

    Returns a :class:`.KatScript`, or ``None`` if the input matches neither form.
    """
    pos = self.mark()

    # start -> script_line+ ENDMARKER
    script_lines = self.loop("script_line", True)
    if script_lines is not None and self.expect_token("ENDMARKER") is not None:
        # Lines without a statement still contribute their extra tokens.
        statements = [statement for statement, _ in script_lines if statement]
        extra = [item for _, line_extra in script_lines for item in line_extra]
        return KatScript(arguments=statements, extra=extra)
    self.reset(pos)

    # Empty file.
    if self.expect_token("ENDMARKER") is not None:
        return KatScript(arguments=[], extra=[])
    self.reset(pos)
@memoize
def script_line(self):
    """Match one line of the script.

    Returns a ``(statement, extra)`` tuple, where ``statement`` is ``None`` for a
    line holding only whitespace and/or comments. If neither form matches but
    ``invalid_script_line`` recognises the line, the error it returns is raised.
    """
    pos = self.mark()

    # script_line -> script_line_empty* statement script_line_empty* NEWLINE
    if (
        True
        and (script_line_empty1 := self.loop("script_line_empty", False)) is not None
        and (statement := self.expect_production("statement")) is not None
        and (script_line_empty2 := self.loop("script_line_empty", False)) is not None
        and (NEWLINE := self.expect_token("NEWLINE")) is not None
    ):
        extra = [*script_line_empty1, *script_line_empty2]

        # Ignore implicit newlines, which are metatokens.
        if hasattr(NEWLINE, "value"):
            extra.append(NEWLINE)

        return statement, extra
    self.reset(pos)

    # script_line -> script_line_empty* NEWLINE
    if (
        True
        and (script_line_empty := self.loop("script_line_empty", False)) is not None
        and (NEWLINE := self.expect_token("NEWLINE")) is not None
    ):
        extra = [*script_line_empty]

        if hasattr(NEWLINE, "value"):
            # Newline is a real token.
            extra.append(NEWLINE)

        return None, extra
    self.reset(pos)

    if (error := self.expect_production("invalid_script_line")) is not None:
        raise error

@memoize
def script_line_empty(self):
    """Match a single WHITESPACE or COMMENT token."""
    pos = self.mark()

    # script_line_empty -> WHITESPACE
    if (WHITESPACE := self.expect_token("WHITESPACE")) is not None:
        return WHITESPACE
    self.reset(pos)

    # script_line_empty -> COMMENT
    if (COMMENT := self.expect_token("COMMENT")) is not None:
        return COMMENT
    self.reset(pos)

@memoize
def empty(self, multiline):
    """Match a WHITESPACE token or, when `multiline` is set, a COMMENT or NEWLINE."""
    pos = self.mark()

    # empty -> WHITESPACE
    if (WHITESPACE := self.expect_token("WHITESPACE")) is not None:
        return WHITESPACE
    self.reset(pos)

    if multiline:
        # empty -> COMMENT
        if (COMMENT := self.expect_token("COMMENT")) is not None:
            return COMMENT
        self.reset(pos)

        # empty -> NEWLINE
        if (NEWLINE := self.expect_token("NEWLINE")) is not None:
            return NEWLINE
        self.reset(pos)

@memoize
def statement(self):
    """Match an element or a (multiline-capable) function."""
    pos = self.mark()

    # statement -> element
    if (element := self.expect_production("element")) is not None:
        return element
    self.reset(pos)

    # statement -> function
    if (function := self.expect_production("function", True)) is not None:
        return function
    self.reset(pos)

@memoize
def element(self):
    """Match ``directive name [params]`` and return a :class:`.KatElement`."""
    pos = self.mark()

    # element -> NAME WHITESPACE NAME WHITESPACE element_params
    if (
        True
        and (DIRECTIVE := self.expect_token("NAME")) is not None
        and (WHITESPACE1 := self.expect_token("WHITESPACE")) is not None
        and (NAME := self.expect_token("NAME")) is not None
        and (WHITESPACE2 := self.expect_token("WHITESPACE")) is not None
        and (element_params := self.expect_production("element_params")) is not None
    ):
        arguments, params_extra = element_params
        return KatElement(
            directive=DIRECTIVE,
            name=NAME,
            arguments=arguments,
            extra=[WHITESPACE1, WHITESPACE2, *params_extra]
        )
    self.reset(pos)

    # element -> NAME WHITESPACE NAME
    if (
        True
        and (DIRECTIVE := self.expect_token("NAME")) is not None
        and (WHITESPACE := self.expect_token("WHITESPACE")) is not None
        and (NAME := self.expect_token("NAME")) is not None
    ):
        return KatElement(
            directive=DIRECTIVE,
            name=NAME,
            arguments=[],
            extra=[WHITESPACE]
        )
    self.reset(pos)

@memoize
def element_params(self):
    """Match element parameters; returns an ``(arguments, extra)`` tuple."""
    pos = self.mark()

    # element_params -> element_value_list empty+ element_key_value_list
    if (
        True
        and (element_value_list := self.expect_production("element_value_list")) is not None
        and (empty := self.loop("empty", True, False)) is not None
        and (element_key_value_list := self.expect_production("element_key_value_list")) is not None
    ):
        values, value_extra = element_value_list
        key_values, key_value_extra = element_key_value_list
        return ([*values, *key_values], [*value_extra, *empty, *key_value_extra])
    self.reset(pos)

    # element_params -> element_value_list
    if (element_value_list := self.expect_production("element_value_list")) is not None:
        return element_value_list
    self.reset(pos)

    # element_params -> element_key_value_list
    if (element_key_value_list := self.expect_production("element_key_value_list")) is not None:
        return element_key_value_list
    self.reset(pos)

@memoize
def element_value_list(self):
    """Match whitespace-delimited positional values; returns ``(values, extra)``."""
    pos = self.mark()

    # element_value_list -> positional_value empty+ element_value_list
    if (
        True
        and (positional_value := self.expect_production("positional_value", False, False)) is not None
        and (empty := self.loop("empty", True, False)) is not None
        and (element_value_list := self.expect_production("element_value_list")) is not None
    ):
        values, value_extra = element_value_list
        return ([positional_value, *values], [*empty, *value_extra])
    self.reset(pos)

    # element_value_list -> positional_value
    if (positional_value := self.expect_production("positional_value", False, False)) is not None:
        return [positional_value], []
    self.reset(pos)

@memoize_left_rec
def element_key_value_list(self):
    """Match whitespace-delimited ``key=value`` pairs; returns ``(key_values, extra)``."""
    pos = self.mark()

    # element_key_value_list -> element_key_value_list empty+ element_key_value_list
    if (
        True
        and (element_key_value_list1 := self.expect_production("element_key_value_list")) is not None
        and (empty := self.loop("empty", True, False)) is not None
        and (element_key_value_list2 := self.expect_production("element_key_value_list")) is not None
    ):
        key_values1, key_value_extra1 = element_key_value_list1
        key_values2, key_value_extra2 = element_key_value_list2
        return (
            [*key_values1, *key_values2],
            [*key_value_extra1, *empty, *key_value_extra2]
        )
    self.reset(pos)

    # element_key_value_list -> key_value
    if (key_value := self.expect_production("key_value", False, False)) is not None:
        return [key_value], []
    self.reset(pos)

@memoize
def function(self, multiline):
    """Match ``name(...)`` and return a :class:`.KatFunction`."""
    pos = self.mark()

    # function -> NAME '(' empty* function_params empty* ')'
    if (
        True
        and (FUNCTION := self.expect_token("NAME")) is not None
        and (LPAREN := self.expect_token("LPAREN")) is not None
        and (empty1 := self.loop("empty", False, multiline)) is not None
        and (function_params := self.expect_production("function_params", multiline)) is not None
        and (empty2 := self.loop("empty", False, multiline)) is not None
        and (RPAREN := self.expect_token("RPAREN")) is not None
    ):
        args, extra = function_params
        return KatFunction(
            directive=FUNCTION,
            arguments=args,
            extra=[LPAREN, *empty1, *extra, *empty2, RPAREN]
        )
    self.reset(pos)

    # function -> NAME '(' empty* ')'
    if (
        True
        and (FUNCTION := self.expect_token("NAME")) is not None
        and (LPAREN := self.expect_token("LPAREN")) is not None
        and (empty := self.loop("empty", False, multiline)) is not None
        and (RPAREN := self.expect_token("RPAREN")) is not None
    ):
        return KatFunction(
            directive=FUNCTION,
            arguments=[],
            extra=[LPAREN, *empty, RPAREN]
        )
    self.reset(pos)

@memoize
def function_params(self, multiline):
    """Match function parameters; returns an ``(arguments, extra)`` tuple."""
    pos = self.mark()

    # function_params -> function_value_list empty* ',' empty* function_key_value_list (empty* ',')?
    if (
        True
        and (function_value_list := self.expect_production("function_value_list", multiline)) is not None
        and (empty1 := self.loop("empty", False, multiline)) is not None
        and (COMMA := self.expect_token("COMMA")) is not None
        and (empty2 := self.loop("empty", False, multiline)) is not None
        and (function_key_value_list := self.expect_production("function_key_value_list", multiline)) is not None
        and (trailing_comma := self.maybe_trailing_comma(multiline)) is not None
    ):
        values, value_extra = function_value_list
        key_values, key_value_extra = function_key_value_list
        return (
            [*values, *key_values],
            [
                *value_extra,
                *empty1,
                COMMA,
                *empty2,
                *key_value_extra,
                *trailing_comma
            ]
        )
    self.reset(pos)

    # function_params -> function_value_list (empty* ',')?
    if (
        True
        and (function_value_list := self.expect_production("function_value_list", multiline)) is not None
        and (trailing_comma := self.maybe_trailing_comma(multiline)) is not None
    ):
        values, value_extra = function_value_list
        return values, [*value_extra, *trailing_comma]
    self.reset(pos)

    # function_params -> function_key_value_list (empty* ',')?
    if (
        True
        and (function_key_value_list := self.expect_production("function_key_value_list", multiline)) is not None
        and (trailing_comma := self.maybe_trailing_comma(multiline)) is not None
    ):
        key_values, key_value_extra = function_key_value_list
        return key_values, [*key_value_extra, *trailing_comma]
    self.reset(pos)

@memoize
def function_value_list(self, multiline):
    """Match comma-delimited positional values; returns ``(values, extra)``."""
    pos = self.mark()

    # function_value_list -> positional_value empty* ',' empty* function_value_list
    if (
        True
        and (positional_value := self.expect_production("positional_value", multiline, True)) is not None
        and (empty1 := self.loop("empty", False, multiline)) is not None
        and (COMMA1 := self.expect_token("COMMA")) is not None
        and (empty2 := self.loop("empty", False, multiline)) is not None
        and (function_value_list := self.expect_production("function_value_list", multiline)) is not None
    ):
        values, value_extra = function_value_list
        return (
            [positional_value, *values],
            [*empty1, COMMA1, *empty2, *value_extra]
        )
    self.reset(pos)

    # function_value_list -> positional_value
    if (positional_value := self.expect_production("positional_value", multiline, True)) is not None:
        return [positional_value], []
    self.reset(pos)

@memoize_left_rec
def function_key_value_list(self, multiline):
    """Match comma-delimited ``key=value`` pairs; returns ``(key_values, extra)``."""
    pos = self.mark()

    # function_key_value_list -> function_key_value_list empty* ',' empty* function_key_value_list
    if (
        True
        and (function_key_value_list1 := self.expect_production("function_key_value_list", multiline)) is not None
        and (empty1 := self.loop("empty", False, multiline)) is not None
        and (COMMA := self.expect_token("COMMA")) is not None
        and (empty2 := self.loop("empty", False, multiline)) is not None
        and (function_key_value_list2 := self.expect_production("function_key_value_list", multiline)) is not None
    ):
        key_values1, key_value_extra1 = function_key_value_list1
        key_values2, key_value_extra2 = function_key_value_list2
        return (
            [*key_values1, *key_values2],
            [*key_value_extra1, *empty1, COMMA, *empty2, *key_value_extra2]
        )
    self.reset(pos)

    # function_key_value_list -> key_value
    if (key_value := self.expect_production("key_value", multiline, True)) is not None:
        return [key_value], []
    self.reset(pos)

@memoize
def positional_value(self, multiline, whitespace):
    """Match a value that is not immediately followed by '='."""
    pos = self.mark()

    # Don't match values followed by '=', which are kwarg keys.
    if (
        True
        and (value := self.expect_production("value", multiline, whitespace)) is not None
        and self.negative_lookahead("EQUALS")
    ):
        return value
    self.reset(pos)

@memoize
def value(self, multiline, whitespace):
    """Match an array, expression, function, or NONE/BOOLEAN/STRING token."""
    pos = self.mark()

    # value -> array
    # Should be above `expr` so arrays that aren't part of expressions stay as lists
    # instead of being converted into ndarrays.
    if (array := self.expect_production("array", multiline)) is not None:
        return array
    self.reset(pos)

    # value -> expr
    if (expr := self.expect_production("expr", multiline, whitespace)) is not None:
        return expr
    self.reset(pos)

    # value -> function
    if (function := self.expect_production("function", multiline)) is not None:
        return function
    self.reset(pos)

    # value -> NONE / BOOLEAN / STRING
    for token_type in ("NONE", "BOOLEAN", "STRING"):
        if (TOKEN := self.expect_token(token_type)) is not None:
            return TOKEN
        self.reset(pos)

@memoize
def key_value(self, multiline, whitespace):
    """Match ``key=value`` and return a :class:`.KatKwarg`.

    Raises :class:`.KatSyntaxError` if a key and '=' are present but no value
    follows.
    """
    pos = self.mark()

    # key_value -> NAME '=' value
    if (
        True
        and (NAME := self.expect_token("NAME")) is not None
        and (EQUALS := self.expect_token("EQUALS")) is not None
        and (value := self.expect_production("value", multiline, whitespace)) is not None
    ):
        return KatKwarg(key=NAME, equals=EQUALS, value=value)
    self.reset(pos)

    if (
        True
        and (NAME := self.expect_token("NAME")) is not None
        and (EQUALS := self.expect_token("EQUALS")) is not None
        and self.expect_production("value", multiline, whitespace) is None  # !
    ):
        raise KatSyntaxError("missing value", self.script, EQUALS)
    self.reset(pos)

@memoize_left_rec
def expr(self, multiline, whitespace):
    """Plus and minus (binary) operators."""
    pos = self.mark()

    # NOTE: operator precedence is set by defining productions within productions.

    # expr -> expr ( '+' / '-' ) expr1
    for operator_type in ("PLUS", "MINUS"):
        if (
            True
            and (lhs := self.expect_production("expr", multiline, whitespace)) is not None
            and (whitespace1 := self.maybe_whitespace(multiline, whitespace)) is not None
            and (operator := self.expect_token(operator_type)) is not None
            and (whitespace2 := self.maybe_whitespace(multiline, whitespace)) is not None
            and (rhs := self.expect_production("expr1", multiline, whitespace)) is not None
        ):
            return KatExpression(
                operator=operator,
                arguments=[lhs, rhs],
                extra=[*whitespace1, *whitespace2]
            )
        self.reset(pos)

    # expr -> expr1
    if (expr1 := self.expect_production("expr1", multiline, whitespace)) is not None:
        return expr1
    self.reset(pos)

@memoize_left_rec
def expr1(self, multiline, whitespace):
    """Times, divide and floordivide operators."""
    pos = self.mark()

    # expr1 -> expr1 ( '*' / '/' / '//' ) expr2
    for operator_type in ("TIMES", "DIVIDE", "FLOORDIVIDE"):
        if (
            True
            and (lhs := self.expect_production("expr1", multiline, whitespace)) is not None
            and (whitespace1 := self.maybe_whitespace(multiline, whitespace)) is not None
            and (operator := self.expect_token(operator_type)) is not None
            and (whitespace2 := self.maybe_whitespace(multiline, whitespace)) is not None
            and (rhs := self.expect_production("expr2", multiline, whitespace)) is not None
        ):
            return KatExpression(
                operator=operator,
                arguments=[lhs, rhs],
                extra=[*whitespace1, *whitespace2]
            )
        self.reset(pos)

    # expr1 -> expr2
    if (expr2 := self.expect_production("expr2", multiline, whitespace)) is not None:
        return expr2
    self.reset(pos)

@memoize
def expr2(self, multiline, whitespace):
    """Unary operators."""
    pos = self.mark()

    # expr2 -> ( '+' / '-' ) expr2
    for operator_type in ("PLUS", "MINUS"):
        if (
            True
            and (unary_operator := self.expect_token(operator_type)) is not None
            and (expr2 := self.expect_production("expr2", multiline, whitespace)) is not None
        ):
            return KatFunction(
                directive=unary_operator,
                arguments=[expr2],
                extra=[]
            )
        self.reset(pos)

    # expr2 -> expr3
    if (expr3 := self.expect_production("expr3", multiline, whitespace)) is not None:
        return expr3
    self.reset(pos)

@memoize
def expr3(self, multiline, whitespace):
    """Power operator."""
    pos = self.mark()

    # expr3 -> expr4 '**' expr2
    if (
        True
        and (expr4 := self.expect_production("expr4", multiline, whitespace)) is not None
        and (whitespace1 := self.maybe_whitespace(multiline, whitespace)) is not None
        and (power := self.expect_token("POWER")) is not None
        and (whitespace2 := self.maybe_whitespace(multiline, whitespace)) is not None
        and (expr2 := self.expect_production("expr2", multiline, whitespace)) is not None
    ):
        return KatExpression(
            operator=power,
            arguments=[expr4, expr2],
            extra=[*whitespace1, *whitespace2]
        )
    self.reset(pos)

    # expr3 -> expr4
    if (expr4 := self.expect_production("expr4", multiline, whitespace)) is not None:
        return expr4
    self.reset(pos)

@memoize
def expr4(self, multiline, whitespace):
    """Parentheses, functions, references, names and numbers.

    Names are allowed here to support keywords and copy-/read-by-value parameters
    like l1.P.
    """
    pos = self.mark()

    # expr4 -> '(' empty* expr empty* ')'
    if (
        True
        and (LPAREN := self.expect_token("LPAREN")) is not None
        and (whitespace1 := self.maybe_whitespace(multiline, True)) is not None
        and (expr := self.expect_production("expr", multiline, True)) is not None
        and (whitespace2 := self.maybe_whitespace(multiline, True)) is not None
        and (RPAREN := self.expect_token("RPAREN")) is not None
    ):
        return KatGroupedExpression(
            arguments=[expr],
            extra=[LPAREN, *whitespace1, *whitespace2, RPAREN]
        )
    self.reset(pos)

    # expr4 -> function
    if (function := self.expect_production("function", multiline)) is not None:
        return function
    self.reset(pos)

    # expr4 -> array
    if (array := self.expect_production("array", multiline)) is not None:
        LOGGER.debug("converting array expression operand to numerical array")
        return KatNumericalArray.from_array(array)
    self.reset(pos)

    # expr4 -> NUMBER
    # Disallow matching of subsequent numbers, which indicates the tokenizer failed
    # to group two numbers together, and therefore a syntax error (handled later).
    if (
        True
        and (TOKEN := self.expect_token("NUMBER")) is not None
        and self.negative_lookahead("NUMBER")
    ):
        return TOKEN
    self.reset(pos)

    # expr4 -> NAME
    if (TOKEN := self.expect_token("NAME")) is not None:
        return TOKEN
    self.reset(pos)

    if (error := self.expect_production("invalid_expr4", multiline)) is not None:
        raise error
    self.reset(pos)

@memoize
def array(self, multiline):
    """Match a bracketed array and return a :class:`.KatArray`.

    If neither array form matches but ``invalid_array`` recognises the input, the
    error it returns is raised.
    """
    pos = self.mark()

    # array -> '[' empty* array_values empty* ']'
    if (
        True
        and (LBRACKET := self.expect_token("LBRACKET")) is not None
        and (empty1 := self.loop("empty", False, False)) is not None
        and (array_values := self.expect_production("array_values", multiline)) is not None
        and (empty2 := self.loop("empty", False, False)) is not None
        and (RBRACKET := self.expect_token("RBRACKET")) is not None
    ):
        values, value_extra = array_values
        return KatArray(
            arguments=values,
            extra=[LBRACKET, *empty1, *value_extra, *empty2, RBRACKET]
        )
    self.reset(pos)

    # array -> '[' empty* ']'
    if (
        True
        and (LBRACKET := self.expect_token("LBRACKET")) is not None
        and (empty := self.loop("empty", False, False)) is not None
        and (RBRACKET := self.expect_token("RBRACKET")) is not None
    ):
        return KatArray(arguments=[], extra=[LBRACKET, *empty, RBRACKET])
    self.reset(pos)

    if (error := self.expect_production("invalid_array", multiline)) is not None:
        raise error
    self.reset(pos)

@memoize
def array_values(self, multiline):
    """Match comma-delimited array values; returns ``(values, extra)``."""
    pos = self.mark()

    # array_values -> array_value empty* ',' empty* array_values (empty* ',')?
    if (
        True
        and (array_value := self.expect_production("array_value", multiline)) is not None
        and (empty1 := self.loop("empty", False, multiline)) is not None
        and (COMMA := self.expect_token("COMMA")) is not None
        and (empty2 := self.loop("empty", False, multiline)) is not None
        and (array_values := self.expect_production("array_values", multiline)) is not None
        and (trailing_comma := self.maybe_trailing_comma(multiline)) is not None
    ):
        values, value_extra = array_values
        return (
            [array_value, *values],
            [*empty1, COMMA, *empty2, *value_extra, *trailing_comma]
        )
    self.reset(pos)

    # array_values -> array_value (empty* ',')?
    if (
        True
        and (array_value := self.expect_production("array_value", multiline)) is not None
        and (trailing_comma := self.maybe_trailing_comma(multiline)) is not None
    ):
        return [array_value], trailing_comma
    self.reset(pos)

@memoize
def array_value(self, multiline):
    """Match a nested array, an expression, or a single literal token."""
    pos = self.mark()

    # array_value -> array
    # Should be above `expr` so arrays that aren't part of expressions stay as lists
    # instead of being converted into ndarrays.
    if (array := self.expect_production("array", multiline)) is not None:
        return array
    self.reset(pos)

    # array_value -> expr
    if (expr := self.expect_production("expr", multiline, True)) is not None:
        return expr
    self.reset(pos)

    # array_value -> NONE / NUMBER / NAME / BOOLEAN / STRING
    for token_type in ("NONE", "NUMBER", "NAME", "BOOLEAN", "STRING"):
        if (TOKEN := self.expect_token(token_type)) is not None:
            return TOKEN
        self.reset(pos)

@memoize
def invalid_script_line(self):
    """Recognise known invalid script lines.

    Returns (does not raise) an error object describing the problem, or ``None`` if
    no known invalid form matches.
    """
    pos = self.mark()

    # Check for positional arguments specified after keyword arguments in inline
    # statements. Multiline statements are checked elsewhere.
    if (
        True
        and self.loop("script_line_empty", False) is not None
        and self.expect_production("statement") is not None
        and self.loop("script_line_empty", True) is not None
        and (element_value_list := self.expect_production("element_value_list")) is not None
    ):
        values, _ = element_value_list
        return KatSyntaxError(
            "positional argument follows keyword argument", self.script, values[0]
        )
    self.reset(pos)

    # Invalid parameter.
    # This has to be located here because we want the above rules to first check for
    # NEWLINE, to avoid matching valid elements.
    if (
        True
        and self.expect_token("NAME") is not None
        and self.expect_token("WHITESPACE") is not None
        and self.expect_token("NAME") is not None
        and self.expect_token("WHITESPACE") is not None
        and self.expect_production("element_params") is not None
        and (element_params := self.expect_production("element_params")) is not None
    ):
        arguments, _ = element_params
        return KatSyntaxError("syntax error", self.script, arguments[0])
    self.reset(pos)

    # Invalid element (no name) or function (no opening parenthesis).
    if (
        True
        and (DIRECTIVE := self.expect_token("NAME")) is not None
        and self.expect_token("WHITESPACE") is not None
        and self.expect_production("element_params") is not None
        and self.expect_token("NEWLINE") is not None
    ):
        return KatMissingAfterDirective(self.script, DIRECTIVE)
    self.reset(pos)

    # Invalid function: space before parentheses (non-empty parameters).
    if (
        True
        and self.expect_token("NAME") is not None
        and (WHITESPACE := self.expect_token("WHITESPACE")) is not None
        and self.expect_token("LPAREN") is not None
        and self.loop("empty", False, True) is not None
        and self.expect_production("function_params", True) is not None
        and self.loop("empty", False, True) is not None
        and self.expect_token("RPAREN") is not None
    ):
        return KatSyntaxError("space not allowed here", self.script, WHITESPACE)
    self.reset(pos)

    # Invalid function: space before parentheses (empty parameters).
    if (
        True
        and self.expect_token("NAME") is not None
        and (WHITESPACE := self.expect_token("WHITESPACE")) is not None
        and self.expect_token("LPAREN") is not None
        and self.loop("empty", False, True) is not None
        and self.expect_token("RPAREN") is not None
    ):
        return KatSyntaxError("space not allowed here", self.script, WHITESPACE)
    self.reset(pos)

@memoize
def invalid_expr4(self, multiline):
    """Recognise two NUMBER tokens in a row.

    Returns (does not raise) an error object, or ``None`` if the input does not
    start with two consecutive NUMBER tokens.
    """
    pos = self.mark()

    # Invalid number: two numbers tokenized in a row, indicating a failure to match
    # the number with the tokenizer's regex.
    # Tokens are compared against None, as everywhere else in this parser, so the
    # match does not depend on how token objects evaluate in a boolean context.
    if (
        True
        and (NUMBER1 := self.expect_token("NUMBER")) is not None
        and (NUMBER2 := self.expect_token("NUMBER")) is not None
    ):
        if isinstance(NUMBER1.value, int) and isinstance(NUMBER2.value, int):
            # Leading zeros (as per the to-number operation of the IBM
            # specification; same behaviour as Python itself, see
            # https://docs.python.org/3/library/decimal.html).
            return KatSyntaxError(
                "leading zeros in integers are not permitted", self.script, NUMBER1
            )

        # Some other run-together, e.g. `0.1.1`.
        return KatSyntaxError("invalid number syntax", self.script, NUMBER2)
    self.reset(pos)

@memoize
def invalid_array(self, multiline):
    """Recognise array values that are not separated by a comma.

    Returns (does not raise) an error object, or ``None`` if the input does not
    match this invalid form.
    """
    pos = self.mark()

    # Invalid: no comma as a delimiter.
    if (
        True
        and self.expect_token("LBRACKET") is not None
        and self.expect_production("array_values", multiline) is not None
        and (empty := self.loop("empty", False, False)) is not None
        and (next_values := self.expect_production("array_values", multiline)) is not None
    ):
        values, _ = next_values
        # `empty` is [] when the values are directly adjacent (e.g. `[1[2]]`), in
        # which case the error points at the first undelimited value instead of
        # failing with IndexError.
        error_item = empty[0] if empty else values[0]
        return KatSyntaxError(
            "array values should be delimited by ','", self.script, error_item
        )
    self.reset(pos)