Source code for pytableaux.lang.parsing

# -*- coding: utf-8 -*-
# pytableaux, a multi-logic proof generator.
# Copyright (C) 2014-2023 Doug Owings.
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
pytableaux.lang.parsing
^^^^^^^^^^^^^^^^^^^^^^^

"""
from __future__ import annotations

from abc import abstractmethod as abstract
from collections import deque
from enum import Enum
from types import MappingProxyType as MapProxy
from typing import (TYPE_CHECKING, Any, ClassVar, Iterable, Iterator, Mapping,
                    Self, TypeVar)

from ..errors import (BoundVariableError, Emsg, IllegalStateError, ParseError,
                      UnboundVariableError, UndefinedPredicateError, check)
from ..tools import MapCover, abcs, for_defaults
from . import BiCoords, LangCommonMeta, Marking, Notation
from .collect import Argument, Predicates, PredicatesBase
from .lex import (Atomic, Constant, Operated, Operator, Parameter, Predicate,
                  Predicated, Quantified, Quantifier, Sentence, Variable)

if TYPE_CHECKING:
    from typing import overload

__all__ = (
    'Parser',
    'ParseContext',
    'ParseTable',
    'PolishParser',
    'StandardParser')

_T = TypeVar('_T')
NOARG = object()


class ParserMeta(LangCommonMeta):
    'Parser Metaclass.'

    DEFAULT_NOTATION: Notation = Notation.polish

    if TYPE_CHECKING:
        @overload
        def __call__(cls,
            notation:Notation|None=None,
            predicates:Predicates|None=None,
            table:ParseTable|str|None = None,
            **opts) -> Parser:...
        
    def __call__(cls, *args, **kw) -> Parser:
        if cls is Parser:
            if args:
                notn = Notation(args[0])
                args = args[1:]
            else:
                notn = Notation(kw.pop('notation', Parser.DEFAULT_NOTATION))
            return notn.Parser(*args, **kw)
        return super().__call__(*args, **kw)

[docs] class Parser(metaclass=ParserMeta): """Parser interface and coordinator. To create a parser:: >>> from pytableaux.lang import Parser >>> Parser() <PolishParser:(<Notation.polish>, 'default')> >>> Parser('standard') <StandardParser:(<Notation.standard>, 'default')> Or use the :class:`Notation` enum class:: >>> from pytableaux.lang import Notation >>> Parser(Notation.standard) <StandardParser:(<Notation.standard>, 'default')> >>> Notation.polish.Parser() <PolishParser:(<Notation.polish>, 'default')> To parse :class:`Predicated` sentences, the parser must know the predicate's `arity`. By default, it will deduce the arity from the first usage. >>> parser = Parser(notation='standard') >>> parser('Fa & Gab') <Sentence: Fa ∧ Gab> >>> parser.predicates Predicates({<Predicate: F>, <Predicate: G>}) >>> # parser('Fab') # raises ParseError Alternatively, pass a :class:`Predicates` object with the predicates:: >>> from pytableaux.lang import Predicates >>> preds = Predicates(((0, 0, 1), (1, 0, 2))) >>> parser = Parser('standard', preds, auto_preds=False) >>> parser('Fa & Gab') <Sentence: Fa ∧ Gab> """ __slots__ = ('table', 'predicates', 'opts') notation: ClassVar[Notation] "The parser notation." defaults: ClassVar[dict] = dict(auto_preds=True) "The default options." table: ParseTable "The parse table instance." predicates: Predicates "The predicates store." opts: Mapping "The parser options." if TYPE_CHECKING: @overload def __init__(self, notation:Notation|None=None, predicates:Predicates|None=None, table:ParseTable|str|None = None, **opts): ... def __init__(self, predicates:Predicates|None=None, table:ParseTable|str|None = None, **opts): """ Options: auto_preds: Deduce predicate arity from first usage. Default ``True``. """ if table is None: self.table = ParseTable.fetch(self.notation) elif isinstance(table, str): self.table = ParseTable.fetch(self.notation, table) else: self.table = check.inst(table, ParseTable) if predicates is None: predicates = Predicates() else: if not isinstance(predicates, PredicatesBase): predicates = Predicates(predicates) self.predicates = predicates self.opts = for_defaults(self.defaults, opts)
[docs] @abstract def __call__(self, input: str, /) -> Sentence: """Parse a sentence from an input string. Args: input: The input string. Returns: Sentence: The parsed sentence. Raises: ParseError """ raise NotImplementedError
[docs] def argument(self, conclusion: str, premises: Iterable[str] = None, *, title: str|None = None) -> Argument: """Parse the input strings and create an argument. Args: conclusion: The argument's conclusion. premises: Premise strings, if any. Keyword Args: title: An optional title. Returns: The argument. Raises: ParseError: if input cannot be parsed. TypeError: for bad argument types. """ return Argument( self(conclusion), premises and map(self, premises), title=title)
def __repr__(self): return f'<{type(self).__name__}:{self.table.notation}.{self.table.dialect}>' def __init_subclass__(cls, primary = False, **kw): 'Merge ``_defaults``, set primary.' super().__init_subclass__(**kw) abcs.merge_attr(cls, 'defaults', supcls=__class__) if primary: cls.notation.Parser = cls
class ParseContext: "Parse context." __slots__ = ( 'bound', 'input', 'is_open', 'pos', 'predicates', 'table') bound: set[Variable] input: str is_open: bool pos: int predicates: PredicatesBase table: ParseTable def __init__(self, input: str, table: ParseTable, predicates: PredicatesBase, /): self.input = input self.table = table self.predicates = predicates self.is_open = False def open(self) -> Self: """Open the context. A context can only be opened once. """ if self.is_open: raise IllegalStateError('Context already open') self.is_open = True self.bound = set() self.pos = 0 self.chomp() return self def close(self): """Close the context. Eats remaining whitespace and checks that the input is fully consumed. Raises: ParseError: if input is not fully consumed. """ self.chomp() self.assert_end() def __enter__(self) -> Self: return self.open() def __exit__(self, typ, value, traceback): self.close() def current(self) -> str|None: 'Return the current character, or ``None`` if after last.' try: return self.input[self.pos] except IndexError: pass def next(self, n: int = 1, /) -> str|None: 'Get the nth character after the current, or ``None``.' try: return self.input[self.pos + n] except IndexError: pass def assert_current(self) -> Any: """ Returns: Type of current char, e.g. ``Operator``, or ``None`` if unknown type. Raises: ParseError: if after last. """ if not self.has_current(): raise ParseError(f'Unexpected end of input at position {self.pos}.') return self.type(self.current(), None) def assert_current_is(self, ctype: Any, /): """ Args: ctype (Any): Char type Raises ParseError: if after last, unexpected type or unknown symbol. """ if self.assert_current() is not ctype: raise ParseError(self._unexp_msg()) def assert_current_in(self, ctypes: Iterable[_T], /) -> _T: ctype = self.assert_current() if ctype in ctypes: return ctype raise ParseError(self._unexp_msg()) def assert_end(self): 'Raise an error if not after last.' if len(self.input) > self.pos: raise ParseError(self._unexp_msg()) def has_next(self, n = 1, /) -> bool: 'Whether there are n-many characters after the current.' return len(self.input) > self.pos + n def has_current(self) -> bool: 'Whether there is a current character.' return len(self.input) > self.pos def advance(self, n: int = 1, /) -> Self: """Advance the current pointer n-many characters, and then eat whitespace. Args: n: The number of characters to advance, default ``1``. Returns: self """ self.pos += n self.chomp() return self def chomp(self) -> ParseContext: """Proceeed through whitepsace. Returns: self """ try: while self.type(self.input[self.pos], None) is Marking.whitespace: self.pos += 1 except IndexError: pass return self def type(self, char: str, default = NOARG, /) -> Any: """Get the item type for the character. Args: char: The character symbol. default: The value to return if missing. Returns: The symbol type, or ``default`` if provided. Raises: KeyError: if symbol not in table and no default passed. """ try: return self.table[char][0] except KeyError: if default is NOARG: raise return default def value(self, char: str, /) -> Any: """Get the item value for the character. Args: char: The character symbol. Returns: Table item value, e.g. ``1`` or ``Operator.Negation``. Raises: KeyError: if symbol not in table. """ return self.table[char][1] def bind(self, v: Variable, /) -> Variable: """Add a variable to the set of bound variables, checking that it is not already bound. Args: v (Variable): The variable. Returns: Variable: The variable passed. Raises: BoundVariableError: if variable already bound. """ if v in self.bound: raise BoundVariableError( f"Cannot rebind variable {v.spec} near position {self.pos}.") self.bound.add(v) return v def check_bound(self, v: Variable, /) -> Variable: """Check that a variable is already bound. Args: v (Variable): The variable. Returns: Variable: The variable passed. Raises: UnboundVariableError: if variable is not bound. """ if v not in self.bound: raise UnboundVariableError( f"Unbound variable {v.spec} near position {self.pos}") return v def unbind(self, v: Variable, s: Sentence, /) -> tuple[Variable, Sentence]: """Unbind a variable, checking that it is already bound, and used within the sentence. Args: v (Variable): The variable. s (Sentence): The sentence using the variable. Returns: tuple[Variable, Sentence]: The variable and sentence passed. Raises: UnboundVariableError: if variable is not bound. BoundVariableError: if variable is not used in the sentence. """ if self.check_bound(v) not in s.variables: raise BoundVariableError( f"Unused bound variable {v.spec} near position {self.pos}") self.bound.remove(v) return v, s def _unexp_msg(self) -> str: char = self.input[self.pos] ctype = self.type(char, None) if ctype is None: pfx = 'Unexpected symbol' else: pfx = f'Unexpected {ctype} symbol' return f"{pfx} '{char}' at position {self.pos}" class Ctype(frozenset, Enum): pred = {Predicate, Predicate.System} param = {Constant, Variable}
[docs] class DefaultParser(Parser): """Parser default implementation. """ def __call__(self, input: str, /) -> Sentence: if isinstance(input, Sentence): return input with ParseContext(input, self.table, self.predicates) as context: return self._read(context) _methodmap = MapProxy({ Operator: '_read_operated', Atomic: '_read_atomic', Quantifier: '_read_quantified', Predicate: '_read_predicated', Predicate.System: '_read_predicated'}) def _read(self, context: ParseContext, /) -> Sentence: """ Internal entrypoint for reading a sentence. Implementation is recursive. This provides the default implementation for prefix notation sentences, i.e. atomic, predicated, and quantified sentences. Args: context (ParseContext): The parse context Returns: Sentence: The sentence Raises: ParseError: """ try: method = self._methodmap[context.assert_current()] except KeyError: raise ParseError(context._unexp_msg()) from None return getattr(self, method)(context) @abstract def _read_operated(self, context: ParseContext, /) -> Operated: raise NotImplementedError def _read_atomic(self, context: ParseContext, /) -> Atomic: 'Read an atomic sentence.' return Atomic(self._read_coords(context)) def _read_predicated(self, context: ParseContext, /) -> Predicated: 'Read a predicated sentence.' try: pred = self._read_predicate(context) except UndefinedPredicateError as err: if not self.opts['auto_preds']: raise coords = err.coords else: return pred(self._read_params(context, pred.arity)) params = tuple(self._read_params_auto(context)) arity = len(params) try: pred = Predicate(*coords, arity) except ValueError as err: raise ParseError( f'Error auto-creating predicate {coords=} {arity=}: {err}') self.predicates.add(pred) return pred(params) def _read_quantified(self, context: ParseContext, /) -> Quantified: 'Read a quantified sentence.' quant = context.value(context.current()) context.advance() context.assert_current_is(Variable) return quant( *context.unbind( context.bind(Variable(self._read_coords(context))), self._read(context))) def _read_predicate(self, context: ParseContext, /) -> Predicate: 'Read a predicate.' pchar = context.current() if context.type(pchar) is Predicate.System: context.advance() return Predicate(context.value(pchar)) coords = self._read_coords(context) try: return self.predicates.get(coords) except KeyError: raise UndefinedPredicateError( coords, f"Undefined predicate symbol '{pchar}' at position {context.pos}") def _read_params(self, context: ParseContext, num: int, /) -> Iterator[Parameter]: 'Read the given number of parameters.' read = self._read_parameter for _ in range(num): yield read(context) def _read_params_auto(self, context: ParseContext, /) -> Iterator[Parameter]: 'Read indefinite number of parameters' read = self._read_parameter ftype = context.type fcurr = context.current ctypes = Ctype.param while ftype(fcurr(), None) in ctypes: yield read(context) def _read_parameter(self, context: ParseContext, /) -> Parameter: 'Read a single parameter (constant or variable)' ctype = context.assert_current_in(Ctype.param) param = ctype(self._read_coords(context)) if ctype is Variable: context.check_bound(param) return param def _read_subscript(self, context: ParseContext, /) -> int: """Read the subscript starting from the current character. If the current character is not a digit, or we are after last, then the subscript is ``0```. Otherwise, all consecutive digit characters are read (whitespace allowed), and then converted to an integer, which is then returned. """ digits = deque() while True: cur = context.current() if context.type(cur, None) is not Marking.digit: break digits.append(context.value(cur)) context.advance() return int(''.join(map(str, digits)) or 0) def _read_coords(self, context: ParseContext, /) -> BiCoords: """Read (index, subscript) coords starting from the current character, which must be in the list of characters given. `index` is the list index in the symbol set. This is a generic way to read user predicates, atomics, variables, constants, etc. Note, this will not work for system predicates, because they have string keys in the symbols set. """ index = context.value(context.current()) context.advance() return BiCoords(index, self._read_subscript(context)) __delattr__ = Emsg.ReadOnly.razr
[docs] class PolishParser(DefaultParser, primary=True): "Polish notation parser." notation = Notation.polish def _read_operated(self, context: ParseContext, /) -> Operated: oper = Operator(context.value(context.current())) context.advance() return oper(self._read(context) for _ in range(oper.arity))
[docs] class StandardParser(DefaultParser, primary=True): """Standard notation parser. Options: drop_parens: Don't require parentheses on all binary sentences. Default ``True``. """ notation = Notation.standard defaults = dict(drop_parens=True) def __call__(self, input: str, /) -> Sentence: try: return super().__call__(input) except ParseError: if self.opts['drop_parens']: rev = self.table.reversed popen = rev[Marking.paren_open] pclose = rev[Marking.paren_close] try: return super().__call__(f'{popen}{input}{pclose}') except ParseError: pass raise _methodmap = MapProxy(dict(DefaultParser._methodmap) | { Marking.paren_open: '_read_from_paren_open', Constant: '_read_infix_predicated', Variable: '_read_infix_predicated'}) def _read_operated(self, context: ParseContext, /) -> Operated: oper = Operator(context.value(context.current())) # only unary operators can be prefix operators if oper.arity != 1: raise ParseError( f"Unexpected non-prefix operator symbol '{context.current()}' " f"at position {context.pos}") context.advance() return oper(self._read(context)) def _read_infix_predicated(self, context: ParseContext, /) -> Predicated: lhp = self._read_parameter(context) context.assert_current_in(Ctype.pred) ppos = context.pos try: pred = self._read_predicate(context) except UndefinedPredicateError as err: if not self.opts['auto_preds']: raise coords = err.coords else: arity = pred.arity if arity < 2: raise ParseError( f"Unexpected infixed {arity}-ary predicate symbol at position {ppos}") return pred(lhp, *self._read_params(context, arity - 1)) params = (lhp, *self._read_params_auto(context)) arity = len(params) if arity < 2: raise ParseError( f"Unexpected infixed {arity}-ary predicate symbol at position {ppos}") try: pred = Predicate(*coords, arity) except ValueError as err: raise ParseError( f'Error auto-creating predicate {coords=} {arity=}: {err}') self.predicates.add(pred) return pred(params) def _read_from_paren_open(self, context: ParseContext, /) -> Operated: # if we have an open parenthesis, then we demand a binary infix operator sentence. # scan ahead to: # - find the corresponding close parenthesis position # - find the binary operator and its position oper = oper_pos = None depth = length = 1 while depth: if not context.has_next(length): raise ParseError( f'Unterminated open paren at position {context.pos}') peek = context.next(length) ptype = context.type(peek, None) if ptype is Marking.paren_close: depth -= 1 elif ptype is Marking.paren_open: depth += 1 elif ptype is Operator: peek_oper = Operator(context.value(peek)) if peek_oper.arity == 2 and depth == 1: oper_pos = context.pos + length if oper is not None: raise ParseError( f'Unexpected {oper.name} symbol at position {oper_pos}') oper = peek_oper length += 1 if oper is None: raise ParseError( f'Missing binary operator at position {context.pos}') # now we can divide the string into lhs and rhs lhs_start = context.pos + 1 # move past the open paren context.advance() # read the lhs lhs = self._read(context) context.chomp() if context.pos != oper_pos: raise ParseError( f'Invalid left side expression starting at position {lhs_start} ' f'and ending at position {context.pos}, which proceeds past operator ' f'({oper.name}) at position {oper_pos}') # move past the operator context.advance() # read the rhs rhs = self._read(context) context.chomp() # now we should have a close paren context.assert_current_is(Marking.paren_close) # move past the close paren context.advance() return oper(lhs, rhs)
[docs] class ParseTable(MapCover[str, Any]): 'Parser table data class.' _instances: ClassVar[dict[Any, Self]] = {}
[docs] @classmethod def load(cls, data: Mapping, /) -> Self: """Create and store instance from data. Args: data: The data mapping Raises: Emsg.DuplicateKey: on duplicate key Returns: The instance """ notn = Notation[data['notation']] dialect = data.get('dialect', 'default') key = notn, dialect if key in cls._instances: raise Emsg.DuplicateKey(key) return cls._instances.setdefault(key, cls(data))
[docs] @classmethod def fetch(cls, notation: Notation, dialect: str = None) -> Self: """Get a loaded instance. Args: notation: The notation dialect: The dialect. Default is 'default'. Returns: The instance """ return cls._instances[Notation[notation], dialect or 'default']
reversed: Mapping "Reversed mapping of item to symbol." __slots__ = ('reversed', 'notation', 'dialect') def __init__(self, data: Mapping, /): """ Args: data: The table data. """ self.notation = Notation[data['notation']] self.dialect = data.get('dialect', 'default') mapping = dict(data['mapping']) rev = dict(map(reversed, mapping.items())) for cls in (Operator, Quantifier, Predicate.System): for item in cls: rev.setdefault(item, rev[cls, item]) for key, defaultkey in self._keydefaults.items(): if defaultkey in rev: rev.setdefault(key, rev[defaultkey]) self.reversed = MapProxy(rev) super().__init__(mapping) _keydefaults = MapProxy({ Marking.whitespace: (Marking.whitespace, 0), Marking.paren_open: (Marking.paren_open, 0), Marking.paren_close: (Marking.paren_close, 0), (Predicate, Predicate.Existence): (Predicate.System, Predicate.Existence), (Predicate, Predicate.Identity): (Predicate.System, Predicate.Identity)}) def __setattr__(self, name, value, /, *, sa = object.__setattr__): if getattr(self, name, NOARG) is not NOARG: raise AttributeError(name) sa(self, name, value) __delattr__ = Emsg.ReadOnly.razr