Final step to split the registry from the parser

Overview:
- All the code in facets is now independent of the definition textual format. In particular, definitions such as UnitDefinition, ContextDefinition and so on cannot be built directly from a string (some functions are kept only temporarily to simplify the transition). Building Definition objects from a string requires a parser that emits them.
- The standard pint format is implemented in delegates/txt_parser using flexparser. Briefly, each single-line statement is mapped to a ParsedStatement class and each larger construct to a Block class.
- The registry then has an adder function that takes a definition and incorporates it into the registry.

A few nice features of this approach:
1. The Definition objects are standalone public objects. You can now build them in a programmatic way and incorporate them into the registry using the define function, which will dispatch to the correct adder:
   >>> new_unit = UnitDefinition( ....)
   >>> ureg.define(new_unit) # might be called add in the future
   No more being forced to use string definitions (but you can still use them if you want).
2. Composition over inheritance. The Registry does not know how to parse a definition, but it delegates this to another class which can be changed. This makes it very easy to write another parser (faster, simpler) or try out a completely different file format.
3. Error messages can be more meaningful.

Backwards incompatible changes:
- The is_base parameter of Definitions is not needed any more. It is now computed automatically, leading to a leaner experience and also avoiding incompatible states.
- The alias for dimensionality has been removed (for now at least). The only one defined was speed as an alias of velocity.
- (Context|Group|System).from_lines and Definition.from_string have been rewritten in terms of the new parser, but they will likely be removed in the future.
- Changing non_int_type is not possible after the registry has been created.
- load_definition raises FileNotFoundError instead of a generic exception if the file was not found.
- The string representation of several definitions is now not so user friendly.
hgrecco · Sep 30, 2022 · 1dd3125 · 1dd3125
1 parent c2f2b53
commit 1dd3125
Show file tree

Hide file tree

Showing 36 changed files with 3,384 additions and 1,483 deletions.
diff --git a/pint/_vendor/flexparser.py b/pint/_vendor/flexparser.py
diff --git a/pint/default_en.txt b/pint/default_en.txt
@@ -230,7 +230,7 @@ counts_per_second = count / second = cps
 reciprocal_centimeter = 1 / cm = cm_1 = kayser
 
 # Velocity
-[velocity] = [length] / [time] = [speed]
+[velocity] = [length] / [time]
 knot = nautical_mile / hour = kt = knot_international = international_knot
 mile_per_hour = mile / hour = mph = MPH
 kilometer_per_hour = kilometer / hour = kph = KPH
@@ -443,17 +443,17 @@ farad = coulomb / volt = F
 abfarad = 1e9 * farad = abF
 conventional_farad_90 = R_K90 / R_K * farad = F_90
 
+# Magnetic flux
+[magnetic_flux] = [electric_potential] * [time]
+weber = volt * second = Wb
+unit_pole = µ_0 * biot * centimeter
+
 # Inductance
 [inductance] = [magnetic_flux] / [current]
 henry = weber / ampere = H
 abhenry = 1e-9 * henry = abH
 conventional_henry_90 = R_K / R_K90 * henry = H_90
 
-# Magnetic flux
-[magnetic_flux] = [electric_potential] * [time]
-weber = volt * second = Wb
-unit_pole = µ_0 * biot * centimeter
-
 # Magnetic field
 [magnetic_field] = [magnetic_flux] / [area]
 tesla = weber / meter ** 2 = T

diff --git a/pint/definitions.py b/pint/definitions.py
@@ -1,147 +1,28 @@
 """
-    pint.definitions
-    ~~~~~~~~~~~~~~~~
+    pint.converters
+    ~~~~~~~~~~~~~~~
 
-    Functions and classes related to unit definitions.
+    Kept for backwards compatibility
 
-    :copyright: 2016 by Pint Authors, see AUTHORS for more details.
+    :copyright: 2022 by Pint Authors, see AUTHORS for more details.
     :license: BSD, see LICENSE for more details.
 """
 
-from __future__ import annotations
+from . import errors
+from ._vendor import flexparser as fp
+from .delegates import ParserConfig, txt_parser
 
-from dataclasses import dataclass
-from typing import Callable, Optional, Tuple, Union
 
-from .converters import Converter
-
-
-@dataclass(frozen=True)
-class PreprocessedDefinition:
-    """Splits a definition into the constitutive parts.
-
-    A definition is given as a string with equalities in a single line::
-
-        ---------------> rhs
-        a = b = c = d = e
-        |   |   |   -------> aliases (optional)
-        |   |   |
-        |   |   -----------> symbol (use "_" for no symbol)
-        |   |
-        |   ---------------> value
-        |
-        -------------------> name
-    """
-
-    name: str
-    symbol: Optional[str]
-    aliases: Tuple[str, ...]
-    value: str
-    rhs_parts: Tuple[str, ...]
-
-    @classmethod
-    def from_string(cls, definition: str) -> PreprocessedDefinition:
-        name, definition = definition.split("=", 1)
-        name = name.strip()
-
-        rhs_parts = tuple(res.strip() for res in definition.split("="))
-
-        value, aliases = rhs_parts[0], tuple([x for x in rhs_parts[1:] if x != ""])
-        symbol, aliases = (aliases[0], aliases[1:]) if aliases else (None, aliases)
-        if symbol == "_":
-            symbol = None
-        aliases = tuple([x for x in aliases if x != "_"])
-
-        return cls(name, symbol, aliases, value, rhs_parts)
-
-
-@dataclass(frozen=True)
 class Definition:
-    """Base class for definitions.
-
-    Parameters
-    ----------
-    name : str
-        Canonical name of the unit/prefix/etc.
-    defined_symbol : str or None
-        A short name or symbol for the definition.
-    aliases : iterable of str
-        Other names for the unit/prefix/etc.
-    converter : callable or Converter or None
-    """
-
-    name: str
-    defined_symbol: Optional[str]
-    aliases: Tuple[str, ...]
-    converter: Optional[Union[Callable, Converter]]
-
-    _subclasses = []
-    _default_subclass = None
-
-    def __init_subclass__(cls, **kwargs):
-        if kwargs.pop("default", False):
-            if cls._default_subclass is not None:
-                raise ValueError("There is already a registered default definition.")
-            Definition._default_subclass = cls
-        super().__init_subclass__(**kwargs)
-        cls._subclasses.append(cls)
-
-    def __post_init__(self):
-        if isinstance(self.converter, str):
-            raise TypeError(
-                "The converter parameter cannot be an instance of `str`. Use `from_string` method"
-            )
-
-    @property
-    def is_multiplicative(self) -> bool:
-        return self.converter.is_multiplicative
-
-    @property
-    def is_logarithmic(self) -> bool:
-        return self.converter.is_logarithmic
-
-    @classmethod
-    def accept_to_parse(cls, preprocessed: PreprocessedDefinition):
-        return False
+    """This is kept for backwards compatibility"""
 
     @classmethod
-    def from_string(
-        cls, definition: Union[str, PreprocessedDefinition], non_int_type: type = float
-    ) -> Definition:
-        """Parse a definition.
-
-        Parameters
-        ----------
-        definition : str or PreprocessedDefinition
-        non_int_type : type
-
-        Returns
-        -------
-        Definition or subclass of Definition
-        """
-
-        if isinstance(definition, str):
-            definition = PreprocessedDefinition.from_string(definition)
-
-        for subclass in cls._subclasses:
-            if subclass.accept_to_parse(definition):
-                return subclass.from_string(definition, non_int_type)
-
-        if cls._default_subclass is None:
-            raise ValueError("No matching definition (and no default parser).")
-
-        return cls._default_subclass.from_string(definition, non_int_type)
-
-    @property
-    def symbol(self) -> str:
-        return self.defined_symbol or self.name
-
-    @property
-    def has_symbol(self) -> bool:
-        return bool(self.defined_symbol)
-
-    def add_aliases(self, *alias: str) -> None:
-        raise Exception("Cannot add aliases, definitions are inmutable.")
-
-    def __str__(self) -> str:
-        return self.name
+    def from_string(cls, s: str, non_int_type=float):
+        cfg = ParserConfig(non_int_type)
+        parser = txt_parser.Parser(cfg, None)
+        pp = parser.parse_string(s)
+        for definition in parser.iter_parsed_project(pp):
+            if isinstance(definition, Exception):
+                raise errors.DefinitionSyntaxError(str(definition))
+            if not isinstance(definition, (fp.BOS, fp.BOF, fp.BOS)):
+                return definition
diff --git a/pint/delegates/__init__.py b/pint/delegates/__init__.py
@@ -0,0 +1,14 @@
+"""
+    pint.delegates
+    ~~~~~~~~~~~~~~
+
+    Defines methods and classes to handle autonomous tasks.
+
+    :copyright: 2022 by Pint Authors, see AUTHORS for more details.
+    :license: BSD, see LICENSE for more details.
+"""
+
+from . import txt_parser
+from .base_parser import ParserConfig, build_disk_cache_class
+
+__all__ = [txt_parser, ParserConfig, build_disk_cache_class]
diff --git a/pint/delegates/base_parser.py b/pint/delegates/base_parser.py
@@ -0,0 +1,107 @@
+"""
+    pint.delegates.base_parser
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Common class and function for all parsers.
+
+    :copyright: 2022 by Pint Authors, see AUTHORS for more details.
+    :license: BSD, see LICENSE for more details.
+"""
+
+from __future__ import annotations
+
+import functools
+import itertools
+import numbers
+import pathlib
+import typing as ty
+from dataclasses import dataclass, field
+
+from pint import errors
+from pint.facets.plain.definitions import NotNumeric
+from pint.util import ParserHelper, UnitsContainer
+
+from .._vendor import flexcache as fc
+from .._vendor import flexparser as fp
+
+
+@dataclass(frozen=True)
+class ParserConfig:
+    """Configuration used by the parser."""
+
+    #: Indicates the output type of non integer numbers.
+    non_int_type: ty.Type[numbers.Number] = float
+
+    def to_scaled_units_container(self, s: str):
+        return ParserHelper.from_string(s, self.non_int_type)
+
+    def to_units_container(self, s: str):
+        v = self.to_scaled_units_container(s)
+        if v.scale != 1:
+            raise errors.UnexpectedScaleInContainer(str(v.scale))
+        return UnitsContainer(v)
+
+    def to_dimension_container(self, s: str):
+        v = self.to_units_container(s)
+        invalid = tuple(itertools.filterfalse(errors.is_valid_dimension_name, v.keys()))
+        if invalid:
+            raise errors.DefinitionSyntaxError(
+                f"Cannot build a dimension container with {', '.join(invalid)} that "
+                + errors.MSG_INVALID_DIMENSION_NAME
+            )
+        return v
+
+    def to_number(self, s: str) -> numbers.Number:
+        """Try parse a string into a number (without using eval).
+
+        The string can contain a number or a simple equation (3 + 4)
+
+        Raises
+        ------
+        _NotNumeric
+            If the string cannot be parsed as a number.
+        """
+        val = self.to_scaled_units_container(s)
+        if len(val):
+            raise NotNumeric(s)
+        return val.scale
+
+
+@functools.lru_cache()
+def build_disk_cache_class(non_int_type: type):
+    """Build disk cache class, taking into account the non_int_type."""
+
+    @dataclass(frozen=True)
+    class PintHeader(fc.InvalidateByExist, fc.NameByFields, fc.BasicPythonHeader):
+
+        from .. import __version__
+
+        pint_version: str = __version__
+        non_int_type: str = field(default_factory=lambda: non_int_type.__qualname__)
+
+    class PathHeader(fc.NameByFileContent, PintHeader):
+        pass
+
+    class ParsedProjecHeader(fc.NameByHashIter, PintHeader):
+        @classmethod
+        def from_parsed_project(cls, pp: fp.ParsedProject, reader_id):
+            tmp = []
+            for stmt in pp.iter_statements():
+                if isinstance(stmt, fp.BOS):
+                    tmp.append(
+                        stmt.content_hash.algorithm_name
+                        + ":"
+                        + stmt.content_hash.hexdigest
+                    )
+
+            return cls(tuple(tmp), reader_id)
+
+    class PintDiskCache(fc.DiskCache):
+
+        _header_classes = {
+            pathlib.Path: PathHeader,
+            str: PathHeader.from_string,
+            fp.ParsedProject: ParsedProjecHeader.from_parsed_project,
+        }
+
+    return PintDiskCache
diff --git a/pint/delegates/txt_parser/__init__.py b/pint/delegates/txt_parser/__init__.py
@@ -0,0 +1,14 @@
+"""
+    pint.delegates.txt_parser
+    ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Parser for the original textual Pint Definition file.
+
+    :copyright: 2022 by Pint Authors, see AUTHORS for more details.
+    :license: BSD, see LICENSE for more details.
+"""
+
+
+from .parser import Parser
+
+__all__ = [Parser]
diff --git a/pint/delegates/txt_parser/block.py b/pint/delegates/txt_parser/block.py
@@ -0,0 +1,45 @@
+"""
+    pint.delegates.txt_parser.block
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Classes for Pint Blocks, which are defined by:
+
+        @<block name>
+            <content>
+        @end
+
+    :copyright: 2022 by Pint Authors, see AUTHORS for more details.
+    :license: BSD, see LICENSE for more details.
+"""
+
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from ..._vendor import flexparser as fp
+
+
+@dataclass(frozen=True)
+class EndDirectiveBlock(fp.ParsedStatement):
+    """An EndDirectiveBlock is simply an "@end" statement."""
+
+    @classmethod
+    def from_string(cls, s: str) -> fp.FromString[EndDirectiveBlock]:
+        if s == "@end":
+            return cls()
+        return None
+
+
+@dataclass(frozen=True)
+class DirectiveBlock(fp.Block):
+    """Directive blocks have beginning statement starting with a @ character.
+    and ending with a "@end" (captured using a EndDirectiveBlock).
+
+    Subclass this class for convenience.
+    """
+
+    closing: EndDirectiveBlock
+
+    def derive_definition(self):
+        pass