diff --git a/.gitignore b/.gitignore index 36df893..d27f79d 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,5 @@ docs/_build/ # PyBuilder target/ +.idea +.DS_Store diff --git a/edtf/convert.py b/edtf/convert.py index ee03f36..db86155 100644 --- a/edtf/convert.py +++ b/edtf/convert.py @@ -70,8 +70,7 @@ def trim_struct_time(st: struct_time, strip_time: bool = False) -> struct_time: """ if strip_time: return struct_time(list(st[:3]) + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - else: - return struct_time(list(st[:6]) + TIME_EMPTY_EXTRAS) + return struct_time(list(st[:6]) + TIME_EMPTY_EXTRAS) def struct_time_to_jd(st: struct_time) -> float: @@ -116,7 +115,7 @@ def jd_to_struct_time(jd: float) -> struct_time: return struct_time([year, month, day, hour, minute, second] + TIME_EMPTY_EXTRAS) -def _roll_negative_time_fields(year, month, day, hour, minute, second): +def _roll_negative_time_fields(year, month, day, hour, minute, second) -> tuple: """ Fix date/time fields which have nonsense negative values for any field except for year by rolling the overall date/time value backwards, treating @@ -152,4 +151,5 @@ def _roll_negative_time_fields(year, month, day, hour, minute, second): year += int(month / 12.0) # Adjust by whole year in months year -= 1 # Subtract 1 for negative minutes month %= 12 # Convert negative month to positive remainder - return (year, month, day, hour, minute, second) + + return year, month, day, hour, minute, second diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index f28e685..9cee578 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -1,7 +1,9 @@ """Utilities to derive an EDTF string from an (English) natural language string.""" +import functools import re from datetime import datetime +from typing import Optional from dateutil.parser import ParserError, parse @@ -13,19 +15,45 @@ DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0) DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0) -SHORT_YEAR_RE = r"(-?)([\dX])([\dX])([\dX])([\dX])" -LONG_YEAR_RE = r"Y(-?)([1-9]\d\d\d\d+)" -CENTURY_RE = r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?" -CE_RE = r"(\d{1,4}) (ad|ce|bc|bce)" +LONG_YEAR_RE = re.compile(r"y(-?)([1-9]\d\d\d\d+)") +CENTURY_RE = re.compile(r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?") +CENTURY_RANGE = re.compile(r"\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]") +CE_RE = re.compile(r"(\d{1,4}) (ad|ce|bc|bce)") +ONE_DIGIT_PARTIAL_FIRST = re.compile(r"\d\D\b") +TWO_DIGIT_PARTIAL_FIRST = re.compile(r"\d\d\b") +PARTIAL_CHECK = re.compile(r"\b\d\d\d\d$") +SLASH_YEAR = re.compile(r"(\d\d\d\d)/(\d\d\d\d)") +BEFORE_CHECK = re.compile(r"\b(?:before|earlier|avant)\b") +AFTER_CHECK = re.compile(r"\b(after|since|later|aprés|apres)\b") +APPROX_CHECK = re.compile( + r"\b(?:ca?\.? ?\d{4}|circa|approx|approximately|around|about|~\d{3,4})|^~" +) +UNCERTAIN_CHECK = re.compile(r"\b(?:uncertain|possibly|maybe|guess|\d{3,4}\?)") +UNCERTAIN_REPL = re.compile(r"(\d{4})\?") +MIGHT_BE_CENTURY = re.compile(r"(\d{2}00)s") +MIGHT_BE_DECADE = re.compile(r"(\d{3}0)s") + +APPROX_CENTURY_RE = re.compile( + r"\b(ca?\.?) ?(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?" +) +UNCERTAIN_CENTURY_RE = re.compile( + r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?\?" +) + +APPROX_CE_RE = re.compile(r"\b(ca?\.?) ?(\d{1,4}) (ad|ce|bc|bce)") +UNCERTAIN_CE_RE = re.compile(r"(\d{1,4}) (ad|ce|bc|bce)\?") + +MENTIONS_YEAR = re.compile(r"\byear\b.+(in|during)\b") +MENTIONS_MONTH = re.compile(r"\bmonth\b.+(in|during)\b") +MENTIONS_DAY = re.compile(r"\bday\b.+(in|during)\b") # Set of RE rules that will cause us to abort text processing, since we know # the results will be wrong. -REJECT_RULES = ( - r".*dynasty.*", # Don't parse '23rd Dynasty' to 'uuuu-uu-23' -) +REJECT_RULES = re.compile(r".*dynasty.*") # Don't parse '23rd Dynasty' to 'uuuu-uu-23' -def text_to_edtf(text): +@functools.lru_cache +def text_to_edtf(text: str) -> Optional[str]: """ Generate EDTF string equivalent of a given natural language date string. """ @@ -35,7 +63,7 @@ def text_to_edtf(text): t = text.lower() # try parsing the whole thing - result = text_to_edtf_date(t) + result: Optional[str] = text_to_edtf_date(t) if not result: # split by list delims and move fwd with the first thing that returns a non-empty string. @@ -43,7 +71,8 @@ def text_to_edtf(text): for split in [",", ";", "or"]: for list_item in t.split(split): # try parsing as an interval - split by '-' - toks = list_item.split("-") + toks: list[str] = list_item.split("-") + if len(toks) == 2: d1 = toks[0].strip() d2 = toks[1].strip() @@ -51,19 +80,20 @@ def text_to_edtf(text): # match looks from the beginning of the string, search # looks anywhere. - if re.match(r"\d\D\b", d2): # 1-digit year partial e.g. 1868-9 + if re.match( + ONE_DIGIT_PARTIAL_FIRST, d2 + ): # 1-digit year partial e.g. 1868-9 if re.search( - r"\b\d\d\d\d$", d1 + PARTIAL_CHECK, d1 ): # TODO: evaluate it and see if it's a year d2 = d1[-4:-1] + d2 - elif re.match(r"\d\d\b", d2): # 2-digit year partial e.g. 1809-10 - if re.search(r"\b\d\d\d\d$", d1): + elif re.match( + TWO_DIGIT_PARTIAL_FIRST, d2 + ): # 2-digit year partial e.g. 1809-10 + if re.search(PARTIAL_CHECK, d1): d2 = d1[-4:-2] + d2 else: - century_range_match = re.search( - r"\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]", - f"{d1}-{d2}", - ) + century_range_match = re.search(CENTURY_RANGE, f"{d1}-{d2}") if century_range_match: g = century_range_match.groups() d1 = f"{g[0]}C" @@ -73,7 +103,7 @@ def text_to_edtf(text): r2 = text_to_edtf_date(d2) if r1 and r2: - result = r1 + "/" + r2 + result = f"{r1}/{r2}" return result # is it an either/or year "1838/1862" - that has a different @@ -82,7 +112,7 @@ def text_to_edtf(text): # This whole section could be more friendly. else: - int_match = re.search(r"(\d\d\d\d)\/(\d\d\d\d)", list_item) + int_match = re.search(SLASH_YEAR, list_item) if int_match: return f"[{int_match.group(1)}, {int_match.group(2)}]" @@ -92,21 +122,19 @@ def text_to_edtf(text): if result: break - is_before = re.findall(r"\bbefore\b", t) - is_before = is_before or re.findall(r"\bearlier\b", t) - - is_after = re.findall(r"\bafter\b", t) - is_after = is_after or re.findall(r"\bsince\b", t) - is_after = is_after or re.findall(r"\blater\b", t) + is_before = re.findall(BEFORE_CHECK, t) + is_after = re.findall(AFTER_CHECK, t) if is_before: - result = f"/{result}" # unknown is replaced with null for intervals + result = f"/{result}" elif is_after: - result = f"{result}/" # unknown is replaced with null for intervals + result = f"{result}/" + return result -def text_to_edtf_date(text): +@functools.lru_cache +def text_to_edtf_date(text: str) -> Optional[str]: """ Return EDTF string equivalent of a given natural language date string. @@ -115,37 +143,28 @@ def text_to_edtf_date(text): differ are undefined. """ if not text: - return + return None t = text.lower() - result = "" + result: str = "" - for reject_re in REJECT_RULES: - if re.match(reject_re, t): - return + if re.match(REJECT_RULES, t): + return None # matches on '1800s'. Needs to happen before is_decade. - could_be_century = re.findall(r"(\d{2}00)s", t) + could_be_century: list = re.findall(MIGHT_BE_CENTURY, t) # matches on '1800s' and '1910s'. Removes the 's'. # Needs to happen before is_uncertain because e.g. "1860s?" - t, is_decade = re.subn(r"(\d{3}0)s", r"\1", t) + t, is_decade = re.subn(MIGHT_BE_DECADE, r"\1", t) # detect approximation signifiers # a few 'circa' abbreviations just before the year - is_approximate = re.findall(r"\b(ca?\.?) ?\d{4}", t) + is_approximate = re.findall(APPROX_CHECK, t) # the word 'circa' anywhere - is_approximate = is_approximate or re.findall(r"\bcirca\b", t) - # the word 'approx'/'around'/'about' anywhere - is_approximate = is_approximate or re.findall(r"\b(approx|around|about)", t) - # a ~ before a year-ish number - is_approximate = is_approximate or re.findall(r"\b~\d{4}", t) - # a ~ at the beginning - is_approximate = is_approximate or re.findall(r"^~", t) # detect uncertainty signifiers - t, is_uncertain = re.subn(r"(\d{4})\?", r"\1", t) - # the words uncertain/maybe/guess anywhere - is_uncertain = is_uncertain or re.findall(r"\b(uncertain|possibly|maybe|guess)", t) + t, is_uncertain = re.subn(UNCERTAIN_REPL, r"\1", t) + is_uncertain = is_uncertain or re.findall(UNCERTAIN_CHECK, t) # detect century forms is_century = re.findall(CENTURY_RE, t) @@ -154,31 +173,28 @@ def text_to_edtf_date(text): is_ce = re.findall(CE_RE, t) if is_century: result = "%02dXX" % (int(is_century[0][0]) - 1,) - is_approximate = is_approximate or re.findall(r"\b(ca?\.?) ?" + CENTURY_RE, t) - is_uncertain = is_uncertain or re.findall(CENTURY_RE + r"\?", t) + is_approximate = is_approximate or re.findall(APPROX_CENTURY_RE, t) + is_uncertain = is_uncertain or re.findall(UNCERTAIN_CENTURY_RE, t) try: - is_bc = is_century[0][-1] in ("bc", "bce") - if is_bc: + if is_century[0][-1] in ("bc", "bce"): result = f"-{result}" except IndexError: pass elif is_ce: result = "%04d" % (int(is_ce[0][0])) - is_approximate = is_approximate or re.findall(r"\b(ca?\.?) ?" + CE_RE, t) - is_uncertain = is_uncertain or re.findall(CE_RE + r"\?", t) + is_approximate = is_approximate or re.findall(APPROX_CE_RE, t) + is_uncertain = is_uncertain or re.findall(UNCERTAIN_CE_RE, t) try: - is_bc = is_ce[0][-1] in ("bc", "bce") - if is_bc: + if is_ce[0][-1] in ("bc", "bce"): result = f"-{result}" except IndexError: pass else: # try dateutil.parse - try: # parse twice, using different defaults to see what was # parsed and what was guessed. @@ -205,15 +221,15 @@ def text_to_edtf_date(text): if dt1.date() == DEFAULT_DATE_1.date() and dt2.date() == DEFAULT_DATE_2.date(): # couldn't parse anything - defaults are untouched. - return + return None date1 = dt1.isoformat()[:10] date2 = dt2.isoformat()[:10] # guess precision of 'unspecified' characters to use - mentions_year = re.findall(r"\byear\b.+(in|during)\b", t) - mentions_month = re.findall(r"\bmonth\b.+(in|during)\b", t) - mentions_day = re.findall(r"\bday\b.+(in|during)\b", t) + mentions_year = re.findall(MENTIONS_YEAR, t) + mentions_month = re.findall(MENTIONS_MONTH, t) + mentions_day = re.findall(MENTIONS_DAY, t) for i in range(len(date1)): # if the given year could be a century (e.g. '1800s') then use @@ -221,18 +237,18 @@ def text_to_edtf_date(text): # a century or a decade. if i == 2 and could_be_century and not (is_approximate or is_uncertain): result += "X" - elif i == 3 and is_decade > 0: + elif i == 3 and is_decade: if mentions_year: - result += "X" # previously year precision - now just X + result += "X" # year precision else: - result += "X" # previously decade precision - now just X + result += "X" # decade precision elif date1[i] == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default result += date1[i] else: # different values were produced, meaning that it's likely - # a default. Use 'X' + # a default. Use 'unspecified' result += "X" # strip off unknown chars from end of string - except the first 4 diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index ed03355..14728f0 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -4,6 +4,7 @@ from datetime import date, datetime from operator import add, sub from time import struct_time +from typing import Optional from dateutil.relativedelta import relativedelta @@ -27,7 +28,7 @@ PRECISION_DAY = "day" -def days_in_month(year, month): +def days_in_month(year: int, month: int) -> int: """ Return the number of days in the given year and month, where month is 1=January to 12=December, and respecting leap years as identified by @@ -125,7 +126,7 @@ def __init__(self, *args, **kwargs): def __str__(self): raise NotImplementedError - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): raise NotImplementedError def lower_strict(self): @@ -134,36 +135,37 @@ def lower_strict(self): def upper_strict(self): return self._strict_date(lean=LATEST) - def _get_fuzzy_padding(self, lean): + def _get_fuzzy_padding(self, lean: str): """ Subclasses should override this to pad based on how precise they are. """ return relativedelta(0) - def get_is_approximate(self): + def get_is_approximate(self) -> bool: return getattr(self, "_is_approximate", False) - def set_is_approximate(self, val): + def set_is_approximate(self, val: bool) -> None: self._is_approximate = val - is_approximate = property(get_is_approximate, set_is_approximate) + is_approximate = property(get_is_approximate, set_is_approximate) # noqa - def get_is_uncertain(self): + def get_is_uncertain(self) -> bool: return getattr(self, "_is_uncertain", False) - def set_is_uncertain(self, val): + def set_is_uncertain(self, val: bool) -> None: self._is_uncertain = val - is_uncertain = property(get_is_uncertain, set_is_uncertain) + is_uncertain = property(get_is_uncertain, set_is_uncertain) # noqa - def get_is_uncertain_and_approximate(self): + def get_is_uncertain_and_approximate(self) -> bool: return getattr(self, "_uncertain_and_approximate", False) - def set_is_uncertain_and_approximate(self, val): + def set_is_uncertain_and_approximate(self, val: bool) -> None: self._uncertain_and_approximate = val is_uncertain_and_approximate = property( - get_is_uncertain_and_approximate, set_is_uncertain_and_approximate + get_is_uncertain_and_approximate, # noqa + set_is_uncertain_and_approximate, # noqa ) def lower_fuzzy(self): @@ -241,56 +243,71 @@ def __le__(self, other): class Date(EDTFObject): - def set_year(self, y): + def __init__( # noqa + self, + year: Optional[str] = None, + month: Optional[str] = None, + day: Optional[str] = None, + significant_digits=None, + **kwargs, + ): + for param in ("date", "lower", "upper"): + if param in kwargs: + self.__init__(**kwargs[param]) + return + + self._year = year # Year is required, but sometimes passed in as a 'date' dict. + self._month = month + self._day = day + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) + + def set_year(self, y: str): if y is None: raise AttributeError("Year must not be None") self._year = y - def get_year(self): + def get_year(self) -> str: return self._year - year = property(get_year, set_year) + year = property(get_year, set_year) # noqa - def set_month(self, m): + def set_month(self, m: Optional[str]): self._month = m if m is None: - self.day = None + self._day = None - def get_month(self): + def get_month(self) -> Optional[str]: return self._month - month = property(get_month, set_month) + month = property(get_month, set_month) # noqa - def __init__( - self, year=None, month=None, day=None, significant_digits=None, **kwargs - ): - for param in ("date", "lower", "upper"): - if param in kwargs: - self.__init__(**kwargs[param]) - return + def set_day(self, d: Optional[str]): + self._day = d + if d is None: + self._day = None - self.year = year # Year is required, but sometimes passed in as a 'date' dict. - self.month = month - self.day = day - self.significant_digits = ( - int(significant_digits) if significant_digits else None - ) + def get_day(self) -> Optional[str]: + return self._day + + day = property(get_day, set_day) # noqa def __str__(self): - r = self.year - if self.month: - r += f"-{self.month}" - if self.day: - r += f"-{self.day}" + r = self._year + if self._month: + r += f"-{self._month}" + if self._day: + r += f"-{self._day}" if self.significant_digits: r += f"S{self.significant_digits}" return r def isoformat(self, default=date.max): return "%s-%02d-%02d" % ( - self.year, - int(self.month or default.month), - int(self.day or default.day), + self._year, + int(self._month or default.month), + int(self._day or default.day), ) def lower_fuzzy(self): @@ -299,10 +316,10 @@ def lower_fuzzy(self): sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST) ) else: - total_digits = len(self.year) + total_digits = len(self._year) insignificant_digits = total_digits - self.significant_digits lower_year = ( - int(self.year) + int(self._year) // (10**insignificant_digits) * (10**insignificant_digits) ) @@ -314,9 +331,9 @@ def upper_fuzzy(self): add, self.upper_strict(), self._get_fuzzy_padding(LATEST) ) else: - total_digits = len(self.year) + total_digits = len(self._year) insignificant_digits = total_digits - self.significant_digits - upper_year = (int(self.year) // (10**insignificant_digits) + 1) * ( + upper_year = (int(self._year) // (10**insignificant_digits) + 1) * ( 10**insignificant_digits ) - 1 return struct_time( @@ -326,23 +343,23 @@ def upper_fuzzy(self): def _precise_year(self, lean): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: - return int(re.sub(r"X", r"0", self.year)) + return int(re.sub(r"X", r"0", self._year)) else: - return int(re.sub(r"X", r"9", self.year)) + return int(re.sub(r"X", r"9", self._year)) def _precise_month(self, lean): - if self.month and self.month != "XX": + if self._month and self._month != "XX": try: - return int(self.month) + return int(self._month) except ValueError as err: raise ValueError( - f"Couldn't convert {self.month} to int (in {self})" + f"Couldn't convert {self._month} to int (in {self})" ) from err else: return 1 if lean == EARLIEST else 12 def _precise_day(self, lean): - if not self.day or self.day == "XX": + if not self._day or self._day == "XX": if lean == EARLIEST: return 1 else: @@ -350,9 +367,9 @@ def _precise_day(self, lean): self._precise_year(LATEST), self._precise_month(LATEST) ) else: - return int(self.day) + return int(self._day) - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): """ Return a `time.struct_time` representation of the date. """ @@ -368,9 +385,9 @@ def _strict_date(self, lean): @property def precision(self): - if self.day: + if self._day: return PRECISION_DAY - if self.month: + if self._month: return PRECISION_MONTH return PRECISION_YEAR @@ -379,7 +396,7 @@ def estimated(self): class DateAndTime(EDTFObject): - def __init__(self, date, time): + def __init__(self, date, time): # noqa: super raises not implemented self.date = date self.time = time @@ -389,7 +406,7 @@ def __str__(self): def isoformat(self): return self.date.isoformat() + "T" + self.time - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): return self.date._strict_date(lean) def __eq__(self, other): @@ -408,14 +425,14 @@ def __ne__(self, other): class Interval(EDTFObject): - def __init__(self, lower, upper): + def __init__(self, lower, upper): # noqa: super() raises not implemented self.lower = lower self.upper = upper def __str__(self): return f"{self.lower}/{self.upper}" - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): if lean == EARLIEST: r = self.lower._strict_date(lean) else: @@ -438,7 +455,7 @@ def parse_action(cls, toks): args = toks.asList() return cls(*args) - def __init__(self, *args): + def __init__(self, *args): # noqa: super() raises not implemented if len(args) != 1: raise AssertionError("UA must have exactly one argument") ua = args[0] @@ -467,7 +484,7 @@ def _get_multiplier(self): class UncertainOrApproximate(EDTFObject): - def __init__(self, date, ua): + def __init__(self, date, ua): # noqa: super() raises not implemented self.date = date self.ua = ua self.is_uncertain = ua.is_uncertain if ua else False @@ -482,7 +499,7 @@ def __str__(self): else: return str(self.date) - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): return self.date._strict_date(lean) def _get_fuzzy_padding(self, lean): @@ -511,7 +528,7 @@ def _get_fuzzy_padding(self, lean): class UnspecifiedIntervalSection(EDTFObject): - def __init__(self, sectionOpen=False, other_section_element=None): + def __init__(self, sectionOpen=False, other_section_element=None): # noqa: super() raises not implemented if sectionOpen: self.is_open = True self.is_unknown = False @@ -526,14 +543,17 @@ def __str__(self): else: return ".." - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): + if lean not in (EARLIEST, LATEST): + raise ValueError("lean must be one of EARLIEST or LATEST") + if lean == EARLIEST: if self.is_unknown: upper = self.other._strict_date(LATEST) return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) else: return -math.inf - else: + elif lean == LATEST: if self.is_unknown: lower = self.other._strict_date(EARLIEST) return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) @@ -696,7 +716,7 @@ def precision(self): class Level1Interval(Interval): - def __init__(self, lower=None, upper=None): + def __init__(self, lower: Optional[dict] = None, upper: Optional[dict] = None): # noqa if lower: if lower["date"] == "..": self.lower = UnspecifiedIntervalSection( @@ -719,8 +739,10 @@ def __init__(self, lower=None, upper=None): self.upper = UnspecifiedIntervalSection( False, UncertainOrApproximate(**lower) ) - self.is_approximate = self.lower.is_approximate or self.upper.is_approximate - self.is_uncertain = self.lower.is_uncertain or self.upper.is_uncertain + self.is_approximate: bool = ( + self.lower.is_approximate or self.upper.is_approximate + ) + self.is_uncertain: bool = self.lower.is_uncertain or self.upper.is_uncertain self.is_uncertain_and_approximate = ( self.lower.is_uncertain_and_approximate or self.upper.is_uncertain_and_approximate @@ -734,7 +756,7 @@ def _get_fuzzy_padding(self, lean): class LongYear(EDTFObject): - def __init__(self, year, significant_digits=None): + def __init__(self, year: str, significant_digits: Optional[str] = None): # noqa self.year = year self.significant_digits = ( int(significant_digits) if significant_digits else None @@ -749,7 +771,7 @@ def __str__(self): def _precise_year(self): return int(self.year) - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): py = self._precise_year() if lean == EARLIEST: return struct_time([py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) @@ -797,7 +819,7 @@ def upper_fuzzy(self): class Season(Date): - def __init__(self, year, season, **kwargs): + def __init__(self, year, season, **kwargs): # noqa self.year = year self.season = season # use season to look up month # day isn't part of the 'season' spec, but it helps the inherited @@ -811,20 +833,15 @@ def _precise_month(self, lean): rng = appsettings.SEASON_L2_MONTHS_RANGE[int(self.season)] if lean == EARLIEST: return rng[0] - else: - return rng[1] + + return rng[1] # (* ************************** Level 2 *************************** *) class PartialUncertainOrApproximate(Date): - def set_year(self, y): # Year can be None. - self._year = y - - year = property(Date.get_year, set_year) - - def __init__( + def __init__( # noqa self, year=None, month=None, @@ -909,12 +926,17 @@ def __str__(self): return result - def _precise_year(self, lean): + def set_year(self, y): # Year can be None. + self._year = y + + year = property(Date.get_year, set_year) # noqa + + def _precise_year(self, lean: str): if self.season: return self.season._precise_year(lean) return super()._precise_year(lean) - def _precise_month(self, lean): + def _precise_month(self, lean: str): if self.season: return self.season._precise_month(lean) return super()._precise_month(lean) @@ -992,7 +1014,7 @@ class PartialUnspecified(Unspecified): class Consecutives(Interval): # Treating Consecutive ranges as intervals where one bound is optional - def __init__(self, lower=None, upper=None): + def __init__(self, lower=None, upper=None): # noqa if lower and not isinstance(lower, EDTFObject): self.lower = Date.parse(lower) else: @@ -1018,18 +1040,19 @@ def __str__(self): class OneOfASet(EDTFObject): + def __init__(self, *args): # noqa + self.objects = args + @classmethod def parse_action(cls, toks): args = [t for t in toks.asList() if isinstance(t, EDTFObject)] return cls(*args) - def __init__(self, *args): - self.objects = args - def __str__(self): - return "[{}]".format(", ".join([str(o) for o in self.objects])) + repr: str = ", ".join([str(o) for o in self.objects]) + return f"[{repr}]" - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): strict_dates = [x._strict_date(lean) for x in self.objects] # Accounting for possible 'inf' and '-inf' values if lean == LATEST: @@ -1051,34 +1074,35 @@ def _strict_date(self, lean): class MultipleDates(EDTFObject): + def __init__(self, *args): # noqa + self.objects = args + @classmethod def parse_action(cls, toks): args = [t for t in toks.asList() if isinstance(t, EDTFObject)] return cls(*args) - def __init__(self, *args): - self.objects = args - def __str__(self): - return "{{{}}}".format(", ".join([str(o) for o in self.objects])) + repr: str = ", ".join([str(o) for o in self.objects]) + return f"{{{repr}}}" - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): if lean == LATEST: return max([x._strict_date(lean) for x in self.objects]) - else: - return min([x._strict_date(lean) for x in self.objects]) + return min([x._strict_date(lean) for x in self.objects]) class Level2Interval(Level1Interval): - def __init__(self, lower, upper): + def __init__(self, lower, upper): # noqa # Check whether incoming lower/upper values are single-item lists, and # if so take just the first item. This works around what I *think* is a - # bug in the grammer that provides us with single-item lists of + # bug in the grammar that provides us with single-item lists of # `PartialUncertainOrApproximate` items for lower/upper values. if isinstance(lower, (tuple, list)) and len(lower) == 1: self.lower = lower[0] else: self.lower = lower + if isinstance(lower, (tuple, list)) and len(upper) == 1: self.upper = upper[0] else: @@ -1096,7 +1120,7 @@ class Level2Season(Season): class ExponentialYear(LongYear): - def __init__(self, base, exponent, significant_digits=None): + def __init__(self, base, exponent, significant_digits=None): # noqa self.base = base self.exponent = exponent self.significant_digits = ( @@ -1106,13 +1130,13 @@ def __init__(self, base, exponent, significant_digits=None): def _precise_year(self): return int(self.base) * 10 ** int(self.exponent) - def get_year(self): + def get_year(self) -> str: if self.significant_digits: return f"{self.base}E{self.exponent}S{self.significant_digits}" else: return f"{self.base}E{self.exponent}" - year = property(get_year) + year = property(get_year) # noqa def estimated(self): return self._precise_year() diff --git a/pyproject.toml b/pyproject.toml index b48c3f7..8826b99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,8 @@ [project] name = "edtf" version = "5.0.0" +license = { file = "LICENSE" } +keywords = ['edtf'] dependencies = [ "python-dateutil", "pyparsing", @@ -16,7 +18,8 @@ authors = [ { name = "Mark Finger" }, { name = "Sabine Müller" }, { name = "Cole Crawford" }, - { name = "Klaus Rettinghaus" } + { name = "Klaus Rettinghaus" }, + { name = "Andrew Hankinson", email = "andrew.hankinson@rism.digital" }, ] maintainers = [ { name = "The Interaction Consortium", email = "studio@interaction.net.au" }