From 699198c4d2f7dee82f92803a87f6312710dcc387 Mon Sep 17 00:00:00 2001 From: Anish Nyayachavadi <55898433+anishnya@users.noreply.github.com> Date: Sun, 25 Apr 2021 12:34:58 -0400 Subject: [PATCH] Implement dehumanize method (#956) * Created new dehumanize method for Arrow objects. * Added comments as per style guide and improved input validation and test cases. Implemented fixes as per discussion. * Fixed issue with failed Bengali test case. * Corrected grammar errors in comments and error messages. * Added Dehumanize information to index.rst * Edited Dehumanize documentation in index.rst to be clearer. * Fixed linting issue with docs --- arrow/arrow.py | 131 ++++++++++- arrow/constants.py | 87 ++++++++ arrow/locales.py | 6 +- docs/index.rst | 35 +++ tests/test_arrow.py | 497 +++++++++++++++++++++++++++++++++++++++++- tests/test_locales.py | 2 +- 6 files changed, 752 insertions(+), 6 deletions(-) diff --git a/arrow/arrow.py b/arrow/arrow.py index 0de43889d..515c0ab66 100644 --- a/arrow/arrow.py +++ b/arrow/arrow.py @@ -6,6 +6,7 @@ import calendar +import re import sys from datetime import date from datetime import datetime as dt_datetime @@ -32,7 +33,7 @@ from dateutil.relativedelta import relativedelta from arrow import formatter, locales, parser, util -from arrow.constants import DEFAULT_LOCALE +from arrow.constants import DEFAULT_LOCALE, DEHUMANIZE_LOCALES from arrow.locales import TimeFrameLiteral if sys.version_info < (3, 8): # pragma: no cover @@ -1298,6 +1299,134 @@ def gather_timeframes(_delta: float, _frame: TimeFrameLiteral) -> float: "Please consider making a contribution to this locale." ) + def dehumanize(self, timestring: str, locale: str = "en_us") -> "Arrow": + """Returns a new :class:`Arrow ` object, that represents + the time difference relative to the attributes of the + :class:`Arrow ` object. + + :param timestring: a ``str`` representing a humanized relative time. + :param locale: (optional) a ``str`` specifying a locale. 
Defaults to 'en-us'. + + Usage:: + + >>> arw = arrow.utcnow() + >>> arw + + >>> earlier = arw.dehumanize("2 days ago") + >>> earlier + + + >>> arw = arrow.utcnow() + >>> arw + + >>> later = arw.dehumanize("in 1 month") + >>> later + + + """ + + # Create a locale object based on the given locale + locale_obj = locales.get_locale(locale) + + # Check to see if locale is supported + normalized_locale_name = locale.lower().replace("_", "-") + + if normalized_locale_name not in DEHUMANIZE_LOCALES: + raise ValueError( + f"Dehumanize does not currently support the {locale} locale, please consider making a contribution to add support for this locale." + ) + + current_time = self.fromdatetime(self._datetime) + + # Create an object containing the relative time info + time_object_info = dict.fromkeys( + ["seconds", "minutes", "hours", "days", "weeks", "months", "years"], 0 + ) + + # Create an object recording whether each unit has been seen + unit_visited = dict.fromkeys( + ["now", "seconds", "minutes", "hours", "days", "weeks", "months", "years"], + False, + ) + + # Create a regex pattern object for numbers + num_pattern = re.compile(r"\d+") + + # Search timestring for each time unit within locale + for unit in locale_obj.timeframes: + + # Numeric unit of change + change_value = 0 + + # Replace {0} with regex \d representing digits + search_string = str(locale_obj.timeframes[unit]) + search_string = search_string.format(r"\d+") + + # Create search pattern and find within string + pattern = re.compile(fr"{search_string}") + match = pattern.search(timestring) + + # If there is no match continue to next iteration + if not match: + continue + + match_string = match.group() + num_match = num_pattern.search(match_string) + + # If no number matches set change value to be one + if not num_match: + change_value = 1 + else: + change_value = int(num_match.group()) + + # No time to update if now is the unit + if unit == "now": + unit_visited[unit] = True + continue + + # Add change value to the 
correct unit (accounts for the singular/plural timeframe keys, e.g. second vs. seconds) + time_unit_to_change = str(unit) + time_unit_to_change += "s" if (str(time_unit_to_change)[-1] != "s") else "" + time_object_info[time_unit_to_change] = change_value + unit_visited[time_unit_to_change] = True + + # Raise an error if string does not modify any units + if not any([True for k, v in unit_visited.items() if v]): + raise ValueError( + """Input string not valid. Note: Some locales do not support the week granulairty in Arrow. + If you are attempting to use the week granularity on an unsupported locale, this could be the cause of this error.""" + ) + + # Sign logic + future_string = locale_obj.future + future_string = future_string.format(".*") + future_pattern = re.compile(fr"^{future_string}$") + future_pattern_match = future_pattern.findall(timestring) + + past_string = locale_obj.past + past_string = past_string.format(".*") + past_pattern = re.compile(fr"^{past_string}$") + past_pattern_match = past_pattern.findall(timestring) + + # If a string contains the now unit, there will be no relative units, hence the need to check if the now unit + # was visited before raising a ValueError + if past_pattern_match: + sign_val = -1 + elif future_pattern_match: + sign_val = 1 + elif unit_visited["now"]: + sign_val = 0 + else: + raise ValueError( + """Invalid input String. String does not contain any relative time information. + String should either represent a time in the future or a time in the past. + Ex: "in 5 seconds" or "5 seconds ago". 
""" + ) + + time_changes = {k: sign_val * v for k, v in time_object_info.items()} + + return current_time.shift(**time_changes) + # query functions def is_between( diff --git a/arrow/constants.py b/arrow/constants.py index 2589592fd..23d886a4e 100644 --- a/arrow/constants.py +++ b/arrow/constants.py @@ -36,3 +36,90 @@ MIN_ORDINAL: Final[int] = 1 DEFAULT_LOCALE: Final[str] = "en-us" + +# Supported dehumanize locales +DEHUMANIZE_LOCALES = { + "en", + "en-us", + "en-gb", + "en-au", + "en-be", + "en-jp", + "en-za", + "en-ca", + "en-ph", + "fr", + "fr-fr", + "fr-ca", + "it", + "it-it", + "es", + "es-es", + "el", + "el-gr", + "ja", + "ja-jp", + "sv", + "sv-se", + "zh", + "zh-cn", + "zh-tw", + "zh-hk", + "nl", + "nl-nl", + "af", + "de", + "de-de", + "de-ch", + "de-at", + "nb", + "nb-no", + "nn", + "nn-no", + "pt", + "pt-pt", + "pt-br", + "tl", + "tl-ph", + "vi", + "vi-vn", + "tr", + "tr-tr", + "az", + "az-az", + "da", + "da-dk", + "ml", + "hi", + "fa", + "fa-ir", + "mr", + "ca", + "ca-es", + "ca-ad", + "ca-fr", + "ca-it", + "eo", + "eo-xx", + "bn", + "bn-bd", + "bn-in", + "rm", + "rm-ch", + "ro", + "ro-ro", + "sl", + "sl-si", + "id", + "id-id", + "sw", + "sw-ke", + "sw-tz", + "la", + "la-va", + "lt", + "lt-lt", + "ms", + "ms-my", + "ms-bn", +} diff --git a/arrow/locales.py b/arrow/locales.py index 14d762e3b..c604dfafb 100644 --- a/arrow/locales.py +++ b/arrow/locales.py @@ -2929,7 +2929,7 @@ class HindiLocale(Locale): month_abbreviations = [ "", "जन", - "फ़र", + "फ़र", "मार्च", "अप्रै", "मई", @@ -3821,7 +3821,7 @@ class BengaliLocale(Locale): month_names = [ "", "জানুয়ারি", - "ফেব্রুয়ারি", + "ফেব্রুয়ারি", "মার্চ", "এপ্রিল", "মে", @@ -3867,7 +3867,7 @@ def _ordinal_number(self, n: int) -> str: if n in [1, 5, 7, 8, 9, 10]: return f"{n}ম" if n in [2, 3]: - return f"{n}য়" + return f"{n}য়" if n == 4: return f"{n}র্থ" if n == 6: diff --git a/docs/index.rst b/docs/index.rst index 42bd73fda..3cef5a9b9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -252,6 +252,41 @@ 
Support for a growing number of locales (see ``locales.py`` for supported langua >>> future.humanize(a, locale='ru') 'через 2 час(а,ов)' +Dehumanize +~~~~~~~~~~ + +Take a human-readable string and use it to shift into a past time: + +.. code-block:: python + + >>> arw = arrow.utcnow() + >>> arw + + >>> earlier = arw.dehumanize("2 days ago") + >>> earlier + + +Or use it to shift into a future time: + +.. code-block:: python + + >>> arw = arrow.utcnow() + >>> arw + + >>> later = arw.dehumanize("in 1 month") + >>> later + + +Support for a growing number of locales (see ``constants.py`` for supported languages): + +.. code-block:: python + + >>> arw = arrow.utcnow() + >>> arw + + >>> later = arw.dehumanize("एक माह बाद", locale="hi") + >>> later + Ranges & Spans ~~~~~~~~~~~~~~ diff --git a/tests/test_arrow.py b/tests/test_arrow.py index 473af36b5..78211f8a8 100644 --- a/tests/test_arrow.py +++ b/tests/test_arrow.py @@ -10,7 +10,7 @@ from dateutil import tz from dateutil.relativedelta import FR, MO, SA, SU, TH, TU, WE -from arrow import arrow +from arrow import arrow, locales from .utils import assert_datetime_equality @@ -2293,6 +2293,501 @@ def test_years(self): assert result == "год назад" +# Fixtures for Dehumanize +@pytest.fixture(scope="class") +def locale_list_no_weeks(): + tested_langs = [ + "en", + "en-us", + "en-gb", + "en-au", + "en-be", + "en-jp", + "en-za", + "en-ca", + "en-ph", + "fr", + "fr-fr", + "fr-ca", + "it", + "it-it", + "es", + "es-es", + "el", + "el-gr", + "ja", + "ja-jp", + "sv", + "sv-se", + "zh", + "zh-cn", + "zh-tw", + "zh-hk", + "nl", + "nl-nl", + "af", + "de", + "de-de", + "de-ch", + "de-at", + "nb", + "nb-no", + "nn", + "nn-no", + "pt", + "pt-pt", + "pt_br", + "tl", + "tl-ph", + "vi", + "vi-vn", + "tr", + "tr-tr", + "az", + "az-az", + "da", + "da-dk", + "ml", + "hi", + "fa", + "fa-ir", + "mr", + "ca", + "ca-es", + "ca-ad", + "ca-fr", + "ca-it", + "eo", + "eo-xx", + "bn", + "bn-bd", + "bn-in", + "rm", + "rm-ch", + "ro", + "ro-ro", + "sl", + 
"sl-si", + "id", + "id-id", + "sw", + "sw-ke", + "sw-tz", + "la", + "la-va", + "lt", + "lt-lt", + "ms", + "ms-my", + "ms-bn", + ] + + return tested_langs + + +@pytest.fixture(scope="class") +def locale_list_with_weeks(): + tested_langs = [ + "en", + "en-us", + "en-gb", + "en-au", + "en-be", + "en-jp", + "en-za", + "en-ca", + "en-ph", + "fr", + "fr-fr", + "fr-ca", + "it", + "it-it", + "es", + "es-es", + "ja", + "ja-jp", + "sv", + "sv-se", + "zh", + "zh-cn", + "zh-tw", + "zh-hk", + "nl", + "nl-nl", + "de", + "de-de", + "de-ch", + "de-at", + "pt", + "pt-pt", + "pt-br", + "tl", + "tl-ph", + "vi", + "vi-vn", + "sw", + "sw-ke", + "sw-tz", + "la", + "la-va", + "lt", + "lt-lt", + "ms", + "ms-my", + "ms-bn", + ] + + return tested_langs + + +class TestArrowDehumanize: + def test_now(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2000, 6, 18, 5, 55, 0) + second_ago = arw.shift(seconds=-1) + second_future = arw.shift(seconds=1) + + second_ago_string = second_ago.humanize( + arw, locale=lang, granularity=["second"] + ) + second_future_string = second_future.humanize( + arw, locale=lang, granularity=["second"] + ) + + assert arw.dehumanize(second_ago_string, locale=lang) == arw + assert arw.dehumanize(second_future_string, locale=lang) == arw + + def test_seconds(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2000, 6, 18, 5, 55, 0) + second_ago = arw.shift(seconds=-5) + second_future = arw.shift(seconds=5) + + second_ago_string = second_ago.humanize( + arw, locale=lang, granularity=["second"] + ) + second_future_string = second_future.humanize( + arw, locale=lang, granularity=["second"] + ) + + assert arw.dehumanize(second_ago_string, locale=lang) == second_ago + assert arw.dehumanize(second_future_string, locale=lang) == second_future + + def test_minute(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2001, 6, 18, 5, 55, 0) + minute_ago = 
arw.shift(minutes=-1) + minute_future = arw.shift(minutes=1) + + minute_ago_string = minute_ago.humanize( + arw, locale=lang, granularity=["minute"] + ) + minute_future_string = minute_future.humanize( + arw, locale=lang, granularity=["minute"] + ) + + assert arw.dehumanize(minute_ago_string, locale=lang) == minute_ago + assert arw.dehumanize(minute_future_string, locale=lang) == minute_future + + def test_minutes(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2007, 1, 10, 5, 55, 0) + minute_ago = arw.shift(minutes=-5) + minute_future = arw.shift(minutes=5) + + minute_ago_string = minute_ago.humanize( + arw, locale=lang, granularity=["minute"] + ) + minute_future_string = minute_future.humanize( + arw, locale=lang, granularity=["minute"] + ) + + assert arw.dehumanize(minute_ago_string, locale=lang) == minute_ago + assert arw.dehumanize(minute_future_string, locale=lang) == minute_future + + def test_hour(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2009, 4, 20, 5, 55, 0) + hour_ago = arw.shift(hours=-1) + hour_future = arw.shift(hours=1) + + hour_ago_string = hour_ago.humanize(arw, locale=lang, granularity=["hour"]) + hour_future_string = hour_future.humanize( + arw, locale=lang, granularity=["hour"] + ) + + assert arw.dehumanize(hour_ago_string, locale=lang) == hour_ago + assert arw.dehumanize(hour_future_string, locale=lang) == hour_future + + def test_hours(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2010, 2, 16, 7, 55, 0) + hour_ago = arw.shift(hours=-3) + hour_future = arw.shift(hours=3) + + hour_ago_string = hour_ago.humanize(arw, locale=lang, granularity=["hour"]) + hour_future_string = hour_future.humanize( + arw, locale=lang, granularity=["hour"] + ) + + assert arw.dehumanize(hour_ago_string, locale=lang) == hour_ago + assert arw.dehumanize(hour_future_string, locale=lang) == hour_future + + def test_week(self, 
locale_list_with_weeks): + + for lang in locale_list_with_weeks: + + arw = arrow.Arrow(2012, 2, 18, 1, 52, 0) + week_ago = arw.shift(weeks=-1) + week_future = arw.shift(weeks=1) + + week_ago_string = week_ago.humanize(arw, locale=lang, granularity=["week"]) + week_future_string = week_future.humanize( + arw, locale=lang, granularity=["week"] + ) + + assert arw.dehumanize(week_ago_string, locale=lang) == week_ago + assert arw.dehumanize(week_future_string, locale=lang) == week_future + + def test_weeks(self, locale_list_with_weeks): + + for lang in locale_list_with_weeks: + + arw = arrow.Arrow(2020, 3, 18, 5, 3, 0) + week_ago = arw.shift(weeks=-7) + week_future = arw.shift(weeks=7) + + week_ago_string = week_ago.humanize(arw, locale=lang, granularity=["week"]) + week_future_string = week_future.humanize( + arw, locale=lang, granularity=["week"] + ) + + assert arw.dehumanize(week_ago_string, locale=lang) == week_ago + assert arw.dehumanize(week_future_string, locale=lang) == week_future + + def test_year(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2000, 1, 10, 5, 55, 0) + year_ago = arw.shift(years=-1) + year_future = arw.shift(years=1) + + year_ago_string = year_ago.humanize(arw, locale=lang, granularity=["year"]) + year_future_string = year_future.humanize( + arw, locale=lang, granularity=["year"] + ) + + assert arw.dehumanize(year_ago_string, locale=lang) == year_ago + assert arw.dehumanize(year_future_string, locale=lang) == year_future + + def test_years(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2000, 1, 10, 5, 55, 0) + year_ago = arw.shift(years=-10) + year_future = arw.shift(years=10) + + year_ago_string = year_ago.humanize(arw, locale=lang, granularity=["year"]) + year_future_string = year_future.humanize( + arw, locale=lang, granularity=["year"] + ) + + assert arw.dehumanize(year_ago_string, locale=lang) == year_ago + assert arw.dehumanize(year_future_string, 
locale=lang) == year_future + + def test_mixed_granularity(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2000, 1, 10, 5, 55, 0) + past = arw.shift(hours=-1, minutes=-1, seconds=-1) + future = arw.shift(hours=1, minutes=1, seconds=1) + + past_string = past.humanize( + arw, locale=lang, granularity=["hour", "minute", "second"] + ) + future_string = future.humanize( + arw, locale=lang, granularity=["hour", "minute", "second"] + ) + + assert arw.dehumanize(past_string, locale=lang) == past + assert arw.dehumanize(future_string, locale=lang) == future + + def test_mixed_granularity_hours(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2000, 1, 10, 5, 55, 0) + past = arw.shift(hours=-3, minutes=-1, seconds=-15) + future = arw.shift(hours=3, minutes=1, seconds=15) + + past_string = past.humanize( + arw, locale=lang, granularity=["hour", "minute", "second"] + ) + future_string = future.humanize( + arw, locale=lang, granularity=["hour", "minute", "second"] + ) + + assert arw.dehumanize(past_string, locale=lang) == past + assert arw.dehumanize(future_string, locale=lang) == future + + def test_mixed_granularity_day(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2000, 1, 10, 5, 55, 0) + past = arw.shift(days=-3, minutes=-1, seconds=-15) + future = arw.shift(days=3, minutes=1, seconds=15) + + past_string = past.humanize( + arw, locale=lang, granularity=["day", "minute", "second"] + ) + future_string = future.humanize( + arw, locale=lang, granularity=["day", "minute", "second"] + ) + + assert arw.dehumanize(past_string, locale=lang) == past + assert arw.dehumanize(future_string, locale=lang) == future + + def test_mixed_granularity_day_hour(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2000, 1, 10, 5, 55, 0) + past = arw.shift(days=-3, hours=-23, seconds=-15) + future = arw.shift(days=3, hours=23, seconds=15) + 
+ past_string = past.humanize( + arw, locale=lang, granularity=["day", "hour", "second"] + ) + future_string = future.humanize( + arw, locale=lang, granularity=["day", "hour", "second"] + ) + + assert arw.dehumanize(past_string, locale=lang) == past + assert arw.dehumanize(future_string, locale=lang) == future + + # Test to make sure unsupported locales error out + def test_unsupported_locale(self): + + arw = arrow.Arrow(2000, 6, 18, 5, 55, 0) + second_ago = arw.shift(seconds=-5) + second_future = arw.shift(seconds=5) + + second_ago_string = second_ago.humanize( + arw, locale="fi", granularity=["second"] + ) + second_future_string = second_future.humanize( + arw, locale="fi", granularity=["second"] + ) + + # fi is an example of many unsupported locales currently + with pytest.raises(ValueError): + arw.dehumanize(second_ago_string, locale="fi") + + with pytest.raises(ValueError): + arw.dehumanize(second_future_string, locale="fi") + + # Test to ensure old style locale strings are supported + def test_normalized_locale(self): + + arw = arrow.Arrow(2000, 6, 18, 5, 55, 0) + second_ago = arw.shift(seconds=-5) + second_future = arw.shift(seconds=5) + + second_ago_string = second_ago.humanize( + arw, locale="zh_hk", granularity=["second"] + ) + second_future_string = second_future.humanize( + arw, locale="zh_hk", granularity=["second"] + ) + + assert arw.dehumanize(second_ago_string, locale="zh_hk") == second_ago + assert arw.dehumanize(second_future_string, locale="zh_hk") == second_future + + # Ensures relative units are required in string + def test_require_relative_unit(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2000, 6, 18, 5, 55, 0) + second_ago = arw.shift(seconds=-5) + second_future = arw.shift(seconds=5) + + second_ago_string = second_ago.humanize( + arw, locale=lang, granularity=["second"], only_distance=True + ) + second_future_string = second_future.humanize( + arw, locale=lang, granularity=["second"], 
only_distance=True + ) + + with pytest.raises(ValueError): + arw.dehumanize(second_ago_string, locale=lang) + + with pytest.raises(ValueError): + arw.dehumanize(second_future_string, locale=lang) + + # Test for scrambled input + def test_scrambled_input(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2000, 6, 18, 5, 55, 0) + second_ago = arw.shift(seconds=-5) + second_future = arw.shift(seconds=5) + + second_ago_string = second_ago.humanize( + arw, locale=lang, granularity=["second"], only_distance=True + ) + second_future_string = second_future.humanize( + arw, locale=lang, granularity=["second"], only_distance=True + ) + + # Scrambles input by sorting strings + second_ago_presort = sorted(second_ago_string) + second_ago_string = "".join(second_ago_presort) + + second_future_presort = sorted(second_future_string) + second_future_string = "".join(second_future_presort) + + with pytest.raises(ValueError): + arw.dehumanize(second_ago_string, locale=lang) + + with pytest.raises(ValueError): + arw.dehumanize(second_future_string, locale=lang) + + def test_no_units_modified(self, locale_list_no_weeks): + + for lang in locale_list_no_weeks: + + arw = arrow.Arrow(2000, 6, 18, 5, 55, 0) + + # Ensures we pass the first stage of checking whether relative units exist + locale_obj = locales.get_locale(lang) + empty_past_string = locale_obj.past + empty_future_string = locale_obj.future + + with pytest.raises(ValueError): + arw.dehumanize(empty_past_string, locale=lang) + + with pytest.raises(ValueError): + arw.dehumanize(empty_future_string, locale=lang) + + class TestArrowIsBetween: def test_start_before_end(self): target = arrow.Arrow.fromdatetime(datetime(2013, 5, 7)) diff --git a/tests/test_locales.py b/tests/test_locales.py index 5e7fa7ea6..85d1cadbd 100644 --- a/tests/test_locales.py +++ b/tests/test_locales.py @@ -834,7 +834,7 @@ class TestBengaliLocale: def test_ordinal_number(self): assert self.locale._ordinal_number(0) == 
"0তম" assert self.locale._ordinal_number(1) == "1ম" - assert self.locale._ordinal_number(3) == "3য়" + assert self.locale._ordinal_number(3) == "3য়" assert self.locale._ordinal_number(4) == "4র্থ" assert self.locale._ordinal_number(5) == "5ম" assert self.locale._ordinal_number(6) == "6ষ্ঠ"