Skip to content

Commit

Permalink
Implement dehumanize method (#956)
Browse files Browse the repository at this point in the history
* Created new dehumanize method for Arrow objects.

* Added comments as per style guide and improved input validation and test cases. Implemented fixes as per discussion.

* Fixed issue with failed Bengali test case.

* Correct grammar errors in comments and error messages fixed.

* Added Dehumanize information to index.rst

* Edited Dehumaize documentation in index.rst to be clearer.

* Fixed linting issue with docs
  • Loading branch information
anishnya authored Apr 25, 2021
1 parent 1310dbb commit 699198c
Show file tree
Hide file tree
Showing 6 changed files with 752 additions and 6 deletions.
131 changes: 130 additions & 1 deletion arrow/arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@


import calendar
import re
import sys
from datetime import date
from datetime import datetime as dt_datetime
Expand All @@ -32,7 +33,7 @@
from dateutil.relativedelta import relativedelta

from arrow import formatter, locales, parser, util
from arrow.constants import DEFAULT_LOCALE
from arrow.constants import DEFAULT_LOCALE, DEHUMANIZE_LOCALES
from arrow.locales import TimeFrameLiteral

if sys.version_info < (3, 8): # pragma: no cover
Expand Down Expand Up @@ -1298,6 +1299,134 @@ def gather_timeframes(_delta: float, _frame: TimeFrameLiteral) -> float:
"Please consider making a contribution to this locale."
)

def dehumanize(self, timestring: str, locale: str = "en_us") -> "Arrow":
"""Returns a new :class:`Arrow <arrow.arrow.Arrow>` object, that represents
the time difference relative to the attrbiutes of the
:class:`Arrow <arrow.arrow.Arrow>` object.
:param timestring: a ``str`` representing a humanized relative time.
:param locale: (optional) a ``str`` specifying a locale. Defaults to 'en-us'.
Usage::
>>> arw = arrow.utcnow()
>>> arw
<Arrow [2021-04-20T22:27:34.787885+00:00]>
>>> earlier = arw.dehumanize("two days ago")
>>> earlier
<Arrow [2021-04-18T22:27:34.787885+00:00]>
>>> arw = arrow.utcnow()
>>> arw
<Arrow [2021-04-20T22:27:34.787885+00:00]>
>>> later = arw.dehumanize("in 1 month")
>>> later
<Arrow [2021-05-18T22:27:34.787885+00:00]>
"""

# Create a locale object based off given local
locale_obj = locales.get_locale(locale)

# Check to see if locale is supported
normalized_locale_name = locale.lower().replace("_", "-")

if normalized_locale_name not in DEHUMANIZE_LOCALES:
raise ValueError(
f"Dehumanize does not currently support the {locale} locale, please consider making a contribution to add support for this locale."
)

current_time = self.fromdatetime(self._datetime)

# Create an object containing the relative time info
time_object_info = dict.fromkeys(
["seconds", "minutes", "hours", "days", "weeks", "months", "years"], 0
)

# Create an object representing if unit has been seen
unit_visited = dict.fromkeys(
["now", "seconds", "minutes", "hours", "days", "weeks", "months", "years"],
False,
)

# Create a regex pattern object for numbers
num_pattern = re.compile(r"\d+")

# Search timestring for each time unit within locale
for unit in locale_obj.timeframes:

# Numeric unit of change
change_value = 0

# Replace {0} with regex \d representing digits
search_string = str(locale_obj.timeframes[unit])
search_string = search_string.format(r"\d+")

# Create search pattern and find within string
pattern = re.compile(fr"{search_string}")
match = pattern.search(timestring)

# If there is no match continue to next iteration
if not match:
continue

match_string = match.group()
num_match = num_pattern.search(match_string)

# If no number matches set change value to be one
if not num_match:
change_value = 1
else:
change_value = int(num_match.group())

# No time to update if now is the unit
if unit == "now":
unit_visited[unit] = True
continue

# Add change value to the correct unit (incorporates the plurality that exists within timeframe i.e second v.s seconds)
time_unit_to_change = str(unit)
time_unit_to_change += "s" if (str(time_unit_to_change)[-1] != "s") else ""
time_object_info[time_unit_to_change] = change_value
unit_visited[time_unit_to_change] = True

# Assert error if string does not modify any units
if not any([True for k, v in unit_visited.items() if v]):
raise ValueError(
"""Input string not valid. Note: Some locales do not support the week granulairty in Arrow.
If you are attempting to use the week granularity on an unsupported locale, this could be the cause of this error."""
)

# Sign logic
future_string = locale_obj.future
future_string = future_string.format(".*")
future_pattern = re.compile(fr"^{future_string}$")
future_pattern_match = future_pattern.findall(timestring)

past_string = locale_obj.past
past_string = past_string.format(".*")
past_pattern = re.compile(fr"^{past_string}$")
past_pattern_match = past_pattern.findall(timestring)

# If a string contains the now unit, there will be no relative units, hence the need to check if the now unit
# was visited before raising a ValueError
if past_pattern_match:
sign_val = -1
elif future_pattern_match:
sign_val = 1
elif unit_visited["now"]:
sign_val = 0
else:
raise ValueError(
"""Invalid input String. String does not contain any relative time information.
String should either represent a time in the future or a time in the past.
Ex: "in 5 seconds" or "5 seconds ago". """
)

time_changes = {k: sign_val * v for k, v in time_object_info.items()}

return current_time.shift(**time_changes)

# query functions

def is_between(
Expand Down
87 changes: 87 additions & 0 deletions arrow/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,90 @@
MIN_ORDINAL: Final[int] = 1

DEFAULT_LOCALE: Final[str] = "en-us"

# Supported dehumanize locales
DEHUMANIZE_LOCALES = {
"en",
"en-us",
"en-gb",
"en-au",
"en-be",
"en-jp",
"en-za",
"en-ca",
"en-ph",
"fr",
"fr-fr",
"fr-ca",
"it",
"it-it",
"es",
"es-es",
"el",
"el-gr",
"ja",
"ja-jp",
"sv",
"sv-se",
"zh",
"zh-cn",
"zh-tw",
"zh-hk",
"nl",
"nl-nl",
"af",
"de",
"de-de",
"de-ch",
"de-at",
"nb",
"nb-no",
"nn",
"nn-no",
"pt",
"pt-pt",
"pt-br",
"tl",
"tl-ph",
"vi",
"vi-vn",
"tr",
"tr-tr",
"az",
"az-az",
"da",
"da-dk",
"ml",
"hi",
"fa",
"fa-ir",
"mr",
"ca",
"ca-es",
"ca-ad",
"ca-fr",
"ca-it",
"eo",
"eo-xx",
"bn",
"bn-bd",
"bn-in",
"rm",
"rm-ch",
"ro",
"ro-ro",
"sl",
"sl-si",
"id",
"id-id",
"sw",
"sw-ke",
"sw-tz",
"la",
"la-va",
"lt",
"lt-lt",
"ms",
"ms-my",
"ms-bn",
}
6 changes: 3 additions & 3 deletions arrow/locales.py
Original file line number Diff line number Diff line change
Expand Up @@ -2929,7 +2929,7 @@ class HindiLocale(Locale):
month_abbreviations = [
"",
"जन",
"फ़र",
"फ़र",
"मार्च",
"अप्रै",
"मई",
Expand Down Expand Up @@ -3821,7 +3821,7 @@ class BengaliLocale(Locale):
month_names = [
"",
"জানুয়ারি",
"ফেব্রুয়ারি",
"ফেব্রুয়ারি",
"মার্চ",
"এপ্রিল",
"মে",
Expand Down Expand Up @@ -3867,7 +3867,7 @@ def _ordinal_number(self, n: int) -> str:
if n in [1, 5, 7, 8, 9, 10]:
return f"{n}ম"
if n in [2, 3]:
return f"{n}"
return f"{n}য়"
if n == 4:
return f"{n}র্থ"
if n == 6:
Expand Down
35 changes: 35 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,41 @@ Support for a growing number of locales (see ``locales.py`` for supported langua
>>> future.humanize(a, locale='ru')
'через 2 час(а,ов)'
Dehumanize
~~~~~~~~~~

Take a human readable string and use it to shift into a past time:

.. code-block:: python
>>> arw = arrow.utcnow()
>>> arw
<Arrow [2021-04-20T22:27:34.787885+00:00]>
>>> earlier = arw.dehumanize("two days ago")
>>> earlier
<Arrow [2021-04-18T22:27:34.787885+00:00]>
Or use it to shift into a future time:

.. code-block:: python
>>> arw = arrow.utcnow()
>>> arw
<Arrow [2021-04-20T22:27:34.787885+00:00]>
>>> later = arw.dehumanize("in 1 month")
>>> later
<Arrow [2021-05-18T22:27:34.787885+00:00]>
Support for a growing number of locales (see ``constants.py`` for supported languages):

.. code-block:: python
>>> arw = arrow.utcnow()
>>> arw
<Arrow [2021-04-20T22:27:34.787885+00:00]>
>>> later = arw.dehumanize("एक माह बाद", locale="hi")
>>> later
<Arrow [2021-05-18T22:27:34.787885+00:00]>
Ranges & Spans
~~~~~~~~~~~~~~
Expand Down
Loading

0 comments on commit 699198c

Please sign in to comment.