Skip to content

Commit

Permalink
fix: drop joblib dependency (#1090)
Browse files Browse the repository at this point in the history
Joblib was only used for `joblib.hash` for dataframes, but there's `hash_pandas_object` for that.

Tangentially refs #1056
  • Loading branch information
akx authored and vascoalramos committed Oct 21, 2022
1 parent c29094c commit e4983c5
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 5 deletions.
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
joblib~=1.2.0 # 1.1.0
scipy>=1.4.1, <1.10
scipy>=1.4.1, <1.10
pandas>1.1, <1.6, !=1.4.0
matplotlib>=3.2, <3.6
pydantic>=1.8.1, <1.11
Expand Down
17 changes: 14 additions & 3 deletions src/pandas_profiling/utils/dataframe.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Utils for pandas DataFrames."""
import hashlib
import re
import unicodedata
import warnings
from pathlib import Path
from typing import Any, Optional

import joblib
import pandas as pd
from pandas.core.util.hashing import hash_pandas_object


def warn_read(extension: str) -> None:
Expand Down Expand Up @@ -180,16 +181,26 @@ def expand_mixed(df: pd.DataFrame, types: Any = None) -> pd.DataFrame:
return df


# Change this if `hash_dataframe`'s implementation changes.
HASH_PREFIX = "2@"


def hash_dataframe(df: pd.DataFrame) -> str:
    """Hash a DataFrame (implementation might change in the future).

    The values produced by ``hash_pandas_object`` are rendered as decimal
    strings, joined with newlines, and digested with SHA-256.

    Args:
        df: the DataFrame

    Returns:
        The DataFrame's hash: ``HASH_PREFIX`` followed by the hex digest.
    """
    # hash_pandas_object returns a series of uint64s. Using their
    # binary representation would be more efficient, but it's not
    # necessarily portable across architectures. Using the human-readable
    # string values should be good enough.
    hash_values = "\n".join(hash_pandas_object(df).values.astype(str))
    digest = hashlib.sha256(hash_values.encode("utf-8")).hexdigest()
    return f"{HASH_PREFIX}{digest}"


def slugify(value: str, allow_unicode: bool = False) -> str:
Expand Down

0 comments on commit e4983c5

Please sign in to comment.