Skip to content

Commit

Permalink
fix: drop joblib dependency
Browse files Browse the repository at this point in the history
joblib was only used for `joblib.hash` on DataFrames, and pandas already provides `hash_pandas_object` for that purpose.

Tangentially refs ydataai#1056
  • Loading branch information
akx committed Oct 4, 2022
1 parent 2462119 commit 0b115b4
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 5 deletions.
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
joblib~=1.2.0 # 1.1.0
scipy>=1.4.1, <1.10
scipy>=1.4.1, <1.10
pandas>1.1, <1.6, !=1.4.0
matplotlib>=3.2, <3.6
pydantic>=1.8.1, <1.10
Expand Down
17 changes: 14 additions & 3 deletions src/pandas_profiling/utils/dataframe.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Utils for pandas DataFrames."""
import hashlib
import re
import unicodedata
import warnings
from pathlib import Path
from typing import Any, Optional

import joblib
import pandas as pd
from pandas.core.util.hashing import hash_pandas_object


def warn_read(extension: str) -> None:
Expand Down Expand Up @@ -180,16 +181,26 @@ def expand_mixed(df: pd.DataFrame, types: Any = None) -> pd.DataFrame:
return df


# Change this if `hash_dataframe`'s implementation changes.
HASH_PREFIX = "2@"


def hash_dataframe(df: pd.DataFrame) -> str:
"""Hash a DataFrame (wrapper around joblib.hash, might change in the future)
"""Hash a DataFrame (implementation might change in the future)
Args:
df: the DataFrame
Returns:
The DataFrame's hash
"""
return joblib.hash(df)
# hash_pandas_object returns a series of uint64s. Using their
# binary representation would be more efficient, but it's not
# necessarily portable across architectures. Using the human-readable
# string values should be good enough.
hash_values = "\n".join(hash_pandas_object(df).values.astype(str))
digest = hashlib.sha256(hash_values.encode("utf-8")).hexdigest()
return f"{HASH_PREFIX}{digest}"


def slugify(value: str, allow_unicode: bool = False) -> str:
Expand Down

0 comments on commit 0b115b4

Please sign in to comment.