Skip to content

Commit

Permalink
Add Python 3.11 to GHA (#1090)
Browse files (browse the repository at this point in the history)
* add downloads tile (#1085)

* Add Python 3.11 to GHA

* Replace snappy with cramjam (#1091)

* add downloads tile (#1085)

* Replace snappy with cramjam

* Delete test_no_snappy

---------

Co-authored-by: Taylor Turner <taylorfturner@gmail.com>

* Update dask modules

* Install dask dataframe

* Update dask modules in precommit

* Correct copy/paste error

* Try again to clear Unicode

* Rolled back pre-commit dask version

* Add py311 to tox

* Bump dask to 2024.4.1

* Bump python-snappy 0.7.1

* Rewrite labeler test

* Correct isort

* Satisfy black

* And flake8

* Synced with requirements

---------

Co-authored-by: Taylor Turner <taylorfturner@gmail.com>
  • Loading branch information
gliptak and taylorfturner authored Jun 12, 2024
1 parent 1af22bb commit 4e4450a
Show file tree
Hide file tree
Showing 9 changed files with 25 additions and 86 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/publish-python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.9, "3.10"]
python-version: [3.9, "3.10", "3.11"]

steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ repos:
pyarrow>=1.0.1,
chardet>=3.0.4,
fastavro>=1.0.0.post1,
python-snappy>=0.5.4,
python-snappy>=0.7.1,
charset-normalizer>=1.3.6,
psutil>=4.0.0,
scipy>=1.4.1,
Expand Down
16 changes: 0 additions & 16 deletions dataprofiler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,6 @@
from .validators.base_validators import Validator
from .version import __version__

try:
import snappy
except ImportError:
import warnings

warnings.warn(
"Snappy must be installed to use parquet/avro datasets."
"\n\n"
"For macOS use Homebrew:\n"
"\t`brew install snappy`"
"\n\n"
"For linux use apt-get:\n`"
"\tsudo apt-get -y install libsnappy-dev`\n",
ImportWarning,
)


def set_seed(seed=None):
# also check it's an integer
Expand Down
43 changes: 19 additions & 24 deletions dataprofiler/tests/labelers/test_labeler_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import tempfile
import unittest
from unittest import mock

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -235,9 +235,7 @@ def test_verbose(self):
self.assertIn("f1-score ", log_output)
self.assertIn("F1 Score: ", log_output)

@mock.patch("dataprofiler.labelers.labeler_utils.classification_report")
@mock.patch("pandas.DataFrame")
def test_save_conf_mat(self, mock_dataframe, mock_report):
def test_save_conf_mat(self):

# ideally mock out the actual contents written to file, but
# would be difficult to get this completely worked out.
Expand All @@ -248,28 +246,25 @@ def test_save_conf_mat(self, mock_dataframe, mock_report):
[0, 1, 2],
]
)
expected_row_col_names = dict(
columns=["pred:PAD", "pred:UNKNOWN", "pred:OTHER"],
index=["true:PAD", "true:UNKNOWN", "true:OTHER"],
)
mock_instance_df = mock.Mock(spec=pd.DataFrame)()
mock_dataframe.return_value = mock_instance_df

# still omit bc confusion mat should include all despite omit
f1, f1_report = labeler_utils.evaluate_accuracy(
self.y_pred,
self.y_true,
self.num_labels,
self.reverse_label_mapping,
omitted_labels=["PAD"],
verbose=False,
confusion_matrix_file="test.csv",
)
expected_columns = ["pred:PAD", "pred:UNKNOWN", "pred:OTHER"]
expected_index = ["true:PAD", "true:UNKNOWN", "true:OTHER"]

self.assertTrue((mock_dataframe.call_args[0][0] == expected_conf_mat).all())
self.assertDictEqual(expected_row_col_names, mock_dataframe.call_args[1])
with tempfile.NamedTemporaryFile() as tmpFile:
# still omit bc confusion mat should include all despite omit
f1, f1_report = labeler_utils.evaluate_accuracy(
self.y_pred,
self.y_true,
self.num_labels,
self.reverse_label_mapping,
omitted_labels=["PAD"],
verbose=False,
confusion_matrix_file=tmpFile.name,
)

mock_instance_df.to_csv.assert_called()
df1 = pd.read_csv(tmpFile.name, index_col=0)
self.assertListEqual(list(df1.columns), expected_columns)
self.assertListEqual(list(df1.index), expected_index)
np.testing.assert_array_equal(df1.values, expected_conf_mat)


class TestTFFunctions(unittest.TestCase):
Expand Down
40 changes: 0 additions & 40 deletions dataprofiler/tests/test_data_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,46 +56,6 @@ def test_data_profiling(self):
self.assertIsNotNone(profile.profile)
self.assertIsNotNone(profile.report())

def test_no_snappy(self):
import importlib
import sys
import types

orig_import = __import__
# necessary for any wrapper around the library to test if snappy caught
# as an issue

def reload_data_profiler():
"""Recursively reload modules."""
sys_modules = sys.modules.copy()
for module_name, module in sys_modules.items():
# Only reload top level of the dataprofiler
if "dataprofiler" in module_name and len(module_name.split(".")) < 3:
if isinstance(module, types.ModuleType):
importlib.reload(module)

def import_mock(name, *args, **kwargs):
if name == "snappy":
raise ImportError("test")
return orig_import(name, *args, **kwargs)

with mock.patch("builtins.__import__", side_effect=import_mock):
with self.assertWarns(ImportWarning) as w:
import dataprofiler

reload_data_profiler()

self.assertEqual(
str(w.warning),
"Snappy must be installed to use parquet/avro datasets."
"\n\n"
"For macOS use Homebrew:\n"
"\t`brew install snappy`"
"\n\n"
"For linux use apt-get:\n`"
"\tsudo apt-get -y install libsnappy-dev`\n",
)

def test_no_tensorflow(self):
import sys

Expand Down
2 changes: 1 addition & 1 deletion requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
coverage>=5.0.1
dask>=2.29.0,<2024.2.0
dask[dask-expr,dataframe]>=2024.4.1
fsspec>=0.3.3
pytest>=6.0.1
pytest-cov>=2.8.1
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pytz>=2020.1
pyarrow>=1.0.1
chardet>=3.0.4
fastavro>=1.1.0
python-snappy>=0.5.4
python-snappy>=0.7.1
charset-normalizer>=1.3.6
psutil>=4.0.0
scipy>=1.10.0
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tox]
envlist = py39, py310, pypi-description, manifest, precom
envlist = py39, py310, py311, pypi-description, manifest, precom


[testenv]
Expand Down

0 comments on commit 4e4450a

Please sign in to comment.