-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added test cases for split() and preprocess()
- Loading branch information
1 parent
f790482
commit 771b2d9
Showing
3 changed files
with
170 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# tests/test_preprocessing.py | ||
# References: Tiffany's Breast Cancer Predictor and ChatGPT | ||
|
||
import os | ||
import sys | ||
sys.path.append(os.path.join(os.path.dirname(__file__), '..')) | ||
|
||
import pytest | ||
import pandas as pd | ||
import tempfile | ||
from src.preprocessing import preprocess | ||
|
||
# Tests for preprocess: | ||
# 1. With regular/normal df | ||
# 2. With empty df | ||
# 3. With invalid path | ||
# 4. With invalid input types | ||
|
||
# note: split and preprocessing will have similar tests. | ||
|
||
|
||
def test_preprocess_normal():
    """
    Runs preprocess on small, valid train/test DataFrames and verifies that
    each expected pickle file is present in the output directory afterwards.
    """
    numeric_features = ['num_feature']
    categorical_features = ['cat_feature']

    # Minimal frames with one numeric and one categorical column each
    train_df = pd.DataFrame({
        'num_feature': [1, 2, 3],
        'cat_feature': ['A', 'B', 'A'],
    })
    test_df = pd.DataFrame({
        'num_feature': [4, 5],
        'cat_feature': ['B', 'A'],
    })

    expected_outputs = (
        'delay_preprocessor.pickle',
        'train_processed.pickle',
        'test_processed.pickle',
    )

    # The checks must run inside the context manager: the directory (and
    # everything written to it) is removed as soon as the block exits.
    with tempfile.TemporaryDirectory() as out_dir:
        preprocess(train_df, test_df, numeric_features, categorical_features, out_dir)

        for filename in expected_outputs:
            assert os.path.exists(os.path.join(out_dir, filename))
|
||
def test_preprocess_empty_dataframe():
    """
    Tests the function with empty DataFrames to ensure it raises a ValueError
    carrying the expected message.
    """
    X_train = pd.DataFrame()
    X_test = pd.DataFrame()
    numeric_features = ['num_feature']
    categorical_features = ['cat_feature']

    # `match` is treated as a regular expression, so the trailing period is
    # escaped to require a literal "." instead of any character.
    with pytest.raises(ValueError, match=r"DataFrame must contain observations\."):
        preprocess(X_train, X_test, numeric_features, categorical_features, 'some_path')
|
||
def test_preprocess_invalid_path():
    """
    Tests the function with a non-existent directory to ensure it raises a
    FileNotFoundError carrying the expected message.
    """
    X_train = pd.DataFrame({
        'num_feature': [1, 2, 3],
        'cat_feature': ['A', 'B', 'A']
    })
    X_test = pd.DataFrame({
        'num_feature': [4, 5],
        'cat_feature': ['B', 'A']
    })
    numeric_features = ['num_feature']
    categorical_features = ['cat_feature']

    # `match` is treated as a regular expression, so the trailing period is
    # escaped to require a literal "." instead of any character.
    with pytest.raises(
        FileNotFoundError,
        match=r"Directory some_invalid_path does not exist\.",
    ):
        preprocess(X_train, X_test, numeric_features, categorical_features, 'some_invalid_path')
|
||
def test_preprocess_invalid_input_type():
    """
    Passes a plain string where the training DataFrame is expected and
    checks that preprocess rejects it with a TypeError.
    """
    numeric_features = ['num_feature']
    categorical_features = ['cat_feature']

    # Only the training input is invalid; the test input is a real DataFrame
    bad_train = "not_a_dataframe"
    good_test = pd.DataFrame({
        'num_feature': [4, 5],
        'cat_feature': ['B', 'A'],
    })

    with pytest.raises(TypeError, match="Input must be a pandas DataFrame"):
        preprocess(bad_train, good_test, numeric_features, categorical_features, 'some_path')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# tests/test_split.py | ||
# References: Tiffany's Breast Cancer Predictor and ChatGPT | ||
|
||
import os | ||
import sys | ||
sys.path.append(os.path.join(os.path.dirname(__file__), '..')) | ||
|
||
import pytest | ||
import pandas as pd | ||
from src.split import split | ||
|
||
|
||
# Tests for split() function: | ||
# 1. Can it split a regular dataframe with string and int values | ||
# 2. Test for empty dataset | ||
# 3. Raise error when the output directory does not exist | ||
# 4. Raise error for invalid data type | ||
|
||
# note: split and preprocessing will have similar tests. | ||
|
||
|
||
def test_split_normal_case(tmpdir):
    """
    Splits a small, valid dataframe and verifies that the four expected CSV
    files are written into a temporary directory.
    """
    # Temporary output location provided by pytest
    directory = tmpdir.mkdir("test_dir")

    # Sample data mixing int and string features plus a target column
    sample = pd.DataFrame({
        'feature1': [1, 2, 3, 4, 5],
        'feature2': ['A', 'B', 'C', 'D', 'E'],
        'target': [0, 1, 0, 1, 0],
    })

    split(sample, 'target', str(directory))

    # Every output file must exist after the call
    for filename in ('X_train.csv', 'y_train.csv', 'X_test.csv', 'y_test.csv'):
        assert os.path.exists(os.path.join(directory, filename))
|
||
def test_split_empty_dataframe():
    """
    Tests the function with an empty dataframe to ensure it raises a
    ValueError carrying the expected message.
    """
    # Create an empty df
    data = pd.DataFrame()

    # `match` is treated as a regular expression, so the trailing period is
    # escaped to require a literal "." instead of any character.
    with pytest.raises(ValueError, match=r"DataFrame must contain observations\."):
        split(data, 'target', 'some_directory')
|
||
def test_split_nonexistent_directory():
    """
    Tests the function with a valid DataFrame but a non-existent directory to
    ensure it raises a FileNotFoundError carrying the expected message.
    """
    # Create a sample df
    data = pd.DataFrame({
        'feature1': [1, 2, 3],
        'target': [0, 1, 0]
    })

    # `match` is treated as a regular expression, so the trailing period is
    # escaped to require a literal "." instead of any character.
    with pytest.raises(
        FileNotFoundError,
        match=r"Directory nonexistent_directory does not exist\.",
    ):
        split(data, 'target', 'nonexistent_directory')
|
||
def test_split_invalid_data_type():
    """
    Passes a plain string in place of the DataFrame and checks that split
    rejects it with a TypeError.
    """
    not_a_frame = "not_a_dataframe"

    with pytest.raises(TypeError, match="Input must be a pandas DataFrame"):
        split(not_a_frame, 'target', 'some_directory')