Skip to content

Commit

Permalink
added test cases for split() and preprocess()
Browse files Browse the repository at this point in the history
  • Loading branch information
diwanashita committed Dec 16, 2024
1 parent f790482 commit 771b2d9
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 1 deletion.
91 changes: 91 additions & 0 deletions test/test_preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# test/test_preprocessing.py
# References: Tiffany's Breast Cancer Predictor and ChatGPT were used as references

import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

import pytest
import pandas as pd
import tempfile
from src.preprocessing import preprocess

# Tests for preprocess:
# 1. With regular/normal df
# 2. With empty df
# 3. With invalid path
# 4. With invalid input types

# note: split and preprocessing will have similar tests.


def test_preprocess_normal():
    """
    Runs preprocess on small, valid train/test DataFrames and verifies that the
    three expected pickle files exist in the output directory afterwards.
    """
    numeric_features = ['num_feature']
    categorical_features = ['cat_feature']

    # Minimal frames with one numeric and one categorical column each
    X_train = pd.DataFrame({
        'num_feature': [1, 2, 3],
        'cat_feature': ['A', 'B', 'A'],
    })
    X_test = pd.DataFrame({
        'num_feature': [4, 5],
        'cat_feature': ['B', 'A'],
    })

    expected_outputs = (
        'delay_preprocessor.pickle',
        'train_processed.pickle',
        'test_processed.pickle',
    )

    # The temporary directory is deleted when the block exits, so the
    # existence checks have to run while it is still open.
    with tempfile.TemporaryDirectory() as out_dir:
        preprocess(X_train, X_test, numeric_features, categorical_features, out_dir)

        for filename in expected_outputs:
            assert os.path.exists(os.path.join(out_dir, filename))

def test_preprocess_empty_dataframe():
    """
    Tests the function with empty DataFrames to ensure it raises a ValueError.

    A real temporary directory is passed as the output location so that the
    empty input is the only invalid argument. With a non-existent path the
    outcome would depend on which check preprocess performs first.
    """
    X_train = pd.DataFrame()
    X_test = pd.DataFrame()
    numeric_features = ['num_feature']
    categorical_features = ['cat_feature']

    with tempfile.TemporaryDirectory() as temp_dir:
        with pytest.raises(ValueError, match="DataFrame must contain observations."):
            preprocess(X_train, X_test, numeric_features, categorical_features, temp_dir)

def test_preprocess_invalid_path():
    """
    Passes valid DataFrames together with an output directory that does not
    exist and expects preprocess to raise a FileNotFoundError naming that path.
    """
    numeric_features = ['num_feature']
    categorical_features = ['cat_feature']

    # Valid inputs, so the missing directory is the only thing wrong
    train_frame = pd.DataFrame({
        'num_feature': [1, 2, 3],
        'cat_feature': ['A', 'B', 'A'],
    })
    test_frame = pd.DataFrame({
        'num_feature': [4, 5],
        'cat_feature': ['B', 'A'],
    })

    expected_error = pytest.raises(
        FileNotFoundError,
        match="Directory some_invalid_path does not exist.",
    )
    with expected_error:
        preprocess(
            train_frame,
            test_frame,
            numeric_features,
            categorical_features,
            'some_invalid_path',
        )

def test_preprocess_invalid_input_type():
    """
    Tests the function with an invalid input type (string instead of df)
    to ensure it raises a TypeError.

    A real temporary directory is passed as the output location so that the
    wrongly typed X_train is the only invalid argument. With a non-existent
    path the outcome would depend on which check preprocess performs first.
    """
    X_train = "not_a_dataframe"
    X_test = pd.DataFrame({
        'num_feature': [4, 5],
        'cat_feature': ['B', 'A']
    })
    numeric_features = ['num_feature']
    categorical_features = ['cat_feature']

    with tempfile.TemporaryDirectory() as temp_dir:
        with pytest.raises(TypeError, match="Input must be a pandas DataFrame"):
            preprocess(X_train, X_test, numeric_features, categorical_features, temp_dir)
2 changes: 1 addition & 1 deletion test/test_remove_outliers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# src/tests/test_remove_outliers.py
# tests/test_remove_outliers.py
# References: Tiffany's Breast Cancer Predictor and ChatGPT were used as references


Expand Down
78 changes: 78 additions & 0 deletions test/test_split.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# test/test_split.py
# References: Tiffany's Breast Cancer Predictor and ChatGPT were used as references

import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

import pytest
import pandas as pd
from src.split import split


# Tests for split() function:
# 1. Can it split a regular dataframe with string and int values
# 2. Test for empty dataset
# 3. Raise error when the output directory does not exist
# 4. Raise error for invalid data type

# note: split and preprocessing will have similar tests.


def test_split_normal_case(tmpdir):
    """
    Calls split on a valid dataframe and checks that the four train/test CSV
    files exist in a temporary output directory afterwards.
    """
    # Fresh sub-directory under pytest's per-test temporary location
    directory = tmpdir.mkdir("test_dir")

    # Small frame mixing integer and string features with a binary target
    data = pd.DataFrame({
        'feature1': [1, 2, 3, 4, 5],
        'feature2': ['A', 'B', 'C', 'D', 'E'],
        'target': [0, 1, 0, 1, 0],
    })

    split(data, 'target', str(directory))

    # Every expected output file must be present
    for filename in ('X_train.csv', 'y_train.csv', 'X_test.csv', 'y_test.csv'):
        assert os.path.exists(os.path.join(directory, filename))

def test_split_empty_dataframe():
    """
    Tests the function with an empty dataframe to ensure it raises a ValueError.

    A real temporary directory is passed as the output location so that the
    empty dataframe is the only invalid argument. With a non-existent
    directory the outcome would depend on which check split performs first.
    """
    # Imported locally because this module does not import tempfile at the top
    import tempfile

    # Create an empty df
    data = pd.DataFrame()

    # Check if ValueError is raised
    with tempfile.TemporaryDirectory() as temp_dir:
        with pytest.raises(ValueError, match="DataFrame must contain observations."):
            split(data, 'target', temp_dir)

def test_split_nonexistent_directory():
    """
    Passes a valid DataFrame together with an output directory that does not
    exist and expects split to raise a FileNotFoundError naming that directory.
    """
    # Valid input, so the missing directory is the only thing wrong
    sample = pd.DataFrame({
        'feature1': [1, 2, 3],
        'target': [0, 1, 0],
    })

    expected_error = pytest.raises(
        FileNotFoundError,
        match="Directory nonexistent_directory does not exist.",
    )
    with expected_error:
        split(sample, 'target', 'nonexistent_directory')

def test_split_invalid_data_type():
    """
    Tests the function with a string instead of a DataFrame to ensure it raises a TypeError.

    A real temporary directory is passed as the output location so that the
    wrongly typed data argument is the only invalid argument. With a
    non-existent directory the outcome would depend on which check split
    performs first.
    """
    # Imported locally because this module does not import tempfile at the top
    import tempfile

    # Check if TypeError is raised for non-DataFrame input
    with tempfile.TemporaryDirectory() as temp_dir:
        with pytest.raises(TypeError, match="Input must be a pandas DataFrame"):
            split("not_a_dataframe", 'target', temp_dir)

0 comments on commit 771b2d9

Please sign in to comment.