-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
65 lines (51 loc) · 2.54 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Makefile
# Wine Quality Prediction Project
# These names are command-style aliases, not files on disk. Declaring them
# phony keeps a stray file called e.g. `clean` from masking the target.
.PHONY: all data process train plot report clean retrain

# If a recipe exits non-zero, delete the target it was writing so that a
# partial output is never mistaken for an up-to-date file on the next run.
.DELETE_ON_ERROR:

# Run the entire pipeline: `all` only names the final report, and every
# earlier stage is pulled in through that report's chain of prerequisites.
all: report
# Download the dataset
# `data` is a phony alias for the raw CSV.
data: data/raw/wine_data.csv

# The download script is a declared prerequisite, so the CSV is fetched when
# it is missing or older than the script. There is deliberately no
# `[ -f ... ] ||` guard in the recipe: Make has already decided the file is
# out of date by the time the recipe runs, and a guard would leave the stale
# file untouched and cause this rule to fire again on every invocation.
# Assumes the script writes wine_data.csv into --folder_path — TODO confirm.
data/raw/wine_data.csv: src/data_download.py
	@echo "Checking and downloading dataset..."
	@mkdir -p $(@D)
	python $< --folder_path="$(@D)" --data_id=186
# Process and validate the data
# `process` is a phony alias for the train/test CSV pair.
process: data/processed/wine_train.csv data/processed/wine_test.csv

# The script is only handed the output directory, so a single run is expected
# to write both CSVs (and a validation report under report/) — TODO confirm
# against src/validation.py.
# No `[ -f ... ] && [ -f ... ] ||` guard: it skipped the script whenever both
# files existed, so a newer raw CSV or a changed validation script never
# refreshed them. Make's own timestamp comparison already avoids needless runs.
# NOTE: this is an ordinary multi-target rule, so under `make -j` the recipe
# may be started once per output file.
data/processed/wine_train.csv data/processed/wine_test.csv: data/raw/wine_data.csv src/validation.py
	@echo "Checking and processing data..."
	@mkdir -p $(@D)
	python src/validation.py \
		--raw="data/raw" \
		--processed="data/processed" \
		--report_path="report"
# Train the machine learning model
# `train` is a phony alias for the model pickle and the feature-importance CSV.
train: data/model/model.pkl data/processed/feature_importance.csv

# The recipe used to read `[ -f A ] && [ -f B ] || mkdir -p ... && python ...`.
# In the shell `&&` and `||` have equal precedence and associate to the left,
# so that parsed as `(([ -f A ] && [ -f B ]) || mkdir) && python`: the script
# ran regardless of the file checks. The dead guard is dropped and the two
# steps are written out plainly; Make's timestamps decide when to retrain.
# Assumes the script writes model.pkl under --model_path and
# feature_importance.csv under data/processed — TODO confirm.
# NOTE: ordinary multi-target rule; `make -j` may start the recipe per output.
data/model/model.pkl data/processed/feature_importance.csv: data/processed/wine_train.csv data/processed/wine_test.csv src/data_training.py
	@echo "Checking and training model..."
	@mkdir -p data/model
	python src/data_training.py \
		--model_path="data/model" \
		--train_data="data/processed/wine_train.csv" \
		--test_data="data/processed/wine_test.csv"
# Generate plots
# `plot` is a phony alias for the two PNG figures.
plot: data/img/feature_importance.png data/img/quality_distribution.png

# wine_test.csv is listed as a prerequisite because the recipe passes it to
# the script; without it a refreshed test split would not trigger new plots.
# The recipe used to read `[ -f A ] && [ -f B ] || mkdir -p ... && python ...`,
# which the shell groups as `(([ -f A ] && [ -f B ]) || mkdir) && python`, so
# the script ran regardless of the file checks. The dead guard is dropped and
# Make's timestamps decide when to re-plot.
# NOTE: ordinary multi-target rule; `make -j` may start the recipe per output.
data/img/feature_importance.png data/img/quality_distribution.png: data/processed/feature_importance.csv data/processed/wine_train.csv data/processed/wine_test.csv src/plots.py
	@echo "Checking and generating plots..."
	@mkdir -p data/img
	python src/plots.py \
		--img_path="data/img" \
		--train_data_path="data/processed/wine_train.csv" \
		--test_data_path="data/processed/wine_test.csv"
# Generate the final report
# `report` is a phony alias for the rendered HTML document.
report: report/wine_quality_eda.html

# The HTML file is the tracked target; the PDF is rendered in the same recipe
# as an untracked by-product.
# No `[ -f ... ] ||` guard on the HTML render: it skipped rendering whenever
# the file already existed, even when the plots or the .qmd source were
# newer, so the HTML never refreshed while the PDF line still ran each time.
report/wine_quality_eda.html: data/img/feature_importance.png data/img/quality_distribution.png report/wine_quality_eda.qmd
	@echo "Checking and generating report..."
	quarto render report/wine_quality_eda.qmd --to html
	quarto render report/wine_quality_eda.qmd --to pdf
# Clean up all generated files
# Removes the contents of the four data directories plus the three rendered
# report files; the directories themselves are left in place. `rm -f` keeps
# the recipe quiet when a path does not exist.
clean:
	@echo "Cleaning up all generated files..."
	rm -f \
		data/raw/* \
		data/processed/* \
		data/model/* \
		data/img/* \
		report/validation_report.html \
		report/wine_quality_eda.html \
		report/wine_quality_eda.pdf
# Retrain the model and regenerate everything
# Listing `clean train plot report` as sibling prerequisites gives no ordering
# guarantee under `make -j`: `clean` could run while, or after, the pipeline
# writes its outputs. Two sequential sub-makes force "wipe first, then
# rebuild". `$(MAKE)` rather than a literal `make` passes flags such as
# -j and -n through to the sub-makes.
retrain:
	$(MAKE) clean
	$(MAKE) train plot report