From 093f8f8cea1a048e737f7beee1688d2fef2ce63d Mon Sep 17 00:00:00 2001 From: sanyhe Date: Sun, 2 Jun 2024 15:20:39 +0800 Subject: [PATCH] build: v0.6.0 --- README.md | 4 +--- docs/source/Home/CHANGELOG.md | 2 +- geochemistrypi/_version.py | 2 +- geochemistrypi/data_mining/cli_pipeline.py | 11 +++++------ geochemistrypi/data_mining/constants.py | 1 + geochemistrypi/data_mining/enum.py | 15 +++++++++++++++ .../model/func/algo_decomposition/_common.py | 9 ++------- pyproject.toml | 17 +++-------------- 8 files changed, 29 insertions(+), 32 deletions(-) create mode 100644 geochemistrypi/data_mining/enum.py diff --git a/README.md b/README.md index 5916e887..9ac5b200 100644 --- a/README.md +++ b/README.md @@ -224,8 +224,6 @@ Copy the URL shown on the console into any browser to open the MLflow web interf For more details: Please refer to: -- Manual v1.1.0 for Geochemistry π - Beta [[Tencent Docs]](https://docs.qq.com/pdf/DQ0l5d2xVd2VwcnVW?&u=6868f96d4a384b309036e04e637e367a) | [[Google drive]](https://drive.google.com/file/d/1yryykCyWKM-Sj88fOYbOba6QkB_fu2ws/view?usp=sharing) - - Geochemistry π - Download and Run the Beta Version [[Bilibili]](https://www.bilibili.com/video/BV1UM4y1Q7Ju/?spm_id_from=333.999.0.0&vd_source=27944ab3b73a78970c1a52a5dcbb9140) | [[YouTube]](https://www.youtube.com/watch?v=EeVaJ3H7_AU&list=PLy8hNsI55lvh1UHjhVhqNUj3xPdV9sEiM&index=9) - MLflow UI user guide - Geochemistry π v0.5.0 [[Bilibili]](https://b23.tv/CW5Rjmo) | [[YouTube]](https://www.youtube.com/watch?v=Yu1nzNeLfRY) @@ -313,7 +311,6 @@ The whole package is under construction and the documentation is progressively e + Yang Lyu (Daisy, Zhejiang University, China) + Bailun Jiang (EPSI / Lille University, France) + Ruitao Chang (China University of Geosciences Beijing, China) -+ Junchi Liao(Roceda, University of Electronic Science and Technology of China, China) + Panyan Weng (The University of Sydney, Australia) + Siqi Yao (Clara, Dongguan University of Technology, China) + Zhelan Lin(Lan, Fuzhou University, China) @@ -397,3 +394,4 @@ More Videos will be recorded soon. + Aixiwake·Janganuer (Ayshuak, Sun Yat-sen University, China) + Zhenglin Xu (Garry, Jilin University, China) + Jianing Wang (National University of Singapore, Singapore) ++ Junchi Liao(Roceda, University of Electronic Science and Technology of China, China) diff --git a/docs/source/Home/CHANGELOG.md b/docs/source/Home/CHANGELOG.md index a598659e..5b1bac60 100644 --- a/docs/source/Home/CHANGELOG.md +++ b/docs/source/Home/CHANGELOG.md @@ -10,7 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), + MLOps core of continuous training in web interface -## [0.5.0] - 2023-01-14 +## [0.5.0] - 2024-01-14 ### Added diff --git a/geochemistrypi/_version.py b/geochemistrypi/_version.py index 3d187266..906d362f 100644 --- a/geochemistrypi/_version.py +++ b/geochemistrypi/_version.py @@ -1 +1 @@ -__version__ = "0.5.0" +__version__ = "0.6.0" diff --git a/geochemistrypi/data_mining/cli_pipeline.py b/geochemistrypi/data_mining/cli_pipeline.py index e300f3cd..3dd117b6 100644 --- a/geochemistrypi/data_mining/cli_pipeline.py +++ b/geochemistrypi/data_mining/cli_pipeline.py @@ -22,6 +22,7 @@ MISSING_VALUE_STRATEGY, MLFLOW_ARTIFACT_DATA_PATH, MODE_OPTION, + MODE_OPTION_WITH_MISSING_VALUES, NON_AUTOML_MODELS, OPTION, OUTPUT_PATH, @@ -276,6 +277,7 @@ def cli_pipeline(training_data_path: str, application_data_path: Optional[str] = clear_output() # Ask the user whether to use imputation techniques to deal with the missing values. print("-*-*- Missing Values Process -*-*-") + print("[bold red]Caution: Only some algorithms can process the data with missing value, such as XGBoost for regression and classification![/bold red]") print("Do you want to deal with the missing values?") num2option(OPTION) is_process_missing_value = limit_num_input(OPTION, SECTION[1], num_input) @@ -401,12 +403,9 @@ def cli_pipeline(training_data_path: str, application_data_path: Optional[str] = # If the selected data set is with missing values and is not been imputed, then only allow the user to choose regression, classification and clustering models. # Otherwise, allow the user to choose decomposition models. if missing_value_flag and not process_missing_value_flag: - # Delete the decomposition mode because it doesn't support missing values. - MODE_OPTION.remove("Dimensional Reduction") - # Delete the abnormal detection mode because it doesn't support missing values. - MODE_OPTION.remove("Abnormal Detection") - num2option(MODE_OPTION) - mode_num = limit_num_input(MODE_OPTION, SECTION[2], num_input) + # The abnormal detection mode and decomposition mode don't support missing values. + num2option(MODE_OPTION_WITH_MISSING_VALUES) + mode_num = limit_num_input(MODE_OPTION_WITH_MISSING_VALUES, SECTION[2], num_input) else: num2option(MODE_OPTION) mode_num = limit_num_input(MODE_OPTION, SECTION[2], num_input) diff --git a/geochemistrypi/data_mining/constants.py b/geochemistrypi/data_mining/constants.py index 027ed660..b3d8ccb7 100644 --- a/geochemistrypi/data_mining/constants.py +++ b/geochemistrypi/data_mining/constants.py @@ -28,6 +28,7 @@ DATA_OPTION = ["Own Data", "Testing Data (Built-in)"] TEST_DATA_OPTION = ["Data For Regression", "Data For Classification", "Data For Clustering", "Data For Dimensional Reduction", "Data For Abnormal Detection"] MODE_OPTION = ["Regression", "Classification", "Clustering", "Dimensional Reduction", "Abnormal Detection"] +MODE_OPTION_WITH_MISSING_VALUES = ["Regression", "Classification", "Clustering"] # The model provided to use REGRESSION_MODELS = [ diff --git a/geochemistrypi/data_mining/enum.py b/geochemistrypi/data_mining/enum.py new file mode 100644 index 00000000..b14a5cbb --- /dev/null +++ b/geochemistrypi/data_mining/enum.py @@ -0,0 +1,15 @@ +from enum import Enum + + +class ModeOption(Enum): + REGRESSION = "Regression" + CLASSIFICATION = "Classification" + CLUSTERING = "Clustering" + DIMENSIONAL_REDUCTION = "Dimensional Reduction" + ABNORMAL_DETECTION = "Abnormal Detection" + + +class ModeOptionWithMissingValues(Enum): + REGRESSION = "Regression" + CLASSIFICATION = "Classification" + CLUSTERING = "Clustering" diff --git a/geochemistrypi/data_mining/model/func/algo_decomposition/_common.py b/geochemistrypi/data_mining/model/func/algo_decomposition/_common.py index f17dc11c..55ddd0f3 100644 --- a/geochemistrypi/data_mining/model/func/algo_decomposition/_common.py +++ b/geochemistrypi/data_mining/model/func/algo_decomposition/_common.py @@ -101,14 +101,9 @@ def plot_contour(data: pd.DataFrame, algorithm_name: str) -> None: algorithm_name : str The name of the dimensionality reduction algorithm. """ - quantile_threshold = 0.9 - x_upper_threshold = data.iloc[:, 0].quantile(quantile_threshold) - y_upper_threshold = data.iloc[:, 1].quantile(quantile_threshold) - filtered_data = data[(data.iloc[:, 0] <= x_upper_threshold) & (data.iloc[:, 1] <= y_upper_threshold)] - # Calculate the density - x = filtered_data.iloc[:, 0] - y = filtered_data.iloc[:, 1] + x = data.iloc[:, 0] + y = data.iloc[:, 1] buffer = max(x.max() - x.min(), y.max() - y.min()) * 0.05 xmin, xmax = x.min() - buffer, x.max() + buffer ymin, ymax = y.min() - buffer, y.max() + buffer diff --git a/pyproject.toml b/pyproject.toml index 5a7526db..c896ad0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "geochemistrypi" -version = "0.5.0" +version = "0.6.0" authors = [ { name="Can He", email="sanyhew1097618435@163.com" }, ] @@ -52,25 +52,14 @@ dependencies = [ "imblearn", ] -[tool.black] -line-length = "200" - -[tool.isort] -line_length = "200" -profile = "black" - -[tool.flake8] -max-line-length = "200" -max-complexity = "20" -ignore = "F811,W605" - [project.optional-dependencies] test = [ "pytest" ] [project.urls] -"Homepage" = "https://github.com/ZJUEarthData/geochemistrypi" +"Homepage" = "https://geochemistrypi.deep-time.org" +"GitHub" = "https://github.com/ZJUEarthData/geochemistrypi" "Bug Tracker" = "https://github.com/ZJUEarthData/geochemistrypi/issues" "Online Documentation" = "https://geochemistrypi.readthedocs.io/en/latest/"