
Commit

Improve the swag procedure to select models on F1 score after a certain dimension rather than on the current balanced accuracy. Recomputed the procedure, saved the model, and tested.
lionelvoirol committed Jun 2, 2024
1 parent cd3fa74 commit 98dceec
Showing 3 changed files with 16 additions and 7 deletions.
Binary file modified model.rds
2 changes: 1 addition & 1 deletion submission.R
@@ -301,7 +301,7 @@ max_class <- function(vec) {



- # for testing
+ # # for testing
# # check returned df when running this on fake data
# df = read.csv("PreFer_fake_data.csv")
# background_df = read.csv("PreFer_fake_background_data.csv")
21 changes: 15 additions & 6 deletions training.R
@@ -97,7 +97,7 @@ model_combination <- function(
# cleaned_df = clean_df(df = data, background_df = data_back)
# outcome_df = read_csv(file = "not_commit/PreFer_train_outcome.csv")
#
#


train_save_model <- function(cleaned_df, outcome_df) {
# Trains a model using the cleaned dataframe and saves the model to a file.
@@ -120,7 +120,7 @@ train_save_model <- function(cleaned_df, outcome_df) {
# Meta-parameters swag algorithm
control <- list(pmax = 35, # maximum dimension explored
alpha = .15, # normally a small value; corresponds to the proportion of models kept at each iteration
- m = 80L, # max number of models explored per dimension
+ m = 40L, # max number of models explored per dimension
seed = 123L, # for replicability
verbose = T # keeps track of completed dimensions
)
@@ -167,7 +167,7 @@ train_save_model <- function(cleaned_df, outcome_df) {
sample_size[i] = nrow(df_sub)

if(control$verbose){
cat(paste0("Variable ",i,"/", ncol(X),". accuracy of ", round(cv_errors[i] , 4), "\n"))
cat(paste0("Variable ",i,"/", ncol(X),". balanced accuracy of ", round(cv_errors[i] , 4), "\n"))

}
}
@@ -181,7 +181,7 @@ train_save_model <- function(cleaned_df, outcome_df) {
CVs_precison[[d]] = cv_errors_precison
CVs_recall[[d]] = cv_errors_recall

- cv_alpha[d] <- quantile(cv_errors,control$alpha,na.rm=T)
+ cv_alpha[d] <- quantile(cv_errors, (1-control$alpha), na.rm=T)
IDs[[d]] <- which(cv_errors >= cv_alpha[d])
id_screening <- IDs[[d]]
VarMat[[d]] <- var_mat
@@ -237,12 +237,21 @@ train_save_model <- function(cleaned_df, outcome_df) {
CVs_precison[[d]] = cv_errors_precison
CVs_recall[[d]] = cv_errors_recall
VarMat[[d]] <- var_mat
- cv_alpha[d] <- quantile(cv_errors,control$alpha,na.rm=T)
+ cv_alpha[d] <- quantile(cv_errors,probs = (1-control$alpha),na.rm=T)
IDs[[d]] <- which(cv_errors >= cv_alpha[d])
+ # TODO: switch based on the proportion of tested models with a defined f1 score; for now, switch to f1 score after a fixed dimension
+ measure_considered = " balanced accuracy"
+ if(d > 10){
+ # switch to f1 score
+ measure_considered = " f1 score"
+ cv_alpha[d] <- quantile(cv_errors_f1_score, probs = (1-control$alpha), na.rm=T) # save in vector of quantiles per dimension
+ IDs[[d]] <- which(cv_errors_f1_score >= cv_alpha[d])
+ }

Sample_size[[d]] = sample_size


- if(control$verbose) print(paste0("Dimension explored: ", d ," - CV errors at alpha: ",round(cv_alpha[d],4)))
+ if(control$verbose) print(paste0("Dimension explored: ", d, ",", measure_considered, " at quantile ", (1-control$alpha), ":", round(cv_alpha[d], 4)))
if(ncol(var_mat)==1) break
}

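For readers following the diff, here is a minimal standalone sketch of the screening rule these hunks introduce: per dimension, models scoring at or above the (1 - alpha) quantile of the chosen measure are kept, and the measure switches from balanced accuracy to F1 score once the explored dimension exceeds 10. The function screen_models, its arguments, and the example data below are hypothetical illustrations, not part of the repository code.

# Hypothetical sketch of the selection rule (not the repository code).
# Keeps models whose cross-validated score reaches the (1 - alpha) quantile,
# screening on balanced accuracy up to dimension 10 and on f1 score beyond that.
screen_models <- function(cv_balanced_acc, cv_f1, d, alpha = 0.15) {
  if (d > 10) {
    scores  <- cv_f1
    measure <- "f1 score"
  } else {
    scores  <- cv_balanced_acc
    measure <- "balanced accuracy"
  }
  cutoff <- quantile(scores, probs = 1 - alpha, na.rm = TRUE)
  list(measure = measure,
       cutoff = unname(cutoff),
       kept = which(scores >= cutoff)) # indices of models kept for the next dimension
}

# Example: 20 candidate models at dimension 12 are screened on f1 score.
set.seed(123)
screen_models(cv_balanced_acc = runif(20, 0.5, 0.8),
              cv_f1 = runif(20, 0.3, 0.7),
              d = 12)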
