diff --git a/tools/waveica/macros.xml b/tools/waveica/macros.xml
index 551d1321..e5f9bbaf 100644
--- a/tools/waveica/macros.xml
+++ b/tools/waveica/macros.xml
@@ -96,10 +96,20 @@
+
+
+
-
+
+
+
+
+
+
+ keep_two_output
diff --git a/tools/waveica/test-data/test10_output1.tsv b/tools/waveica/test-data/test10_output1.tsv
new file mode 100644
index 00000000..375e65d3
--- /dev/null
+++ b/tools/waveica/test-data/test10_output1.tsv
@@ -0,0 +1,5 @@
+id VT_160120_002 VT_160120_004 VT_160120_006 VT_160120_008 VT_160120_010
+M85T34 355200.506508035 216897.826587868 362337.195084504 143303.377379009 189065.516447239
+M86T41 75115889.9077485 75204863.1495248 76490295.1450204 83771659.9549148 84108898.7658797
+M86T518 6101488.54615418 6170882.26270475 12588041.969092 6181538.46316058 6103964.42378424
+M86T539 2007379.02604984 2069979.64992079 1818589.63912375 1975712.25920485 1935671.32085241
diff --git a/tools/waveica/test-data/test10_output2.tsv b/tools/waveica/test-data/test10_output2.tsv
new file mode 100644
index 00000000..21ccd260
--- /dev/null
+++ b/tools/waveica/test-data/test10_output2.tsv
@@ -0,0 +1,6 @@
+sampleName class sampleType injectionOrder batch
+VT_160120_002 sample sample 1 1
+VT_160120_004 sample sample 2 1
+VT_160120_006 sample sample 3 1
+VT_160120_008 sample sample 4 1
+VT_160120_010 sample sample 5 1
diff --git a/tools/waveica/test-data/test9_output1.parquet b/tools/waveica/test-data/test9_output1.parquet
new file mode 100644
index 00000000..45cff33e
Binary files /dev/null and b/tools/waveica/test-data/test9_output1.parquet differ
diff --git a/tools/waveica/test-data/test9_output2.parquet b/tools/waveica/test-data/test9_output2.parquet
new file mode 100644
index 00000000..7f1bf055
Binary files /dev/null and b/tools/waveica/test-data/test9_output2.parquet differ
diff --git a/tools/waveica/waveica.xml b/tools/waveica/waveica.xml
index a9f09b8f..d8756e76 100644
--- a/tools/waveica/waveica.xml
+++ b/tools/waveica/waveica.xml
@@ -1,4 +1,4 @@
-
+
removal of batch effects for untargeted metabolomics data
macros.xml
@@ -51,14 +51,14 @@
)'
#end if
- -e 'store_data(normalized_data, "$normalized_data", "$input_num.data.ext")'
+ -e 'store_data(normalized_data, "$normalized_data", "$metadata", "$input_num.data.ext", $keep_two_output)'
]]>
-
-
+
+
@@ -89,12 +89,13 @@
+
-
+
@@ -103,9 +104,9 @@
-
+
-
+
@@ -114,9 +115,9 @@
-
+
-
+
@@ -127,7 +128,7 @@
-
+
@@ -138,9 +139,9 @@
-
+
-
+
@@ -151,9 +152,9 @@
-
+
-
+
@@ -166,7 +167,7 @@
-
+
@@ -180,7 +181,7 @@
-
+
@@ -192,7 +193,7 @@
-
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
diff --git a/tools/waveica/waveica_wrapper.R b/tools/waveica/waveica_wrapper.R
index 1332919d..875982ac 100644
--- a/tools/waveica/waveica_wrapper.R
+++ b/tools/waveica/waveica_wrapper.R
@@ -3,10 +3,7 @@ read_file <- function(file, metadata, ft_ext, mt_ext, transpose) {
if (transpose) {
col_names <- c("sampleName", data[[1]])
- t_data <- data[-1]
- t_data <- t(t_data)
- data <- data.frame(rownames(t_data), t_data)
- colnames(data) <- col_names
+ data <- tranpose_data(data, col_names)
}
if (!is.na(metadata)) {
@@ -133,7 +130,6 @@ waveica_singlebatch <- function(file,
return(data)
}
-
sort_by_injection_order <- function(data) {
if ("batch" %in% colnames(data)) {
data <- data[order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE), ]
@@ -143,7 +139,6 @@ sort_by_injection_order <- function(data) {
return(data)
}
-
verify_input_dataframe <- function(data, required_columns) {
if (anyNA(data)) {
stop("Error: dataframe cannot contain NULL values!
@@ -194,7 +189,6 @@ verify_column_types <- function(data, required_columns) {
return(data)
}
-
# Match group labels with [blank/sample/qc] and enumerate them
enumerate_groups <- function(group) {
group[grepl("blank", tolower(group))] <- 0
@@ -204,7 +198,6 @@ enumerate_groups <- function(group) {
return(group)
}
-
# Create appropriate input for R wavelets function
get_wf <- function(wavelet_filter, wavelet_length) {
wf <- paste(wavelet_filter, wavelet_length, sep = "")
@@ -217,7 +210,6 @@ get_wf <- function(wavelet_filter, wavelet_length) {
return(wf)
}
-
# Exclude blanks from a dataframe
exclude_group <- function(data, group) {
row_idx_to_exclude <- which(group %in% 0)
@@ -230,14 +222,62 @@ exclude_group <- function(data, group) {
}
}
-store_data <- function(data, output, ext) {
+# Write the normalized data to disk and report completion.
+#
+# data:            table to store.
+# feature_output:  path of the main output; receives `data` itself, or only
+#                  the feature table when the output is split.
+# metadata_output: path of the metadata table; written only when splitting.
+# ext:             "parquet" writes with arrow::write_parquet(); any other
+#                  value writes tab-separated text without row names or quotes.
+# split_output:    when TRUE, `data` is first divided by split_output() and
+#                  the two parts are written to separate files.
+store_data <- function(data, feature_output, metadata_output, ext, split_output = FALSE) {
+  # Pick the writer once so both output modes share a single code path.
if (ext == "parquet") {
- arrow::write_parquet(data, output)
+    write_one <- function(tbl, path) arrow::write_parquet(tbl, path)
} else {
- write.table(data,
- file = output, sep = "\t",
- row.names = FALSE, quote = FALSE
- )
+    write_one <- function(tbl, path) {
+      write.table(tbl, file = path, sep = "\t", row.names = FALSE, quote = FALSE)
+    }
}
+  # The logical argument shares its name with the split_output() function;
+  # R skips non-function bindings when resolving a call, so the call below
+  # still reaches the function.
+  if (split_output == TRUE) {
+    split_df <- split_output(data)
+    # Metadata is written before the feature table.
+    write_one(split_df$metadata, metadata_output)
+    write_one(split_df$feature_table, feature_output)
+  } else {
+    write_one(data, feature_output)
+  }
cat("Normalization has been completed.\n")
}
+
+# Split a combined table into a metadata table and a transposed feature table.
+#
+# `df` must contain the columns sampleName, class, sampleType and
+# injectionOrder; batch is treated as metadata too when all five are present.
+# Every other column is treated as a feature.
+# Returns list(metadata = <the metadata columns>, feature_table = <the
+# remaining columns transposed by tranpose_data(), first column named "id">).
+# Stops with an error when neither set of required columns is present.
+split_output <- function(df) {
+  required_columns_set1 <- c("sampleName", "class", "sampleType", "injectionOrder", "batch")
+  required_columns_set2 <- c("sampleName", "class", "sampleType", "injectionOrder")
+
+  if (all(required_columns_set1 %in% colnames(df))) {
+    metadata_columns <- required_columns_set1
+  } else if (all(required_columns_set2 %in% colnames(df))) {
+    metadata_columns <- required_columns_set2
+  } else {
+    stop("Neither set of required columns is present in the dataframe.")
+  }
+  metadata_df <- df[, metadata_columns, drop = FALSE]
+
+  # Remove the metadata by name instead of by fixed position, so the split
+  # stays correct when the metadata columns are not in positions 2..5.
+  # sampleName is kept as the leading column because tranpose_data() drops the
+  # first column and its values become the new column names. drop = FALSE
+  # keeps a data frame even when only one column remains.
+  sample_idx <- match("sampleName", colnames(df))
+  feature_idx <- which(!(colnames(df) %in% metadata_columns))
+  df <- df[, c(sample_idx, feature_idx), drop = FALSE]
+
+  # Transpose the feature table
+  col_names <- c("id", as.vector(df[[1]]))
+  feature_table <- tranpose_data(df, col_names)
+
+  return(list(metadata = metadata_df, feature_table = feature_table))
+}
+
+# Transpose a table around its first column.
+#
+# data:         table whose first column is dropped before transposing.
+# column_names: names assigned to the columns of the result.
+# Returns a data frame whose first column holds the names of the transposed
+# columns of `data`, followed by one column per original row.
+tranpose_data <- function(data, column_names) {
+  flipped <- t(data[-1])
+  labelled <- data.frame(rownames(flipped), flipped)
+  names(labelled) <- column_names
+  labelled
+}