Skip to content

Commit

Permalink
Add tests for annotator-cli.R
Browse files Browse the repository at this point in the history
  • Loading branch information
logstar committed Jul 22, 2021
1 parent ebcc6ac commit 52ff63e
Show file tree
Hide file tree
Showing 2 changed files with 402 additions and 0 deletions.
398 changes: 398 additions & 0 deletions analyses/long-format-table-utils/annotator/tests/test_annotator_cli.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,398 @@
# The working directory is the directory that contains this test R file, if this
# file is executed by test_dir
#
# testthat package is loaded, if this file is executed by test_dir
context("test_annotator_cli.R")


# Input table that is passed to the annotator CLI in most tests below.
working_input_tsv_path <- "test_data/test_long_format_table.tsv"

# Add [] after reading to be compatible with readr >= 1.3.1, otherwise the
# tests will fail on readr >= 1.3.1 as found by @NHJohnson at
# <https://github.com/PediatricOpenTargets/OpenPedCan-analysis/pull/56
# #issuecomment-885188592>
#
# readr 1.3.1 returns spec_tbl_df subclass, which becomes tbl_df after any
# subsetting
#
# Ref: https://www.tidyverse.org/blog/2018/12/readr-1-3-1/#tibble-subclass
#
# All columns are read as character, so comparisons do not depend on readr
# column type guessing.
long_format_tibble <- readr::read_tsv(
  working_input_tsv_path,
  col_types = readr::cols(.default = readr::col_character()))[]

# Reference annotated table that the CLI output is compared with. Only the
# literal string NA is read as missing and whitespace is kept as is, which is
# the same way run_cli_get_tibble reads the CLI output.
inspected_annotated_long_format_tibble <- readr::read_tsv(
  "test_data/inspected_annotated_test_long_format_table.tsv",
  col_types = readr::cols(.default = readr::col_character()),
  na = c("NA"), quoted_na = FALSE, trim_ws = FALSE)[]

# to save intermediate files
scratch_dir <- "test_scratch"
# The CLI output and the intermediate input tables are written into
# scratch_dir, so make sure that it exists. This is a no-op if the directory
# is already there.
dir.create(scratch_dir, showWarnings = FALSE, recursive = TRUE)
annotator_cli_path <- file.path("..", "annotator-cli.R")
annotator_cli_output_path <- file.path(
  scratch_dir, "annotated_test_long_format_table.tsv")

# Test cases
#
# Helper function to run annotator CLI
#
# Runs `Rscript --vanilla <cli_path> -r [-c <columns>] -i <input> -o <output>`
# through the shell, reads the output table, and removes the output file.
#
# Args:
# - columns_to_add: NULL to run the CLI without -c. Otherwise a vector whose
#   elements are joined with "," and passed as the value of -c.
# - input_table_path: path that is passed to -i.
# - output_table_path: path that is passed to -o. The file at this path is
#   removed before the CLI runs and after the output is read.
# - cli_path: path of the CLI script to run. The default is the path that
#   was previously hard coded, so existing calls behave the same.
#
# Returns: a tibble of the CLI output with every column read as character, or
# NULL if the CLI did not create the output file.
#
# If the command fails, system(intern = TRUE) only generates a warning, so
# callers expect a warning when they test CLI failures.
#
# Future updates could parse warning messages to determine whether it is
# failed by the CLI or other issues
run_cli_get_tibble <- function(columns_to_add,
                               input_table_path,
                               output_table_path,
                               cli_path = file.path("..", "annotator-cli.R")) {
  # Quote paths, so spaces or shell metacharacters in them cannot change the
  # command. Expand a leading ~ first, because the shell does not expand it
  # inside quotes.
  quote_path <- function(path) {
    shQuote(path.expand(path))
  }

  # A file left by an interrupted earlier run must not be mistaken for the
  # output of this run when the CLI fails.
  if (file.exists(output_table_path)) {
    file.remove(output_table_path)
  }

  if (is.null(columns_to_add)) {
    # run without -c
    columns_opt <- character(0)
  } else {
    columns_opt <- c("-c", shQuote(paste(columns_to_add, collapse = ",")))
  }

  cmd_str <- paste(
    c("Rscript", "--vanilla", quote_path(cli_path), "-r", columns_opt,
      "-i", quote_path(input_table_path),
      "-o", quote_path(output_table_path)),
    collapse = " ")

  # ignore.stderr = TRUE avoids printing when testing
  #
  # intern = TRUE captures output rather than printing it. The captured
  # output is not used.
  system(cmd_str, ignore.stderr = TRUE, intern = TRUE)

  # the file may not be created due to CLI call failure
  if (!file.exists(output_table_path)) {
    return(NULL)
  }

  ann_tibble <- readr::read_tsv(
    output_table_path,
    col_types = readr::cols(.default = readr::col_character()),
    na = c("NA"), quoted_na = FALSE, trim_ws = FALSE)[]

  # clean up, so other tests will not be affected
  file.remove(output_table_path)
  ann_tibble
}

# Add package prefix for auto completion purpose only
#
# Test standard usecase
#
# Without -c, the CLI output is expected to be equal to the inspected annotated
# table.
testthat::test_that("running without -c gives the inspected annotated table", {
  testthat::expect_equal(
    run_cli_get_tibble(
      columns_to_add = NULL,
      input_table_path = working_input_tsv_path,
      output_table_path = annotator_cli_output_path),
    inspected_annotated_long_format_tibble)
})

# Test annotation order
#
# The expected table has the input table columns first, followed by the
# requested annotation columns in the same order as they are passed to -c.
testthat::test_that("annotation columns are added in the requested order", {
  input_cols <- c(
    "Gene_symbol", "Gene_Ensembl_ID", "Disease", "cohort", "tpm_mean")

  requested_col_sets <- list(
    c("RMTL", "Gene_type", "OncoKB_cancer_gene", "OncoKB_oncogene_TSG",
      "Gene_full_name", "Protein_RefSeq_ID", "EFO", "MONDO"),
    c("MONDO", "RMTL", "EFO"),
    c("RMTL", "EFO", "MONDO"),
    c("RMTL", "Protein_RefSeq_ID", "Gene_full_name"),
    c("OncoKB_oncogene_TSG", "OncoKB_cancer_gene", "MONDO"))

  for (requested_cols in requested_col_sets) {
    testthat::expect_equal(
      run_cli_get_tibble(
        columns_to_add = requested_cols,
        input_table_path = working_input_tsv_path,
        output_table_path = annotator_cli_output_path),
      inspected_annotated_long_format_tibble[
        , c(input_cols, requested_cols)],
      # info identifies the failing case, since the expectation is in a loop
      info = paste("-c", paste(requested_cols, collapse = ",")))
  }
})

# Return same table if no annotation to add
#
# An empty string is passed as the -c value, so the CLI output is expected to
# be equal to the input table.
testthat::test_that("empty -c value returns the input table unchanged", {
  testthat::expect_equal(
    run_cli_get_tibble(
      columns_to_add = "",
      input_table_path = working_input_tsv_path,
      output_table_path = annotator_cli_output_path),
    long_format_tibble)
})

# Error on duplicated annotation columns
#
# A failed CLI call is observed as a warning that is generated by
# system(intern = TRUE) in run_cli_get_tibble.
testthat::test_that("CLI fails on duplicated annotation columns", {
  duplicated_col_sets <- list(
    c("OncoKB_oncogene_TSG", "OncoKB_oncogene_TSG", "MONDO"),
    c("OncoKB_oncogene_TSG", "MONDO", "MONDO"))

  for (duplicated_cols in duplicated_col_sets) {
    testthat::expect_warning(
      run_cli_get_tibble(
        columns_to_add = duplicated_cols,
        input_table_path = working_input_tsv_path,
        output_table_path = annotator_cli_output_path),
      info = paste("-c", paste(duplicated_cols, collapse = ",")))
  }
})

# Error on non-available annotation columns
#
# The requested columns must not contain duplicates here. Otherwise, the CLI
# could fail because of the duplicates, and the test would pass without
# checking the non-available column.
testthat::test_that("CLI fails on non-available annotation columns", {
  non_available_col_sets <- list(
    c("NOT_AVAILABLE", "MONDO"),
    c("NOT_AVAILABLE"))

  for (non_available_cols in non_available_col_sets) {
    testthat::expect_warning(
      run_cli_get_tibble(
        columns_to_add = non_available_cols,
        input_table_path = working_input_tsv_path,
        output_table_path = annotator_cli_output_path),
      info = paste("-c", paste(non_available_cols, collapse = ",")))
  }
})

# Error on missing required columns
#
# For each set of columns, write the input table without these columns to the
# scratch directory, and expect the CLI to fail on this table.
req_col_missing_tbl_path <- file.path(
  scratch_dir, "test_missing_req_col_long_format_table.tsv")

testthat::test_that("CLI fails when required input columns are missing", {
  input_cols <- colnames(long_format_tibble)

  missing_col_sets <- list(
    c("Gene_symbol"),
    c("Gene_Ensembl_ID"),
    c("Disease"),
    c("Gene_symbol", "Gene_Ensembl_ID"),
    c("Gene_symbol", "Gene_Ensembl_ID", "Disease"),
    c("Gene_Ensembl_ID", "Disease"))

  for (missing_cols in missing_col_sets) {
    # Dropping a column that is not in the input table would silently test
    # nothing, so fail loudly instead.
    stopifnot(all(missing_cols %in% input_cols))

    readr::write_tsv(
      long_format_tibble[, setdiff(input_cols, missing_cols)],
      req_col_missing_tbl_path)

    testthat::expect_warning(
      run_cli_get_tibble(
        columns_to_add = NULL,
        input_table_path = req_col_missing_tbl_path,
        output_table_path = annotator_cli_output_path),
      info = paste("missing", paste(missing_cols, collapse = ",")))
  }
})


# Error on requiring existing annotation columns
#
# The input tables written here already have annotation columns, and the CLI
# is asked to add annotation columns that include at least one of them.
ann_col_exist_tbl_path <- file.path(
  scratch_dir, "test_ann_col_exist_long_format_table.tsv")

testthat::test_that("CLI fails when requested annotation columns exist", {
  # Write input_tbl to the scratch directory, and expect the CLI to fail on
  # it when it is run with columns_to_add.
  expect_cli_failure <- function(input_tbl, columns_to_add) {
    readr::write_tsv(input_tbl, ann_col_exist_tbl_path)

    testthat::expect_warning(
      run_cli_get_tibble(
        columns_to_add = columns_to_add,
        input_table_path = ann_col_exist_tbl_path,
        output_table_path = annotator_cli_output_path))
  }

  # Run without -c on a table that has every annotation column except EFO.
  expect_cli_failure(
    dplyr::select(inspected_annotated_long_format_tibble, -EFO),
    columns_to_add = NULL)

  # Run without -c on a table that has every annotation column.
  expect_cli_failure(
    inspected_annotated_long_format_tibble,
    columns_to_add = NULL)

  # EFO is not in the input table, but MONDO is.
  expect_cli_failure(
    dplyr::select(inspected_annotated_long_format_tibble, -EFO),
    columns_to_add = c("EFO", "MONDO"))
})


# Error on duplicated annotation columns
#
# The same annotation column is requested twice, and no other column is
# requested.
testthat::test_that("CLI fails when the only requested column is repeated", {
  testthat::expect_warning(
    run_cli_get_tibble(
      columns_to_add = c("EFO", "EFO"),
      input_table_path = working_input_tsv_path,
      output_table_path = annotator_cli_output_path))
})

# Error on non character annotation columns
#
# run_cli_get_tibble pastes columns_to_add into the -c value, so the CLI
# receives the text 1 or TRUE rather than a numeric or logical value.
# NOTE(review): presumably the CLI fails because no annotation column has such
# a name, i.e. these are further non-available column cases. Confirm against
# annotator-cli.R.
testthat::test_that("CLI fails on non character annotation columns", {
  non_character_values <- list(c(1), c(TRUE))

  for (non_character_value in non_character_values) {
    testthat::expect_warning(
      run_cli_get_tibble(
        columns_to_add = non_character_value,
        input_table_path = working_input_tsv_path,
        output_table_path = annotator_cli_output_path),
      info = paste("-c", paste(non_character_value, collapse = ",")))
  }
})


# No error on requiring non-existing annotation columns
#
# Each case removes some annotation columns from the inspected annotated
# table, writes the rest as the CLI input, and requests some or all of the
# removed columns. The expected table has the columns that are kept in their
# original order, followed by the requested columns in the requested order.
#
# Base column indexing is used to put the requested columns last, because
# dplyr::relocate is not available in the Docker image.
#
# The behavior of testthat::expect_equal changed at some point. The Docker
# image/container version does not check column order, whereas the latest
# version checks.
req_non_existing_ann_tbl_path <- file.path(
  scratch_dir, "test_req_non_existing_ann_long_format_table.tsv")

testthat::test_that("requested columns absent from input are added last", {
  all_cols <- colnames(inspected_annotated_long_format_tibble)

  test_cases <- list(
    list(
      removed = c("EFO", "OncoKB_cancer_gene"),
      added = c("EFO", "OncoKB_cancer_gene")),
    list(
      removed = c("EFO", "OncoKB_cancer_gene", "OncoKB_oncogene_TSG"),
      added = c("EFO", "OncoKB_cancer_gene")),
    list(
      removed = c("MONDO", "OncoKB_cancer_gene", "Protein_RefSeq_ID",
                  "RMTL"),
      added = c("MONDO", "OncoKB_cancer_gene", "Protein_RefSeq_ID")),
    list(
      removed = c("EFO", "OncoKB_oncogene_TSG", "Gene_full_name", "RMTL"),
      added = c("EFO", "OncoKB_oncogene_TSG", "Gene_full_name")),
    list(
      removed = c("EFO", "OncoKB_oncogene_TSG", "Gene_full_name", "RMTL",
                  "Gene_type"),
      added = c("EFO", "OncoKB_oncogene_TSG", "Gene_full_name",
                "Gene_type")))

  for (test_case in test_cases) {
    # Removing a column that is not in the inspected table would silently
    # change what is tested, so fail loudly instead.
    stopifnot(all(test_case$removed %in% all_cols))

    # setdiff keeps the order of all_cols
    kept_cols <- setdiff(all_cols, test_case$removed)

    readr::write_tsv(
      inspected_annotated_long_format_tibble[, kept_cols],
      req_non_existing_ann_tbl_path)

    testthat::expect_equal(
      run_cli_get_tibble(
        columns_to_add = test_case$added,
        input_table_path = req_non_existing_ann_tbl_path,
        output_table_path = annotator_cli_output_path),
      inspected_annotated_long_format_tibble[
        , c(kept_cols, test_case$added)],
      info = paste(
        "removed", paste(test_case$removed, collapse = ","),
        "and added", paste(test_case$added, collapse = ",")))
  }
})
Loading

0 comments on commit 52ff63e

Please sign in to comment.