diff --git a/.circleci/config.yml b/.circleci/config.yml
index bee4586a58..343c04f91a 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -28,14 +28,14 @@ jobs:
       - run:
           name: fusion filtering
           command: ./scripts/run_in_ci.sh Rscript analyses/fusion_filtering/01-fusion-standardization.R -f "data/pbta-fusion-starfusion.tsv.gz" -c "STARfusion" -o "scratch/star.RDS" 
-          
+     
       - run:
           name: Transcriptome dimensionality reduction
           command: ./scripts/run_in_ci.sh ./analyses/transcriptomic-dimension-reduction/ci-dimension-reduction-plots.sh 
-        
+
       - run:
-          name: SNV Caller Set Up    
-          command: ./scripts/run_in_ci.sh Rscript analyses/snv-callers/scripts/00-set_up.R      
+          name: SNV Caller Analysis 
+          command: ./scripts/run_in_ci.sh bash analyses/snv-callers/run_caller_evals.sh    
 
         #### Add your analysis here ####
         
diff --git a/analyses/snv-callers/README.md b/analyses/snv-callers/README.md
index dcb23229d6..fdd42f9674 100644
--- a/analyses/snv-callers/README.md
+++ b/analyses/snv-callers/README.md
@@ -4,6 +4,8 @@ This analysis evaluates [MAF files](https://docs.gdc.cancer.gov/Data/File_Format
 The GDC has [good documentation on the fields](https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/) contained in a standard MAF file.
 
 **Table of Contents**
+* [How to run this pipeline](#how-to-run-this-pipeline)
+* [General usage of scripts](#general-usage-of-scripts)
 * [Individual caller evaluation](#individual-caller-evaluation)  
   * [Base change analysis](#base-change-analysis)  
   * [Variant allele fraction calculation](#variant-allele-fraction-calculation)  
@@ -14,7 +16,95 @@ The GDC has [good documentation on the fields](https://docs.gdc.cancer.gov/Data/
  * [Mutation IDs](#mutation-ids)
 * [Overall file structure](#overall-file-structure)
 * [Summary of functions](#summary-of-custom-functions)
-## Individual Caller Evaluation
+
+## How to run this pipeline
+
+**Run evaluations of each MAF file**
+
+To run the initial evaluations of all the SNV callers, call the bash script:
+```
+bash run_caller_evals.sh
+```
+This script will return results for each caller in the `plots` and `results` folder.
+To see an overall summary report, look in the `results` folder for that caller.
+(See [Overall File Structure](#overall-file-structure) for more details on
+everything that is returned.)
+
+**Run comparison analysis of the callers**
+
+*Coming soon*
+
+## General usage of scripts
+
+**Overall notes about these scripts:**
+- The scripts are sequential as noted by their number.
+- All file path-related options assume the file path given is relative to `OpenPBTA-analysis`.
+- By default, the scripts will not overwrite existing files of the same name. However,
+this can be overridden with `--overwrite` option.
+
+### 00-set_up.R
+
+00-set_up.R creates the [annotation RDS file](#genomic-regional-analyses) and [COSMIC mutations file](#cosmic-mutation-overlap) that are used by
+the subsequent scripts.
+This set up script only needs to be run once and its three options are all relating to where the reference files should be stored.
+
+**Option descriptions**
+```
+ --annot_rds : File path to where you would like the annotation_rds file to be
+               stored
+ --cosmic_og : Path to original COSMIC file. Can be .gz compressed. Will need to
+               download this from COSMIC at https://cancer.sanger.ac.uk/cosmic/download
+               These data are available if you register.
+ --cosmic_clean : File path specifying where you would like the cleaned brain-related
+                  COSMIC mutations file to be stored. This file is provided to you in
+                  GitHub. Only coordinates are needed.
+```
+
+### 01-calculate_vaf_tmb.R
+
+This script sets up the given MAF file and outputs three files ([VAF](#variant-allele-fraction-calculation),
+[TMB](#tumor-mutation-burden-calculation), and [Regional analysis](#genomic-regional-analyses))
+that are used to make an overall evaluation report in `02-run_eval.R`.
+
+**Option descriptions**
+```
+ -label : Label to be used for folder and all output. eg. 'strelka2'. Default is 'maf'.
+ -output : File path that specifies the folder where the output should go.
+           New folder will be created if it doesn't exist.
+ --maf :  Relative file path to MAF file to be analyzed. Can be .gz compressed.
+ --metadata : Relative file path to original metadata file.
+ --annot_rds : Relative file path to annotation object RDS file to be analyzed.
+ --bed_wgs : File path that specifies the caller-specific BED regions file.
+ --bed_wxs : File path that specifies the WXS BED regions file.
+ --overwrite : If specified, will overwrite any files of the same name. Default is FALSE.
+```
+
+### 02-run_eval.R
+
+This script takes the files output by `01-calculate_vaf_tmb.R` and makes five
+plots ([base_change](#base-change-analysis), [depth_vs_vaf](#variant-allele-fraction-calculation), [cosmic_plot](#cosmic-mutation-overlap), [snv_region](#genomic-regional-analyses), and [tmb_plot](#tumor-mutation-burden-calculation)) that are put into an overall report.
+
+**Option descriptions**
+```
+# --label : Label to be used for folder and all output. eg. 'strelka2'. Optional.
+#           Default is 'maf'
+# --plot_type : Specify what kind of plots you want printed out. Must be
+#               compatible with ggsave. eg pdf. Default is png
+# --vaf : Folder from 01-calculate_vaf_tmb.R following files:
+#                                             <caller_name>_vaf.tsv
+#                                             <caller_name>_region.tsv
+#                                             <caller_name>_tmb.tsv
+# --output : Where you would like the output from this script to be stored.
+# --strategy : Specify whether you would like WXS and WGS separated for the plots.
+#              Analysis is still done on all data in the MAF file regardless.
+#              Acceptable options are 'wgs', 'wxs' or 'both', both for if you
+#              don't want to separate them. Default is both.
+# --cosmic : Relative file path to COSMIC file to be analyzed.
+# --overwrite : If TRUE, will overwrite any reports of the same name. Default is
+#              FALSE
+```
+
+# Individual Caller Evaluation
 
 The first step in this analysis is an individual evaluation of each MAF from each caller.
 This analysis prints out a report that is further subdivided by the WGS and WXS samples.
@@ -45,7 +135,7 @@ This is following the [code used in
 `maftools`](https://github.com/PoisonAlien/maftools/blob/1d0270e35c2e0f49309eba08b62343ac0db10560/R/plot_vaf.R#L39).
 The VAF calculations and other special variables are added to the MAF fields and written to a TSV ending in `_vaf.tsv` in the caller's results folder.
 
- *Output for this analysiss*
+ *Output for this analysis*
  * `results/<caller_name>/<caller_name>_vaf.tsv`
  * `plots/<caller_name>/<caller_name>_<strategy>_depth_vs_vaf.png`
 
@@ -87,7 +177,7 @@ The sample-wise TMB calculations written to a TSV ending in `_tmb.tsv` in the ca
 The COSMIC mutation data were obtained from https://cancer.sanger.ac.uk/cosmic/download
 *To run this analysis, you need to obtain these data.*
 The full, unfiltered somatic mutations file `CosmicMutantExport.tsv` for grch38 is used here and the genomic coordinates is arranged to be in BED format.
-The COSMIC set is unfiltered down to only mutations detected in brain-related
+The COSMIC set is filtered down to only mutations detected in brain-related
 samples using the `Site subtype 1` field.
 COSMIC mutations are overlapped with the present data's mutations using `GenomicRanges`.
 The outcome of this overlap is added to the VAF data.frame with two `TRUE/FALSE` columns:
diff --git a/analyses/snv-callers/run_caller_evals.sh b/analyses/snv-callers/run_caller_evals.sh
new file mode 100644
index 0000000000..89f0f9cc46
--- /dev/null
+++ b/analyses/snv-callers/run_caller_evals.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# C. Savonen
+# CCDL for ALSF 2019
+
+# Purpose:Run an intial evaluation of each variant caller's MAF file
+
+# Change directory
+cd kitematic
+
+# The files named in these arrays will be ran in the analysis. 
+datasets=("strelka2" "mutect2" "lancet")
+wgs_files=("WGS.hg38.strelka2.unpadded.bed" "WGS.hg38.mutect2.unpadded.bed" "WGS.hg38.lancet.300bp_padded.bed")
+
+# Reference file paths
+cosmic=analyses/snv-callers/brain_cosmic_variants_coordinates.tsv
+annot_rds=scratch/hg38_genomic_region_annotation.rds
+
+############################ Set Up Reference Files ############################
+# The original COSMIC file is obtained from: https://cancer.sanger.ac.uk/cosmic/download
+# These data are available if you register. The full, unfiltered somatic mutations 
+# file CosmicMutantExport.tsv.gz for grch38 is used here.
+Rscript analyses/snv-callers/scripts/00-set_up.R \
+  --annot_rds $annot_rds \
+  --cosmic_og scratch/CosmicMutantExport.tsv.gz \
+  --cosmic_clean $cosmic
+  
+########################## Calculate and Set Up Data ##########################
+# Create files that contain calculated VAF, TMB, and regional analyses.
+for ((i=0;i<${#datasets[@]};i++)); 
+do
+  echo "Processing dataset: ${datasets[$i]}"
+  Rscript analyses/snv-callers/scripts/01-calculate_vaf_tmb.R \
+    --label ${datasets[$i]} \
+    --output analyses/snv-callers/results/${datasets[$i]} \
+    --maf data/pbta-snv-${datasets[$i]}.vep.maf.gz \
+    --metadata data/pbta-histologies.tsv \
+    --bed_wgs data/${wgs_files[$i]} \
+    --bed_wxs data/WXS.hg38.100bp_padded.bed \
+    --annot_rds $annot_rds \
+    --overwrite
+done
diff --git a/analyses/snv-callers/scripts/00-set_up.R b/analyses/snv-callers/scripts/00-set_up.R
index 6cf1dce3b2..9c57ca541e 100644
--- a/analyses/snv-callers/scripts/00-set_up.R
+++ b/analyses/snv-callers/scripts/00-set_up.R
@@ -1,22 +1,30 @@
-# SNV Caller set up and functions
-#
+# SNV-caller set up
 # 2019
 # C. Savonen for ALSF - CCDL
 #
-# Purpose: Set up for analyzing MAF files
+# Purpose: Set up for reference files that are used by 01-calculate_vaf_tmb.R
+#          script.
+#
+# Option descriptions
+# --annot_rds : File path to where you would like the annotation_rds file to be
+#               stored
+# --cosmic_og : Path to original COSMIC file. Can be .gz compressed. Give path relative
+#               to top directory, 'OpenPBTA-analysis'. Will need to download this from
+#               COSMIC at https://cancer.sanger.ac.uk/cosmic/download.
+#               These data are available if you register.
+# --cosmic_clean : File path specifying where you would like the cleaned brain-related
+#                  COSMIC mutations file to be stored.
 #
+#################################### Set Up ####################################
 # Establish base dir
 root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
 
-# Import special functions
-source(file.path(root_dir, "analyses", "snv-callers", "util", "wrangle_functions.R"))
-source(file.path(root_dir, "analyses", "snv-callers", "util", "plot_functions.R"))
-
-#################################### Set Up ####################################
-
 # Magrittr pipe
 `%>%` <- dplyr::`%>%`
 
+# Load the optparse library
+library(optparse)
+
 # Will need optparse for collecting options
 if (!("optparse" %in% installed.packages())) {
   install.packages("optparse", repos = "http://cran.us.r-project.org")
@@ -34,50 +42,46 @@ if (!("data.table" %in% installed.packages())) {
   install.packages("data.table", repos = "http://cran.us.r-project.org")
 }
 
-##################### Set base directories common file paths ###################
-# Declare base directory names
-scratch_dir <- file.path(root_dir, "scratch")
-data_dir <- file.path(root_dir, "data")
-snv_dir <- file.path(root_dir, "analyses", "snv-callers")
-
-# Directories that we will need
-base_results_dir <- file.path(snv_dir, "results")
-base_plots_dir <- file.path(snv_dir, "plots")
-cosmic_dir <- file.path(snv_dir, "cosmic")
-
-# Create these folders if they haven't been created yet
-if (!dir.exists(base_results_dir)) {
-  dir.create(base_results_dir)
-}
-if (!dir.exists(base_plots_dir)) {
-  dir.create(base_plots_dir)
-}
-if (!dir.exists(cosmic_dir)) {
-  dir.create(cosmic_dir)
-}
-
-# Declare reference file names
-original_metadata <- file.path(data_dir, "pbta-histologies.tsv")
-
-# These data were obtained from https://cancer.sanger.ac.uk/cosmic/download
-# These data are available if you register.
-# The full, unfiltered somatic mutations file CosmicMutantExport.tsv.gz for grch38
-# is used here.
-cosmic_file <- file.path(snv_dir, "CosmicMutantExport.tsv.gz")
-
-# This is the cleaned version that only contains the genomic coordinates and
-# base changes from the original file for brain sample mutations only.
-cosmic_clean_file <- file.path(cosmic_dir, "brain_cosmic_variants_coordinates.tsv")
+################################ Set up options ################################
+# Set up optparse options
+option_list <- list(
+  make_option(
+    opt_str = "--cosmic_og", type = "character", default = "none",
+    help = "Relative file path (assuming from top directory of 
+              'OpenPBTA-analysis') to where the COSMIC mutation file is stored. 
+              Will need to download this from COSMIC at 
+              https://cancer.sanger.ac.uk/cosmic/download
+              These data are available if you register.",
+    metavar = "character"
+  ),
+  make_option(
+    opt_str = "--cosmic_clean", type = "character", default = "none",
+    help = "Relative file path (assuming from top directory of 
+              'OpenPBTA-analysis') where you would like the cleaned COSMIC file
+              to be stored.",
+    metavar = "character"
+  ),
+  make_option(
+    opt_str = "--annot_rds", type = "character",
+    default = "none", help = "Relative file path (assuming from top 
+              directory of 'OpenPBTA-analysis') that specifies where you would
+              like the annotation_rds file to be stored.",
+    metavar = "character"
+  )
+)
 
-# Will set up this annotation file below
-annot_rds <- file.path(scratch_dir, "hg38_genomic_region_annotation.rds")
+# Parse options
+opt <- parse_args(OptionParser(option_list = option_list))
 
-# This will be used for all WXS samples, but WGS bed regions are caller-specific
-wxs_bed_file <- file.path(data_dir, "WXS.hg38.100bp_padded.bed")
+##################### Set base directories common file paths ###################
+# Make all base names relative to root_dir
+opt$cosmic_og <- file.path(root_dir, opt$cosmic_og)
+opt$cosmic_clean <- file.path(root_dir, opt$cosmic_clean)
+opt$annot_rds <- file.path(root_dir, opt$annot_rds)
 
 ################################ Build Annotation ##############################
 # We will only run this if it hasn't been run before
-if (!file.exists(annot_rds)) {
+if (opt$annot_rds != "none") {
   # Print progress message
   message("Setting up genomic region annotation file. Only need to do this once.")
 
@@ -98,7 +102,7 @@ if (!file.exists(annot_rds)) {
 
   # Write this object so we don't have to write it again
   readr::write_rds(annotations,
-    annot_rds,
+    opt$annot_rds,
     compress = "gz"
   )
 }
@@ -108,37 +112,41 @@ if (!file.exists(annot_rds)) {
 # if you register. The full, unfiltered somatic mutations file
 # CosmicMutantExport.tsv for grch38 is used here but only mutations from brain
 # related samples are kept.
-if (!file.exists(cosmic_clean_file)) {
-  # Print progress message
-  message("Setting up COSMIC mutation file. Only need to do this once.")
-
-  # Read in original file
-  cosmic_variants <- data.table::fread(cosmic_file,
-    data.table = FALSE
-  ) %>%
-    # Keep only brain mutations so the file is smaller
-    dplyr::filter(`Site subtype 1` == "brain")
-  # Get rid of spaces in column names
-  cosmic_variants <- cosmic_variants %>%
-    dplyr::rename_all(dplyr::funs(stringr::str_replace_all(., " ", "_"))) %>%
-    # Separate the genome coordinates into their own BED like variables
-    dplyr::mutate(
-      Chromosome = paste0(
-        "chr",
-        stringr::word(Mutation_genome_position, sep = ":", 1)
-      ),
-      Start_Position = stringr::word(Mutation_genome_position, sep = ":|-", 1),
-      End_Position = stringr::word(Mutation_genome_position, sep = "-", 2),
-      # Make a base_change variable so we can compare to our set up for PBTA data
-      base_change = substr(Mutation_CDS, nchar(Mutation_CDS) - 2, nchar(Mutation_CDS))
-    ) %>%
-    # Carry over the strand info, but rename to match our PBTA set up
-    dplyr::rename(Strand = Mutation_strand) %>%
-    # Narrow down to just the needed columns
-    dplyr::select(
-      "Chromosome", "Start_Position", "End_Position", "Strand",
-      "base_change"
+if (opt$cosmic_clean != "none") {
+  if (file.exists(opt$cosmic_og)) {
+    # Print progress message
+    message("Setting up COSMIC mutation file. Only need to do this once.")
+
+    # Read in original file
+    cosmic_variants <- data.table::fread(opt$cosmic_og,
+      data.table = FALSE
     ) %>%
-    # Write to a TSV file
-    readr::write_tsv(cosmic_clean_file)
+      # Keep only brain mutations so the file is smaller
+      dplyr::filter(`Site subtype 1` == "brain")
+    # Get rid of spaces in column names
+    cosmic_variants <- cosmic_variants %>%
+      dplyr::rename_all(dplyr::funs(stringr::str_replace_all(., " ", "_"))) %>%
+      # Separate the genome coordinates into their own BED like variables
+      dplyr::mutate(
+        Chromosome = paste0(
+          "chr",
+          stringr::word(Mutation_genome_position, sep = ":", 1)
+        ),
+        Start_Position = stringr::word(Mutation_genome_position, sep = ":|-", 1),
+        End_Position = stringr::word(Mutation_genome_position, sep = "-", 2),
+        # Make a base_change variable so we can compare to our set up for PBTA data
+        base_change = substr(Mutation_CDS, nchar(Mutation_CDS) - 2, nchar(Mutation_CDS))
+      ) %>%
+      # Carry over the strand info, but rename to match our PBTA set up
+      dplyr::rename(Strand = Mutation_strand) %>%
+      # Narrow down to just the needed columns
+      dplyr::select(
+        "Chromosome", "Start_Position", "End_Position", "Strand",
+        "base_change"
+      ) %>%
+      # Write to a TSV file
+      readr::write_tsv(opt$cosmic_clean)
+  } else {
+    stop("Original COSMIC Mutation file not found. Specify with --cosmic_og")
+  }
 }
diff --git a/analyses/snv-callers/scripts/01-calculate_vaf_tmb.R b/analyses/snv-callers/scripts/01-calculate_vaf_tmb.R
new file mode 100644
index 0000000000..4100f330d0
--- /dev/null
+++ b/analyses/snv-callers/scripts/01-calculate_vaf_tmb.R
@@ -0,0 +1,294 @@
+# Run variant caller evaluation for a given MAF file.
+#
+# C. Savonen for ALSF - CCDL
+#
+# 2019
+#
+# Option descriptions
+#
+# -label : Label to be used for folder and all output. eg. 'strelka2'. Default is 'maf'.
+# -output : File path that specifies the folder where the output should go.
+#           New folder will be created if it doesn't exist. Assumes file path is
+#           given from top directory of 'OpenPBTA-analysis'.
+# --maf :  Relative file path to MAF file to be analyzed. Can be .gz compressed.
+#          Assumes file path is given from top directory of 'OpenPBTA-analysis'.
+# --metadata : Relative file path to MAF file to be analyzed. Can be .gz compressed.
+#              Assumes file path is given from top directory of 'OpenPBTA-analysis'.
+# --annot_rds : Relative file path to annotation object RDS file to be analyzed.
+#               Assumes file path is given from top directory of 'OpenPBTA-analysis'.
+# --bed_wgs : File path that specifies the caller-specific BED regions file.
+#             Assumes from top directory, 'OpenPBTA-analysis'.
+# --bed_wxs : File path that specifies the WXS BED regions file. Assumes file path
+#             is given from top directory of 'OpenPBTA-analysis'
+# --overwrite : If specified, will overwrite any files of the same name. Default is FALSE.
+#
+# Command line example:
+#
+# Rscript analyses/snv-callers/scripts/01-calculate_vaf_tmb.R \
+#   --label strelka2 \
+#   --output analyses/snv-callers/results \
+#   --maf scratch/snv_dummy_data/strelka2 \
+#   --metadata data/pbta-histologies.tsv \
+#   --bed_wgs data/WGS.hg38.mutect2.unpadded.bed \
+#   --bed_wxs data/WXS.hg38.100bp_padded.bed \
+#   --annot_rds scratch/hg38_genomic_region_annotation.rds
+
+################################ Initial Set Up ################################
+# Establish base dir
+root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
+
+# Import special functions
+source(file.path(root_dir, "analyses", "snv-callers", "util", "wrangle_functions.R"))
+
+# Magrittr pipe
+`%>%` <- dplyr::`%>%`
+
+# Load library:
+library(optparse)
+
+################################ Set up options ################################
+# Set up optparse options
+option_list <- list(
+  make_option(
+    opt_str = c("-l", "--label"), type = "character",
+    default = "maf", help = "Label to be used for folder and all
+                output. eg. 'strelka2'. Default is 'maf'",
+    metavar = "character"
+  ),
+  make_option(
+    opt_str = c("-o", "--output"), type = "character", default = "none",
+    help = "File path that specifies the folder where the output should
+              go. Assumes from top directory, 'OpenPBTA-analysis'. New folder
+              will be created if it doesn't exist.",
+    metavar = "character"
+  ),
+  make_option(
+    opt_str = "--maf", type = "character", default = "none",
+    help = "Relative file path (assuming from top directory of
+              'OpenPBTA-analysis') to MAF file to be analyzed. Can be .gz compressed.",
+    metavar = "character"
+  ),
+  make_option(
+    opt_str = "--metadata", type = "character", default = "none",
+    help = "Relative file path (assuming from top directory of
+              'OpenPBTA-analysis') to MAF file to be analyzed. Can be .gz compressed.",
+    metavar = "character"
+  ),
+  make_option(
+    opt_str = c("-a", "--annot_rds"), type = "character", default = "none",
+    help = "Relative file path (assuming from top directory of
+              'OpenPBTA-analysis') to annotation object RDS file to be analyzed.",
+    metavar = "character"
+  ),
+  make_option(
+    opt_str = "--bed_wgs", type = "character", default = "none",
+    help = "File path that specifies the caller-specific
+                BED regions file. Assumes from top directory, 'OpenPBTA-analysis'",
+    metavar = "character"
+  ),
+  make_option(
+    opt_str = "--bed_wxs", type = "character", default = "none",
+    help = "File path that specifies the WXS BED regions file. Assumes
+              from top directory, 'OpenPBTA-analysis'",
+    metavar = "character"
+  ),
+  make_option(
+    opt_str = "--overwrite", action = "store_true",
+    default = FALSE, help = "If TRUE, will overwrite any files of
+              the same name. Default is FALSE",
+    metavar = "character"
+  )
+)
+
+# Parse options
+opt <- parse_args(OptionParser(option_list = option_list))
+
+########### Check that the files we need are in the paths specified ############
+needed_files <- c(opt$maf, opt$metadata, opt$bed_wgs, opt$bed_wxs, opt$annot_rds,
+                  opt$cosmic)
+
+# Add root directory to the file paths
+needed_files <- file.path(root_dir, needed_files)
+
+# Get list of which files were found
+files_found <- file.exists(needed_files)
+
+# Report error if any of them aren't found
+if (!all(files_found)) {
+  stop(paste("\n Could not find needed file(s):",
+    needed_files[which(!files_found)],
+    "Check your options and set up.",
+    sep = "\n"
+  ))
+}
+
+################## Create output directories for this caller ##################
+# Caller specific results directory path
+caller_results_dir <- file.path(root_dir, opt$output)
+
+# Make caller specific results folder
+if (!dir.exists(caller_results_dir)) {
+  dir.create(caller_results_dir, recursive = TRUE)
+}
+
+####################### File paths for files we will create ####################
+vaf_file <- file.path(caller_results_dir, paste0(opt$label, "_vaf.tsv"))
+region_annot_file <- file.path(caller_results_dir, paste0(opt$label, "_region.tsv"))
+tmb_file <- file.path(caller_results_dir, paste0(opt$label, "_tmb.tsv"))
+
+# Declare metadata file name for this caller
+metadata_file <- file.path(
+  caller_results_dir,
+  paste0(opt$label, "_metadata_filtered.tsv")
+)
+
+##################### Check for files if overwrite is FALSE ####################
+# If overwrite is set to FALSE, check if these exist before continuing
+if (!opt$overwrite) {
+  # Make a list of the output files
+  output_files <- c(vaf_file, region_annot_file, tmb_file)
+
+  # Find out which of these exist
+  existing_files <- file.exists(output_files)
+
+  # If all files exist; stop
+  if (all(existing_files)) {
+    stop(cat(
+      "Stopping; --overwrite is not being used and all output files already exist: \n",
+      vaf_file, "\n",
+      region_annot_file, "\n",
+      tmb_file
+    ))
+  }
+  # If some files exist, print a warning:
+  if (any(existing_files)) {
+    warning(cat(
+      "Some output files already exist and will not be overwritten unless you use --overwrite: \n",
+      paste0(output_files[which(existing_files)], "\n")
+    ))
+  }
+}
+
+########################### Set up this caller's data ##########################
+# Print progress message
+message(paste("Reading in", opt$maf, "MAF data..."))
+
+# Read in this MAF, skip the version number
+maf_df <- data.table::fread(opt$maf, skip = 1, data.table = FALSE)
+
+# Print progress message
+message(paste("Setting up", opt$label, "metadata..."))
+
+# Isolate metadata to only the samples that are in the datasets
+metadata <- readr::read_tsv(opt$metadata) %>%
+  dplyr::filter(Kids_First_Biospecimen_ID %in% maf_df$Tumor_Sample_Barcode) %>%
+  dplyr::distinct(Kids_First_Biospecimen_ID, .keep_all = TRUE) %>%
+  dplyr::arrange() %>%
+  dplyr::rename(Tumor_Sample_Barcode = Kids_First_Biospecimen_ID) %>%
+  readr::write_tsv(metadata_file)
+
+# Print out completion message
+message(paste("Filtered metadata file saved to: \n", metadata_file))
+
+# Make sure that we have metadata for all these samples.
+if (!all(unique(maf_df$Tumor_Sample_Barcode) %in% metadata$Tumor_Sample_Barcode)) {
+  stop("There are samples in this MAF file that are not in the metadata.")
+}
+
+################## Calculate VAF and set up other variables ####################
+# If the file exists or the overwrite option is not being used, calculate VAF 
+if (file.exists(vaf_file) && !opt$overwrite) {
+  # Stop if this file exists and overwrite is set to FALSE
+  warning(cat(
+    "The VAF file already exists: \n",
+    vaf_file, "\n",
+    "Use --overwrite if you want to overwrite it."
+  ))
+} else {
+  # Print out warning if this file is going to be overwritten
+  if (file.exists(vaf_file)) {
+    warning("Overwriting existing VAF file.")
+  }
+  # Print out progress message
+  message(paste("Calculating VAF for", opt$label, "MAF data..."))
+
+  # Use the premade function to calculate VAF this will also merge the metadata
+  vaf_df <- set_up_maf(maf_df, metadata) %>%
+    readr::write_tsv(vaf_file)
+
+  # Print out completion message
+  message(paste("VAF calculations saved to: \n", vaf_file))
+}
+######################### Annotate genomic regions #############################
+# If the file exists or the overwrite option is not being used, run regional annotation analysis
+if (file.exists(region_annot_file) && !opt$overwrite) {
+  # Stop if this file exists and overwrite is set to FALSE
+  warning(cat(
+    "The regional annotation file already exists: \n",
+    region_annot_file, "\n",
+    "Use --overwrite if you want to overwrite it."
+  ))
+} else {
+  # Print out warning if this file is going to be overwritten
+  if (file.exists(vaf_file)) {
+    warning("Overwriting existing regional annotation file.")
+  }
+  # Print out progress message
+  message(paste("Annotating genomic regions for", opt$label, "MAF data..."))
+
+  # Annotation genomic regions
+  maf_annot <- annotr_maf(vaf_df, annotation_file = opt$annot_rds) %>%
+    readr::write_tsv(region_annot_file)
+
+  # Print out completion message
+  message(paste("Genomic region annotations saved to:", region_annot_file))
+}
+############################# Calculate TMB ####################################
+# If the file exists or the overwrite option is not being used, run TMB calculations
+if (file.exists(region_annot_file) && !opt$overwrite) {
+  # Stop if this file exists and overwrite is set to FALSE
+  warning(cat(
+    "The Tumor Mutation Burden file already exists: \n",
+    tmb_file, "\n",
+    "Use --overwrite if you want to overwrite it."
+  ))
+} else {
+  # Print out warning if this file is going to be overwritten
+  if (file.exists(vaf_file)) {
+    warning("Overwriting existing TMB file.")
+  }
+  # Print out progress message
+  message(paste("Calculating TMB for", opt$label, "MAF data..."))
+
+  # Set up BED region files for TMB calculations
+  wgs_bed <- readr::read_tsv(opt$bed_wgs, col_names = FALSE)
+  wxs_bed <- readr::read_tsv(opt$bed_wxs, col_names = FALSE)
+
+  # Calculate size of genome surveyed
+  wgs_genome_size <- sum(wgs_bed[, 3] - wgs_bed[, 2])
+  wxs_exome_size <- sum(wxs_bed[, 3] - wxs_bed[, 2])
+
+  # Print out these genome sizes
+  cat(
+    " WGS size in bp:", wgs_genome_size,
+    "\n",
+    "WXS size in bp:", wxs_exome_size,
+    "\n"
+  )
+  
+  # Only do this step if you have WXS samples
+  if (any(metadata$experimental_strategy == "WXS")) {
+    # Filter out mutations for WXS that are outside of these BED regions.
+    vaf_df <- wxs_bed_filter(vaf_df, wxs_bed_file = opt$bed_wxs)
+  }
+  
+  # Calculate TMBs and write to TMB file
+  tmb_df <- calculate_tmb(vaf_df,
+    wgs_size = wgs_genome_size,
+    wxs_size = wxs_exome_size
+  ) %>%
+    readr::write_tsv(tmb_file)
+
+  # Print out completion message
+  message(paste("TMB calculations saved to:", tmb_file))
+}
diff --git a/analyses/snv-callers/util/wrangle_functions.R b/analyses/snv-callers/util/wrangle_functions.R
index 59487472dc..f0ca1948ec 100644
--- a/analyses/snv-callers/util/wrangle_functions.R
+++ b/analyses/snv-callers/util/wrangle_functions.R
@@ -29,8 +29,7 @@ set_up_maf <- function(maf_df, metadata_df = NULL) {
   maf_df <- maf_df %>%
     dplyr::mutate(
       # Calculate the variant allele frequency
-      vaf = as.numeric(t_alt_count) / (as.numeric(t_ref_count) +
-        as.numeric(t_alt_count)),
+      vaf = as.numeric(t_alt_count) / (as.numeric(t_ref_count) + as.numeric(t_alt_count)),
       # Create a base_change variable
       base_change = paste0(Reference_Allele, ">", Allele),
 
@@ -69,7 +68,7 @@ set_up_maf <- function(maf_df, metadata_df = NULL) {
   if (!is.null(metadata_df)) {
     # Tack on the metadata so we have this info
     maf_df <- maf_df %>%
-      dplyr::left_join(metadata, by = "Tumor_Sample_Barcode") %>%
+      dplyr::left_join(metadata_df, by = "Tumor_Sample_Barcode") %>%
       # Get rid of any variables that have completely NAs.
       dplyr::select(-which(apply(is.na(.), 2, all)))
   }
@@ -168,8 +167,8 @@ wxs_bed_filter <- function(maf_df, wxs_bed_file = NULL, bp_window = 0) {
 
   # What fraction of mutations are in these bed regions?
   cat(
-    "Ratio of variants in this BED:", ratio,
-    "Ratio of variants being filtered out:", 1 - ratio
+    "Ratio of variants in this BED:", ratio, "\n",
+    "Ratio of variants being filtered out:", 1 - ratio, "\n"
   )
 
   # Only keep those in the BED regions that overlap the `wxs_bed_granges`
@@ -326,7 +325,7 @@ find_cosmic_overlap <- function(maf_df, cosmic_clean_file, bp_window = 0) {
   # What fraction of mutations are in these bed regions?
   cat(
     " Ratio of variants overlapping with COSMIC:", ratio, "\n",
-    "Number of mutations with same base_change:", sum(same_change)
+    "Number of mutations with same base_change:", sum(same_change), "\n"
   )
 
   # Make this a new column