Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for mzMLs on Panorama #21

Merged
merged 3 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 5 additions & 8 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@ include { EXPORT_GENE_REPORTS } from "./modules/qc_report"
// useful functions and variables
include { param_to_list } from "./workflows/get_input_files"

// String to test for Panoramaness
PANORAMA_URL = 'https://panoramaweb.org'

// Check if old Skyline parameter variables are defined.
// If the old variable is defined, return the params value of the old variable,
// otherwise return the params value of the new variable
Expand Down Expand Up @@ -429,17 +426,17 @@ workflow {
// return true if any entry in the list created from the param is a panoramaweb URL
def any_entry_is_panorama(param) {
values = param_to_list(param)
return values.any { it.startsWith(PANORAMA_URL) }
return values.any { it.startsWith(params.panorama.domain) }
}

// return true if panoramaweb will be accessed by this Nextflow run
def is_panorama_used() {

return params.panorama.upload ||
(params.fasta && params.fasta.startsWith(PANORAMA_URL)) ||
(params.spectral_library && params.spectral_library.startsWith(PANORAMA_URL)) ||
(params.replicate_metadata && params.replicate_metadata.startsWith(PANORAMA_URL)) ||
(params.skyline.template_file && params.skyline.template_file.startsWith(PANORAMA_URL)) ||
(params.fasta && params.fasta.startsWith(params.panorama.domain)) ||
(params.spectral_library && params.spectral_library.startsWith(params.panorama.domain)) ||
(params.replicate_metadata && params.replicate_metadata.startsWith(params.panorama.domain)) ||
(params.skyline.template_file && params.skyline.template_file.startsWith(params.panorama.domain)) ||
(params.quant_spectra_dir && any_entry_is_panorama(params.quant_spectra_dir)) ||
(params.chromatogram_library_spectra_dir && any_entry_is_panorama(params.chromatogram_library_spectra_dir)) ||
(params.skyline_skyr_file && any_entry_is_panorama(params.skyline_skyr_file))
Expand Down
12 changes: 6 additions & 6 deletions modules/panorama.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ String setupPanoramaAPIKeySecret(secret_id, executor_type) {
} else {
SECRET_NAME = 'PANORAMA_API_KEY'
REGION = params.aws.region

return """
echo "Getting Panorama API key from AWS secrets manager..."
SECRET_JSON=\$(${params.aws.batch.cliPath} secretsmanager get-secret-value --secret-id ${secret_id} --region ${REGION} --query 'SecretString' --output text)
Expand Down Expand Up @@ -50,7 +50,7 @@ String getPanoramaProjectURLForWebDavDirectory(String webdavDirectory) {
return newUrl
}

process PANORAMA_GET_RAW_FILE_LIST {
process PANORAMA_GET_MS_FILE_LIST {
cache false
label 'process_low_constant'
label 'error_retry'
Expand All @@ -64,7 +64,7 @@ process PANORAMA_GET_RAW_FILE_LIST {
val aws_secret_id

output:
path('download_files.txt'), emit: raw_files
path('download_files.txt'), emit: ms_files
path("*.stdout"), emit: stdout
path("*.stderr"), emit: stderr

Expand All @@ -78,7 +78,6 @@ process PANORAMA_GET_RAW_FILE_LIST {
echo "Running file list from Panorama..."
${exec_java_command(task.memory)} \
-l \
-e raw \
-w "${web_dav_url}" \
-k \$PANORAMA_API_KEY \
-o all_files.txt \
Expand Down Expand Up @@ -127,7 +126,7 @@ process PANORAMA_GET_FILE {
"""
}

process PANORAMA_GET_RAW_FILE {
process PANORAMA_GET_MS_FILE {
label 'process_low_constant'
label 'error_retry'
maxForks 4
Expand Down Expand Up @@ -232,7 +231,8 @@ process UPLOAD_FILE {

stub:
"""
touch "panorama-upload-${file(file_to_upload).name}.stdout" "panorama-upload-${file(file_to_upload).name}.stderr"
touch "panorama-upload-${file(file_to_upload).name}.stdout" \
"panorama-upload-${file(file_to_upload).name}.stderr"
"""
}

Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ params {
msconvert_only = false

// Parameters related to uploading results to PanoramaWeb
panorama.domain = 'https://panoramaweb.org'
panorama.upload = false // Whether or not to upload to PanoramaWeb
panorama.upload_url = null // The webdav URL of a folder to hold all uploaded files
panorama.import_skyline = false // whether or not to import the Skyline into Panorama's internal database
Expand Down
12 changes: 5 additions & 7 deletions workflows/get_input_files.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ include { PANORAMA_GET_SKYR_FILE } from "../modules/panorama"
include { PANORAMA_GET_FILE as PANORAMA_GET_METADATA } from "../modules/panorama"
include { MAKE_EMPTY_FILE as METADATA_PLACEHOLDER } from "../modules/qc_report"

PANORAMA_URL = 'https://panoramaweb.org'

/**
* Process a parameter variable which is specified as either a single value or List.
* If param_variable has multiple lines, each line with text is returned as an
Expand Down Expand Up @@ -42,15 +40,15 @@ workflow get_input_files {
main:

// get files from Panorama as necessary
if(params.fasta.startsWith(PANORAMA_URL)) {
if(params.fasta.startsWith(params.panorama.domain)) {
PANORAMA_GET_FASTA(params.fasta, aws_secret_id)
fasta = PANORAMA_GET_FASTA.out.panorama_file
} else {
fasta = Channel.value(file(params.fasta, checkIfExists: true))
}

if(params.spectral_library) {
if(params.spectral_library.startsWith(PANORAMA_URL)) {
if(params.spectral_library.startsWith(params.panorama.domain)) {
PANORAMA_GET_SPECTRAL_LIBRARY(params.spectral_library, aws_secret_id)
spectral_library = PANORAMA_GET_SPECTRAL_LIBRARY.out.panorama_file
} else {
Expand All @@ -61,7 +59,7 @@ workflow get_input_files {
}

if(params.skyline.template_file != null) {
if(params.skyline.template_file.startsWith(PANORAMA_URL)) {
if(params.skyline.template_file.startsWith(params.panorama.domain)) {
PANORAMA_GET_SKYLINE_TEMPLATE(params.skyline.template_file, aws_secret_id)
skyline_template_zipfile = PANORAMA_GET_SKYLINE_TEMPLATE.out.panorama_file
} else {
Expand All @@ -75,7 +73,7 @@ workflow get_input_files {

// Split skyr files stored on Panorama and locally into separate channels.
Channel.fromList(param_to_list(params.skyline.skyr_file)).branch{
panorama_files: it.startsWith(PANORAMA_URL)
panorama_files: it.startsWith(params.panorama.domain)
local_files: true
return file(it, checkIfExists: true)
}.set{skyr_paths}
Expand All @@ -90,7 +88,7 @@ workflow get_input_files {
}

if(params.replicate_metadata != null) {
if(params.replicate_metadata.trim().startsWith(PANORAMA_URL)) {
if(params.replicate_metadata.trim().startsWith(params.panorama.domain)) {
PANORAMA_GET_METADATA(params.replicate_metadata, aws_secret_id)
replicate_metadata = PANORAMA_GET_METADATA.out.panorama_file
} else {
Expand Down
107 changes: 63 additions & 44 deletions workflows/get_mzmls.nf
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
// modules
include { PANORAMA_GET_RAW_FILE } from "../modules/panorama"
include { PANORAMA_GET_RAW_FILE_LIST } from "../modules/panorama"
include { PANORAMA_GET_MS_FILE } from "../modules/panorama"
include { PANORAMA_GET_MS_FILE_LIST } from "../modules/panorama"
include { MSCONVERT } from "../modules/msconvert"

// useful functions and variables
include { param_to_list } from "./get_input_files"
include { escapeRegex } from "../modules/panorama"

workflow get_mzmls {
take:
spectra_dir
Expand All @@ -14,56 +18,71 @@ workflow get_mzmls {

main:

if(spectra_dir.contains("https://")) {

spectra_dirs_ch = Channel.from(spectra_dir)
.splitText() // split multiline input
.map{ it.trim() } // removing surrounding whitespace
.filter{ it.length() > 0 } // skip empty lines

// get raw files from panorama
PANORAMA_GET_RAW_FILE_LIST(spectra_dirs_ch, spectra_glob, aws_secret_id)
raw_url_ch = PANORAMA_GET_RAW_FILE_LIST.out.raw_files
.splitText()
.map{ it -> it.strip() }

PANORAMA_GET_RAW_FILE(raw_url_ch, aws_secret_id)

mzml_ch = MSCONVERT(
PANORAMA_GET_RAW_FILE.out.panorama_file,
params.msconvert.do_demultiplex,
params.msconvert.do_simasspectra
)
// Parse spectra_dir parameter and split local and panorama directories
spectra_dirs = param_to_list(spectra_dir)
spectra_dirs_ch = Channel.fromList(spectra_dirs)
.branch{
panorama_dirs: it.startsWith(params.panorama.domain)
local_dirs: true
}

} else {
// Find files in local directories matching spectra_glob
String spectra_regex = '^' + escapeRegex(spectra_glob).replaceAll('\\*', '.*') + '$'
local_file_ch = spectra_dirs_ch.local_dirs
.map{ it ->
file(it, checkIfExists: true)
.listFiles()
.findAll{ it ==~ spectra_regex }
}.flatten()

file_glob = spectra_glob
spectra_dir = file(spectra_dir, checkIfExists: true)
data_files = file("$spectra_dir/${file_glob}")
// List files matching spectra_glob in panorama directories
PANORAMA_GET_MS_FILE_LIST(spectra_dirs_ch.panorama_dirs, spectra_glob, aws_secret_id)
PANORAMA_GET_MS_FILE_LIST.out.ms_files
.map{it -> it.readLines().collect{ line -> line.strip() }}
.flatten()
.set{panorama_url_ch}

if(data_files.size() < 1) {
error "No files found for: $spectra_dir/${file_glob}"
// make sure that all files have the same extension
all_paths_ch = panorama_url_ch.concat(
local_file_ch.map{
it -> it.name
}
)
all_paths_ch.collect().subscribe{ fileList ->
extensions = fileList.collect { it.substring(it.lastIndexOf('.') + 1) }.unique()

mzml_files = data_files.findAll { it.name.endsWith('.mzML') }
raw_files = data_files.findAll { it.name.endsWith('.raw') }

if(mzml_files.size() < 1 && raw_files.size() < 1) {
error "No raw or mzML files found in: $spectra_dir"
// Check that we have exactly 1 MS file extension
directories = spectra_dir.collect{ it -> "${it}${it[-1] == '/' ? '' : '/' }${spectra_glob}" }.join('\n')
if (extensions.size() == 0) {
error "No files matches fore:\n" + directories +
"\nPlease choose a file glob that will match raw or mzML files."
}

if(mzml_files.size() > 0 && raw_files.size() > 0) {
error "Matched raw files and mzML files for: $spectra_dir/${file_glob}. Please choose a file matching string that will only match one or the other."
if (extensions.size() > 1) {
error "Matched more than 1 file type for:\n" + directories +
"\nPlease choose a file glob that will only match one type of file"
}

if(mzml_files.size() > 0) {
mzml_ch = Channel.fromList(mzml_files)
} else {
mzml_ch = MSCONVERT(
Channel.fromList(raw_files),
params.msconvert.do_demultiplex,
params.msconvert.do_simasspectra
)
if(!extensions in ['raw', 'mzML']) {
error "No MS data files found for:\n" + directories
}
}

// Download files from panorama if applicable
PANORAMA_GET_MS_FILE(panorama_url_ch, aws_secret_id)

PANORAMA_GET_MS_FILE.out.panorama_file
.concat(local_file_ch)
.branch{
mzml: it.name.endsWith('.mzML')
raw: it.name.endsWith('.raw')
other: true
error "Unknown file type:" + it.name
}.set{ms_file_ch}

// Convert raw files if applicable
MSCONVERT(ms_file_ch.raw,
params.msconvert.do_demultiplex,
params.msconvert.do_simasspectra)

mzml_ch = MSCONVERT.out.concat(ms_file_ch.mzml)
}
Loading