Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Isolib tool first version #521

Merged
merged 16 commits into from
Apr 23, 2024
11 changes: 11 additions & 0 deletions tools/isolib/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Galaxy Tool Shed metadata for the isolib tool.
categories:
  - Metabolomics
description: Create an isotopic pattern library for given compounds and adducts.
homepage_url: https://github.com/RECETOX/galaxytools/
# Block scalar: the continuation lines must be indented to belong to this key.
long_description: |
  Create MSP files containing the isotopic patterns for given molecules with given adducts.
  The tool is based on enviPat and the RforMassSpectrometry toolbox.
name: isolib
owner: recetox
remote_repository_url: https://github.com/RECETOX/galaxytools/tree/master/tools/isolib
type: unrestricted
80 changes: 80 additions & 0 deletions tools/isolib/isolib.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
library(enviPat)
library(Spectra)
library(MsBackendMsp)
library(MetaboCoreUtils)

#' Build an isotopic pattern library and export it as an MSP file.
#'
#' Takes no R-level parameters; it reads five positional command line
#' arguments via `commandArgs(trailingOnly = TRUE)`:
#'   1. path to a tab-separated compound table with `name` and `formula`
#'      columns and an optional `rt` column,
#'   2. comma-separated adduct names, matched against the `Name` column of
#'      the enviPat `adducts` table,
#'   3. threshold forwarded to `enviPat::isopattern()`,
#'   4. logical flag ("TRUE"/"FALSE"): append the adduct name to each
#'      compound name,
#'   5. path of the MSP file to write.
#'
#' @return The value of the final `export()` call; the function is called
#'   for its side effect of writing the MSP file.
main <- function() {
  # Load the enviPat reference tables into this function's environment so
  # the lookups below do not depend on (or modify) the global workspace.
  data("isotopes", package = "enviPat", envir = environment())
  data("adducts", package = "enviPat", envir = environment())

  args <- commandArgs(trailingOnly = TRUE)
  if (length(args) < 5) {
    stop(
      "Expected 5 arguments: <compound table> <adducts> <threshold> ",
      "<append adduct> <output file>",
      call. = FALSE
    )
  }

  compound_table <- read.delim(args[1], stringsAsFactors = FALSE)
  adducts_to_use <- unlist(strsplit(args[2], ",", fixed = TRUE))
  threshold <- as.numeric(args[3])
  # Parse the flag explicitly instead of comparing a string with TRUE.
  append_adduct <- isTRUE(as.logical(args[4]))
  output_file <- args[5]

  missing_columns <- setdiff(c("name", "formula"), colnames(compound_table))
  if (length(missing_columns) > 0) {
    stop(
      "Compound table is missing required column(s): ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }
  if (is.na(threshold)) {
    stop("Threshold is not a number: ", args[3], call. = FALSE)
  }
  if (length(adducts_to_use) == 0) {
    stop("No adducts were specified.", call. = FALSE)
  }
  unknown_adducts <- setdiff(adducts_to_use, adducts$Name)
  if (length(unknown_adducts) > 0) {
    stop(
      "Unknown adduct(s): ", paste(unknown_adducts, collapse = ", "),
      call. = FALSE
    )
  }

  # Column 2 of the check_chemform() result holds the formulas used below.
  chemforms <- check_chemform(isotopes, compound_table$formula)[, 2]

  # Builds the per-compound spectra table for a single adduct name.
  build_adduct_spectra <- function(current) {
    adduct <- adducts[adducts$Name == current, ]

    # NOTE(review): the formula is multiplied by `Mult`; confirm against the
    # enviPat `adducts` documentation that this is the intended multiplier
    # column for multiply charged adducts such as M-2H.
    multiplied_chemforms <- multiform(chemforms, adduct$Mult)

    # Negative-mode adducts only have `Formula_ded` subtracted; all other
    # adducts only have `Formula_add` added.
    if (adduct$Ion_mode == "negative") {
      merged_chemforms <- subform(multiplied_chemforms, adduct$Formula_ded)
    } else {
      merged_chemforms <- mergeform(multiplied_chemforms, adduct$Formula_add)
    }

    charge_sign <- if (adduct$Charge > 0) "+" else "-"
    charge_count <- if (abs(adduct$Charge) > 1) abs(adduct$Charge) else ""
    adduct_string <- paste0("[", adduct$Name, "]", charge_sign, charge_count)
    precursor_mz <- calculateMass(multiplied_chemforms) + adduct$Mass

    if (append_adduct) {
      spectrum_names <- paste(
        compound_table$name,
        paste0("(", adduct$Name, ")"),
        sep = " "
      )
    } else {
      spectrum_names <- compound_table$name
    }

    spectra_df <- data.frame(
      name = spectrum_names,
      adduct = adduct_string,
      formula = chemforms,
      charge = adduct$Charge,
      ionization_mode = adduct$Ion_mode,
      precursor_mz = precursor_mz,
      msLevel = 1L
    )

    if ("rt" %in% colnames(compound_table)) {
      spectra_df$retention_time <- compound_table$rt
    }

    patterns <- enviPat::isopattern(
      isotopes = isotopes,
      chemforms = merged_chemforms,
      charge = adduct$Charge,
      threshold = threshold
    )

    # One list element per compound: column 1 is stored as m/z, column 2 as
    # intensity. unname() keeps the list columns free of element names.
    spectra_df$mz <- unname(lapply(patterns, function(p) p[, 1]))
    spectra_df$intensity <- unname(lapply(patterns, function(p) p[, 2]))
    spectra_df
  }

  # Build all per-adduct tables first, then bind once instead of growing a
  # data frame inside a loop.
  spectra <- do.call(rbind, lapply(adducts_to_use, build_adduct_spectra))

  sps <- Spectra(spectra)
  export(sps, MsBackendMsp(), file = output_file)
}

# Run the tool. main() reads the command line arguments itself via
# commandArgs(), so no global `args` variable is needed here.
main()
72 changes: 72 additions & 0 deletions tools/isolib/isolib.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
<tool id="isolib" name="isolib" version="1.0.0+galaxy0" profile="21.09">
    <description>create an isotopic pattern library for given compounds and adducts</description>
    <creator>
        <person
            givenName="Helge"
            familyName="Hecht"
            url="https://github.com/hechth"
            identifier="0000-0001-6744-996X" />
        <organization
            url="https://www.recetox.muni.cz/"
            email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
            name="RECETOX MUNI" />
    </creator>
    <edam_operations>
        <edam_operation>operation_3632</edam_operation>
    </edam_operations>
    <requirements>
        <requirement type="package" version="1.10.0">bioconductor-metabocoreutils</requirement>
        <requirement type="package" version="1.12.0">bioconductor-spectra</requirement>
        <requirement type="package" version="1.6.0">bioconductor-msbackendmsp</requirement>
        <requirement type="package" version="2.6">r-envipat</requirement>
    </requirements>
    <!--
        Positional arguments, in the order isolib.R reads them:
        compound table, comma-separated adducts, threshold, append-adduct flag, output file.
        The script path is quoted so install locations containing spaces work, and the
        adducts parameter is addressed through its enclosing conditional.
    -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript '${__tool_directory__}/isolib.R' '${input_file}' '${ionization.adducts}' '${threshold}' '${append_adduct}' '${isotope_library}'
    ]]></command>
    <inputs>
        <param name="input_file" type="data" format="tabular" label="Table with input compounds"/>
        <!-- The selectable adducts depend on the chosen ionization mode. -->
        <conditional name="ionization">
            <param name="ionization" type="select" label="Ionization mode" help="Ionization mode used in the experiment">
                <option value="negative" selected="true">negative</option>
                <option value="positive">positive</option>
            </param>
            <when value="positive">
                <param name="adducts" type="select" label="Adducts" multiple="true" help="Adducts to use">
                    <option value="M+H" selected="true">M+H</option>
                </param>
            </when>
            <when value="negative">
                <param name="adducts" type="select" label="Adducts" multiple="true" help="Adducts to use">
                    <option value="M-H" selected="true">M-H</option>
                    <option value="M-2H">M-2H</option>
                    <option value="2M-H">2M-H</option>
                </param>
            </when>
        </conditional>
        <param name="threshold" type="float" min="0" max="100" value="1" label="Threshold" help="Probability threshold to use as cutoff for isotopic pattern distribution - this can be used to remove low abundant peaks and improve computation performance." />
        <param name="append_adduct" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Append adduct to compound name" help="Append the adduct string to the compound name for easy identification." />
    </inputs>
    <outputs>
        <data format="msp" name="isotope_library"/>
    </outputs>

    <tests>
        <!-- Runs with the default parameters (negative mode, M-H adduct). -->
        <test>
            <param name="input_file" value="lc_markers_neg.tsv"/>
            <output name="isotope_library" file="test0.msp"/>
        </test>
    </tests>
    <help><![CDATA[
This tool computes isotopic patterns for given compounds and adduct forms.
The compound table input file should contain the following columns:

- name: compound name
- formula: compound formula
- rt (optional): retention time

The output is a spectral library in the MSP format.
    ]]></help>
    <citations>
        <citation type="doi">10.1021/acs.analchem.5b00941</citation>
        <citation type="doi">10.3390/metabo12020173</citation>
    </citations>
</tool>
24 changes: 24 additions & 0 deletions tools/isolib/test-data/lc_markers_neg.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
formula name rt
C8H6Cl2O3 2,4-Dichlorophenoxyacetic acid ou 2,4-D 484.2
C9H15N3O1 2-diethylamino-6-methyl pyrimidin-4-ol/one 451.8
C5H2Cl3N1O1 3,5,6-Trichloro-2-pyridinol 499.2
C13H10O3 3-phenoxybenzoic acid 517.8
C13H9FO3 4-Fluoro-3-phenoxybenzoic acid 532.2
C6H5NO3 4-nitrophenol 165
C6H4Cl1N1O2 6-Chloronicotinic acid 172.2
C19H28N2O5S Acetochlor mercapturate 607.2
C19H28N2O5S1 Alachlor mercapturate 607.2
C10H12N2O3S1 Bentazone 747.6
C4H11O3P1S1 Diethylthiophosphate 177
C14H17Cl2NO2 Fenhexamid 689.4
C11H13ClO2 Fenvalerate free acid 585
C12H4Cl2F6N4OS Fipronil 750
C12H4Cl2F6N4O2S Fipronil sulfone 771
C16H22ClN3O2 Hydroxy-tebuconazole 696.6
C16H11ClF6N2O Fluopyram 725.4
C10H11Cl1O3 Mecoprop 544.8
C7H9NO2S p-Toluenesulfonamide 362.4
C12H7Cl3O2 Triclosan 811.8
C18H15Cl3O8 Triclosan glucuronide 665.4
C12H7Cl3O5S Triclosan sulfate 695.4
C9H9N4Cl acetamiprid-N-desmethyl 402.6
Loading