Skip to content

Commit

Permalink
Changed galaxy xml file to use conda biotransformer package
Browse files Browse the repository at this point in the history
  • Loading branch information
trachtok authored and smartx-usman committed Sep 22, 2020
1 parent da5b70f commit 459f828
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 5 deletions.
13 changes: 8 additions & 5 deletions tools/biotransformer/biotransformer.xml
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
<tool id="biotransformer" name="BioTransformer" version="1.1.0">
<requirements>
<container type="docker">registry.gitlab.ics.muni.cz:443/recetox/mass-spectrometry/biotransformer:1.1.0</container>
</requirements>
<command><![CDATA[ cd /biotransformer && python3 wrapper_biotransformer.py
<tool id="biotransformer" name="BioTransformer" version="1.1.5">
<requirements>
<requirement type="package" version="1.1.5"> biotransformer </requirement>
<requirement type="package" version="3.1.1"> openbabel </requirement>
<requirement type="package" version="1.1.1"> pandas </requirement>
</requirements>
<command><![CDATA[ python3 '${__tool_directory__}/wrapper_biotransformer.py'
-k $k
-b $b
-s $steps
Expand Down Expand Up @@ -127,3 +129,4 @@ reaction(Oxidation, reduction, (de-)conjugation) at each step.
<citation type="doi">https://doi.org/10.1186/s13321-018-0324-5</citation>
</citations>
</tool>

89 changes: 89 additions & 0 deletions tools/biotransformer/wrapper_biotransformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import sys
import tempfile
import subprocess
import pandas
from openbabel import pybel

def InchiToSmiles(df):
    """Translate the InChI strings of ``df`` into SMILES strings.

    Args:
        df: Table-like object whose ``'InChI'`` entry is an iterable of
            InChI strings (in this script, a pandas DataFrame column).

    Returns:
        A list with one SMILES string per InChI, in the same order.
    """
    # pybel's SMILES writer terminates each record with whitespace (a tab
    # separating the empty title, then a newline). Strip it so that no raw
    # line break ends up inside a CSV cell.
    return [
        pybel.readstring("inchi", inchi).write("smi").strip()
        for inchi in df['InChI']
    ]

executable = ["biotransformer"]

# Column subsets used to de-duplicate the predicted metabolites for the two
# filtered output files.
_DEDUP_COLUMNS_FULL = [
    "InChI",
    "InChIKey",
    "Synonyms",
    "Molecular formula",
    "Major Isotope Mass",
    "ALogP",
]
_DEDUP_COLUMNS_SHORT = ["Molecular formula", "Major Isotope Mass", "ALogP"]


def _pop_option(arguments, flag):
    """Remove ``flag`` and the value following it from ``arguments``.

    Returns the value. Exits with status 1 and a message on stderr when the
    flag is missing or is the last argument (i.e. has no value).
    """
    if flag not in arguments:
        sys.stderr.write("expected {} parameter\n".format(flag))
        sys.exit(1)
    position = arguments.index(flag)
    if position + 1 >= len(arguments):
        sys.stderr.write("expected a value after {}\n".format(flag))
        sys.exit(1)
    value = arguments.pop(position + 1)
    del arguments[position]
    return value


def _option_value(arguments, flag):
    """Return the value following ``flag``, or None if it is not available."""
    if flag not in arguments:
        return None
    position = arguments.index(flag) + 1
    return arguments[position] if position < len(arguments) else None


def _run_biotransformer(arguments):
    """Run BioTransformer; exit with its status if it reports a failure."""
    status = subprocess.run(executable + arguments).returncode
    if status != 0:
        sys.stderr.write(
            "biotransformer exited with status {}\n".format(status))
        sys.exit(status)


def _write_with_targets(frames, destination):
    """Merge per-query results, add target SMILES and write them as CSV."""
    merged = pandas.concat(frames)
    merged.drop_duplicates(inplace=True)
    merged.insert(1, "SMILES target", InchiToSmiles(merged))
    merged.to_csv(destination)


def _run_batch(arguments):
    """Run BioTransformer once per SMILES listed in the ``-icsv`` file.

    Three CSV files are written: all results (``-ocsv``), results
    de-duplicated on six columns (``-ocsvDup``) and results de-duplicated on
    three columns (``-ocsvDup2``). Every row is prefixed with the query
    SMILES and the SMILES of the predicted product.
    """
    # Validate the whole command line before doing any work.
    icsv = _pop_option(arguments, "-icsv")
    ocsv = _pop_option(arguments, "-ocsv")
    ocsv_dup = _pop_option(arguments, "-ocsvDup")
    ocsv_dup2 = _pop_option(arguments, "-ocsvDup2")

    in_df = pandas.read_csv(icsv, header=None)

    all_results = []
    dedup_full = []
    dedup_short = []
    for _, (smiles,) in in_df.iterrows():
        with tempfile.NamedTemporaryFile() as out:
            _run_biotransformer(
                arguments + ["-ismi", smiles, "-ocsv", out.name])
            # Parse the tool output once and derive every view from it.
            result = pandas.read_csv(out.name)
        result.insert(0, "SMILES query", smiles)
        all_results.append(result)
        dedup_full.append(
            result.drop_duplicates(subset=_DEDUP_COLUMNS_FULL))
        dedup_short.append(
            result.drop_duplicates(subset=_DEDUP_COLUMNS_SHORT))

    _write_with_targets(all_results, ocsv)
    _write_with_targets(dedup_full, ocsv_dup)
    _write_with_targets(dedup_short, ocsv_dup2)


def _run_single(arguments):
    """Run BioTransformer once and annotate its CSV output in place."""
    _run_biotransformer(arguments)

    smiles = _option_value(arguments, "-ismi")
    out = _option_value(arguments, "-ocsv")
    if smiles is None or out is None:
        # Without a SMILES query or a CSV output there is nothing to
        # annotate; the tool run itself already succeeded.
        return

    result = pandas.read_csv(out)  # output file created by the tool
    result.insert(0, "SMILES query", smiles)
    result.insert(1, "SMILES target", InchiToSmiles(result))
    result.to_csv(out)


def main(argv=None):
    """Entry point of the wrapper.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. With ``-icsv`` the batch mode is used,
            otherwise all arguments are passed to BioTransformer unchanged.
    """
    arguments = list(sys.argv[1:] if argv is None else argv)
    if "-icsv" in arguments:
        _run_batch(arguments)
    else:
        _run_single(arguments)


if __name__ == "__main__":
    main()

0 comments on commit 459f828

Please sign in to comment.