Skip to content

Commit

Permalink
Renamed Protein report to General report
Browse files: browse the repository at this point in the history
  • Loading branch information
iquasere committed Dec 28, 2023
1 parent 118a163 commit 11b1967
Show file tree
Hide file tree
Showing 10 changed files with 41 additions and 41 deletions.
2 changes: 1 addition & 1 deletion meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ build:
requirements:
run:
- python >=3.9
- snakemake
- snakemake <8

test:
commands:
Expand Down
9 changes: 7 additions & 2 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,17 @@ include: "rules/metaproteomics.smk"
include: "rules/quantification.smk"
include: "rules/normalization.smk"
include: "rules/de_analysis.smk"
include: "rules/protein_report.smk"
include: "rules/general_report.smk"
include: "rules/entry_report.smk"
include: "rules/keggcharter.smk"
include: "rules/summary_report.smk"

##### target rules #####
rule all:
input:
all_input
[f"{OUTPUT}/MOSCA_General_Report.xlsx",
f"{OUTPUT}/MOSCA_Entry_Report.xlsx",
f"{OUTPUT}/MOSCA_Versions_Report.xlsx",
f"{OUTPUT}/MOSCA_Summary_Report.tsv",
f"{OUTPUT}/MOSCA_results.zip",
f"{OUTPUT}/KEGG_maps/KEGGCharter_results.tsv"]
10 changes: 8 additions & 2 deletions workflow/mosca.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
args = parser.parse_args()


def validate_config(config_data):
    """Abort the run when the configuration asks for an impossible combination.

    Binning is only accepted when assembly is also enabled.

    :param config_data: mapping holding the parsed configuration; the
        'do_assembly' and 'do_binning' entries are read.
    :raises SystemExit: if 'do_binning' is truthy while 'do_assembly' is falsy.
    """
    if not config_data['do_assembly'] and config_data['do_binning']:
        sys.exit('ERROR: Can only do binning if assembly is performed.')


def read_config(filename):
if filename.split('.')[-1] == 'yaml':
with open(filename) as stream:
Expand All @@ -36,7 +41,7 @@ def read_config(filename):
with open(filename) as f:
return json.load(f), 'json'
else:
exit('Config file must end in either ".json" or ".yaml"')
sys.exit('ERROR: Config file must end in either ".json" or ".yaml"')


def save_config(config_data, filename, output_format):
Expand Down Expand Up @@ -73,8 +78,9 @@ def validate_exps(exps_data):

start_time = time()
config, config_format = read_config(args.configfile)
pathlib.Path(config["output"]).mkdir(parents=True, exist_ok=True)
validate_config(config)
validate_exps(config["experiments"])
pathlib.Path(config["output"]).mkdir(parents=True, exist_ok=True)
save_config(config, f'{config["output"]}/config.json', output_format=config_format)

command = (
Expand Down
16 changes: 0 additions & 16 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,3 @@ def join_reads_input(wildcards):
return [f'{OUTPUT}/Preprocess/Trimmomatic/quality_trimmed_{df.iloc[i]["Name"]}{fr}.fq'
for i in range(len(df))
for fr in (['_forward_paired', '_reverse_paired'] if ',' in df.iloc[i]["Files"] else [''])]

def fastq2fasta_input(wildcards):
return expand("{output}/Preprocess/Trimmomatic/quality_trimmed_{name}{fr}.fq", output=OUTPUT,
fr=(['_forward_paired', '_reverse_paired'] if EXPS["Files"].str.contains(',').tolist() else ''),
name=wildcards.sample)

def gene_calling_input(wildcards):
if config['do_assembly']:
return expand("{output}/Assembly/{sample}/scaffolds.fasta", output=OUTPUT, sample=wildcards.sample)
return expand(
"{output}/Preprocess/piled_{name}.fasta", output=OUTPUT, name=wildcards.sample)

def upimapi_input(wildcards):
if config['do_assembly']:
return expand("{output}/Annotation/{sample}/aligned.blast", output=OUTPUT, sample=set(EXPS['Sample']))
return expand("{output}/Annotation/{name}/aligned.blast", output=OUTPUT, name=set(EXPS['Name']))
2 changes: 1 addition & 1 deletion workflow/rules/entry_report.smk
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
rule entry_report:
input:
p_reports = expand("{output}/MOSCA_{sample}_Protein_Report.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])),
p_reports = expand("{output}/MOSCA_{sample}_General_Report.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])),
norm = f"{OUTPUT}/Quantification/mt_normalized.tsv" if len(mt_exps) > 0 else f"{OUTPUT}/Metaproteomics/mp_normalized.tsv"
output:
f"{OUTPUT}/MOSCA_Entry_Report.xlsx",
Expand Down
11 changes: 7 additions & 4 deletions workflow/rules/gene_calling.smk
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
rule fastq2fasta:
input:
fastq2fasta_input
expand("{output}/Preprocess/Trimmomatic/quality_trimmed_{name}{fr}.fq", output=OUTPUT,
fr=(['_forward_paired', '_reverse_paired'] if EXPS["Files"].str.contains(',').tolist() else ''),
name=lambda wildcards: wildcards.sample)
output:
f"{OUTPUT}/Preprocess/piled_{{sample}}.fasta"
threads:
Expand All @@ -10,10 +12,11 @@ rule fastq2fasta:

rule gene_calling:
input:
gene_calling_input
(f"{OUTPUT}/Assembly/{{sample}}/scaffolds.fasta" if config['do_assembly'] else
f"{OUTPUT}/Preprocess/piled_{{sample}}.fasta")
output:
expand("{output}/Annotation/{{sample}}/fgs.faa", output=OUTPUT),
expand("{output}/Annotation/{{sample}}/fgs.ffn", output=OUTPUT)
f"{OUTPUT}/Annotation/{{sample}}/fgs.faa",
f"{OUTPUT}/Annotation/{{sample}}/fgs.ffn"
threads:
config["threads"]
params:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ rule protein_report:
expand("{output}/Quantification/{sample}_mt_norm.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])),
expand("{output}/Metaproteomics/{sample}_mp.spectracounts", output=OUTPUT, sample=set(mp_exps['Sample']))
output:
expand("{output}/MOSCA_{sample}_Protein_Report.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])),
f"{OUTPUT}/MOSCA_Protein_Report.xlsx",
expand("{output}/MOSCA_{sample}_General_Report.tsv", output=OUTPUT, sample=set(mg_exps['Sample'])),
f"{OUTPUT}/MOSCA_General_Report.xlsx",
f"{OUTPUT}/Quantification/dea_input.tsv",
f"{OUTPUT}/Quantification/mg_entry_quant.tsv",
f"{OUTPUT}/Quantification/mt_entry_quant.tsv" if len(mt_exps) > 0 else f"{OUTPUT}/Metaproteomics/mp_entry_quant.tsv"
Expand All @@ -21,4 +21,4 @@ rule protein_report:
conda:
"../envs/reports.yaml"
script:
"../scripts/protein_report.py"
"../scripts/general_report.py"
8 changes: 5 additions & 3 deletions workflow/rules/summary_report.smk
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
rule summary_report:
input:
expand("{output}/MOSCA_{sample}_Protein_Report.tsv", output=OUTPUT, sample=set(EXPS['Sample'])),
expand("{output}/MOSCA_{sample}_General_Report.tsv", output=OUTPUT, sample=set(EXPS['Sample'])),
f"{OUTPUT}/MOSCA_Entry_Report.xlsx",
f"{OUTPUT}/DE_analysis/condition_treated_results.tsv"
f"{OUTPUT}/DE_analysis/condition_treated_results.tsv",
(expand("{output}/Binning/{sample}/checkm.tsv", output=OUTPUT, sample=set(EXPS['Sample']))
if config['do_binning'] else [])
output:
f"{OUTPUT}/MOSCA_Versions_Report.xlsx",
f"{OUTPUT}/MOSCA_General_Report.tsv",
f"{OUTPUT}/MOSCA_Summary_Report.tsv",
f"{OUTPUT}/MOSCA_results.zip"
threads:
1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
'General functional category', 'Functional category', 'Protein description', 'COG ID', 'EC number (reCOGnizer)']


def make_protein_report(out, exps, sample, mg_preport, mt_preport, mp_preport, de_input):
def make_general_report(out, exps, sample, mg_preport, mt_preport, mp_preport, de_input):
timed_message(f'Joining data for sample: {sample}.')
with open(f'{out}/Annotation/{sample}/fgs.faa') as f:
lines = f.readlines()
Expand Down Expand Up @@ -58,15 +58,15 @@ def make_protein_report(out, exps, sample, mg_preport, mt_preport, mp_preport, d
mp_preport = pd.merge(mp_preport, report[['Entry'] + mp_names], on='Entry', how='outer')
report[mg_names + mt_names + mp_names] = report[mg_names + mt_names + mp_names].fillna(
value=0).astype(float).astype(int)
report.to_csv(f'{out}/MOSCA_{sample}_Protein_Report.tsv', sep='\t', index=False)
report.to_csv(f'{out}/MOSCA_{sample}_General_Report.tsv', sep='\t', index=False)
return report, mg_preport, mt_preport, mp_preport, de_input


def make_protein_reports(out, exps, max_lines=1000000):
def make_general_reports(out, exps, max_lines=1000000):
mg_report = mt_report = mp_report = de_input = pd.DataFrame(columns=['Entry'])
writer = pd.ExcelWriter(f'{out}/MOSCA_Protein_Report.xlsx', engine='xlsxwriter')
writer = pd.ExcelWriter(f'{out}/MOSCA_General_Report.xlsx', engine='xlsxwriter')
for sample in set(exps['Sample']):
report, mg_report, mt_report, mp_report, de_input = make_protein_report(
report, mg_report, mt_report, mp_report, de_input = make_general_report(
out, exps, sample, mg_report, mt_report, mp_report, de_input)
timed_message(f'Writing Protein Report for sample: {sample}.')
if len(report) < max_lines:
Expand Down Expand Up @@ -103,7 +103,7 @@ def make_protein_reports(out, exps, max_lines=1000000):

def run():
    """Load the experiments table and build the general reports.

    Reads the tab-separated file at ``snakemake.params.exps`` and hands it,
    together with ``snakemake.params.output``, to ``make_general_reports``.
    Only the renamed entry point is called; the former
    ``make_protein_reports`` name no longer exists after the rename.
    """
    exps = pd.read_csv(snakemake.params.exps, sep='\t')
    make_general_reports(snakemake.params.output, exps)


if __name__ == '__main__':
Expand Down
6 changes: 3 additions & 3 deletions workflow/scripts/summary_report.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
General report construction and export
Summary report construction and export
By João Sequeira
Expand Down Expand Up @@ -139,7 +139,7 @@ def zip_outputs(self, out_dir):
'de_plots': glob(f'{out_dir}/DE_analysis/*.jpeg'),
'kegg_maps': glob(f'{out_dir}/KEGG_maps/*.png'),
'main_reports': [f'{out_dir}/{filename}' for filename in [
'MOSCA_Protein_Report.xlsx', 'MOSCA_Entry_Report.xlsx', 'MOSCA_General_Report.tsv']]}
'MOSCA_Protein_Report.xlsx', 'MOSCA_Entry_Report.xlsx', 'MOSCA_Summary_Report.tsv']]}
with ZipFile(f'{out_dir}/MOSCA_results.zip', 'w') as archive:
for k, v in files_n_folders.items():
for file in v:
Expand All @@ -157,7 +157,7 @@ def run(self):
exps = pd.read_csv(f'{snakemake.params.output}/exps.tsv', sep='\t')
self.info_from_differential_expression(
snakemake.params.output, cutoff=snakemake.params.cutoff, mp='protein' in exps['Data type'].tolist())
self.report.to_csv(f'{snakemake.params.output}/MOSCA_General_Report.tsv', sep='\t')
self.report.to_csv(f'{snakemake.params.output}/MOSCA_Summary_Report.tsv', sep='\t')
self.zip_outputs(snakemake.params.output)


Expand Down

0 comments on commit 11b1967

Please sign in to comment.