Update ARIBA Module AMR Detection Mechanism #67

Merged · 4 commits · Aug 23, 2023
README.md (8 changes: 4 additions & 4 deletions)
@@ -235,8 +235,8 @@ The pipeline is compatible with [Launchpad](https://help.tower.nf/23.2/launch/la
## Other AMR
| Option | Values | Description |
| --- | ---| --- |
-| `--ariba_ref` | Any valid path to a `.fa` or `.fasta` file<br />(Default: `"$projectDir/data/ariba_ref_sequences-20230712.fasta"`) | Path to the reference sequences for ARIBA. |
-| `--ariba_metadata` | Any valid path to a `tsv` file<br />(Default: `"$projectDir/data/ariba_metadata-20230712.tsv"`) | Path to the metadata file for ARIBA. |
+| `--ariba_ref` | Any valid path to a `.fa` or `.fasta` file<br />(Default: `"$projectDir/data/ariba_ref_sequences.fasta"`) | Path to the reference sequences for ARIBA. |
+| `--ariba_metadata` | Any valid path to a `tsv` file<br />(Default: `"$projectDir/data/ariba_metadata.tsv"`) | Path to the metadata file for ARIBA. |
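These two inputs are expected to agree with each other: each metadata row is keyed by a sequence name that should exist in the reference FASTA. If a custom pair is supplied, a quick pre-flight check along the lines below can catch mismatches early. This is a minimal sketch, not part of the pipeline; the paths are placeholders, and the assumption that column 1 of the TSV holds the sequence name follows the metadata rows shown later in this PR.

```python
# Illustrative pre-flight check (not part of the pipeline): confirm that every
# name in a custom ARIBA metadata TSV exists in the matching reference FASTA.
# Both paths are placeholders for user-supplied files.
import csv

ariba_ref = "data/ariba_ref_sequences.fasta"
ariba_metadata = "data/ariba_metadata.tsv"

# FASTA sequence names: text between '>' and the first whitespace of each header line.
with open(ariba_ref) as fasta:
    seq_names = {line[1:].split()[0] for line in fasta if line.startswith(">")}

# Column 1 of the metadata TSV is assumed to hold the reference sequence name.
with open(ariba_metadata) as tsv:
    unmatched = {row[0] for row in csv.reader(tsv, delimiter="\t") if row and row[0] not in seq_names}

if unmatched:
    print(f"Metadata rows with no matching reference sequence: {sorted(unmatched)}")
else:
    print("All metadata rows reference a sequence in the FASTA.")
```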

## Singularity
> ℹ️ This section is only valid when Singularity is used as the container engine
@@ -444,8 +444,8 @@ This project uses open-source components. You can find the homepage or source co
[resistanceDatabase](https://github.com/kumarnaren/resistanceDatabase)
- Narender Kumar ([@kumarnaren](https://github.com/kumarnaren))
- License (GPL-3.0): https://github.com/kumarnaren/resistanceDatabase/blob/main/LICENSE
-- `sequences.fasta` is renamed to `ariba_ref_sequences-*.fasta` and used as-is
-- `metadata.tsv` is renamed to `ariba_metadata-*.tsv` and modified
+- `sequences.fasta` is renamed to `ariba_ref_sequences.fasta` and used as-is
+- `metadata.tsv` is renamed to `ariba_metadata.tsv` and modified
- The files are used as the default inputs of `GET_ARIBA_DB` process of the `amr.nf` module

[Shovill](https://github.com/tseemann/shovill)
bin/parse_other_resistance.py (39 changes: 18 additions & 21 deletions)
@@ -3,20 +3,18 @@
# Output AMR of a sample based on its ARIBA report and ARIBA metadata

import sys
-from itertools import chain
from collections import defaultdict
import pandas as pd
import csv


# Check argv and save to global variables
-if len(sys.argv) != 5:
-    sys.exit('Usage: get_other_resistance.py REPORT_PATH DEBUG_REPORT_PATH METADATA_PATH OUTPUT_FILE')
+if len(sys.argv) != 4:
+    sys.exit('Usage: get_other_resistance.py DEBUG_REPORT_PATH METADATA_PATH OUTPUT_FILE')

-REPORT_PATH = sys.argv[1]
-DEBUG_REPORT_PATH = sys.argv[2]
-METADATA_PATH = sys.argv[3]
-OUTPUT_FILE = sys.argv[4]
+DEBUG_REPORT_PATH = sys.argv[1]
+METADATA_PATH = sys.argv[2]
+OUTPUT_FILE = sys.argv[3]


def main():
@@ -57,35 +55,34 @@ def prepare_dicts():

# Finding hits in ARIBA results based on targets_dict and save hits to hits_dict
def find_hits(targets_dict, hits_dict):
-    with open(REPORT_PATH) as report, open(DEBUG_REPORT_PATH) as debug_report:
-        # Skip the header in report and debug report
-        next(report)
+    with open(DEBUG_REPORT_PATH) as debug_report:
+        # Skip the header in debug report
         next(debug_report)

-        # Go through lines in both report and debug report to detect targets
-        for line in (line.strip() for line in chain(report, debug_report)):
+        # Go through lines in the debug report to detect targets
+        for line in (line.strip() for line in debug_report):
             # Extract useful fields
             fields = [str(field) for field in line.split("\t")]
             ref_name, gene, var_only, ref_len, ref_base_assembled, known_var_change, has_known_var, ref_ctg_effect, ref_start, ref_end = fields[1], fields[2], fields[3], fields[7], fields[8], fields[16], fields[17], fields[19], fields[20], fields[21]

-            # If coverage (ref_base_assembled / ref_len) < 0.9 or either variable contains non-numeric value, skip the line
-            if not ref_base_assembled.isdigit() or not ref_len.isdigit() or int(ref_base_assembled)/int(ref_len) < 0.9:
-                continue

             # If the known_var_change (. for genes, specific change for variants) is not found in the metadata of the (ref_name, gene, var_only) combination, skip the line
             try:
                 target = targets_dict[(ref_name, gene, var_only)][known_var_change]
             except KeyError:
                 continue

-            # Logic for gene detection. Found means hit.
-            if var_only == "0":
+            # If ref_base_assembled or ref_len variable contains non-numeric value, skip the line
+            if not ref_base_assembled.isdigit() or not ref_len.isdigit():
+                continue

+            # Logic for gene detection, check coverage.
+            if var_only == "0" and int(ref_base_assembled)/int(ref_len) >= 0.8:
                 hits_dict[target].add(f'{ref_name}')
-            # Logic for variant detection, further criteria required

+            # Logic for variant detection, coverage check is not needed, but check for other criteria
             if var_only == "1":
                 # folP-specific criteria: ref_ctg_effect (effect of change between reference and contig) is one of the keywords and the change occurs within nt 168-201
-                if ref_name.lower().startswith("folp") and ref_ctg_effect.lower() in ('fshift', 'trunc', 'indel', 'ins', 'multiple') and (168 <= int(ref_start) <= 201 or 168 <= int(ref_end) <= 201):
+                if ref_name.lower().startswith("folp") and ref_ctg_effect.lower() in ('fshift', 'trunc', 'indel', 'indels', 'ins', 'multiple') and (168 <= int(ref_start) <= 201 or 168 <= int(ref_end) <= 201):
                     pos = ref_start if ref_start == ref_end else f'{ref_start}-{ref_end}'
                     hits_dict[target].add(f'{ref_name} {ref_ctg_effect} at {pos}')
                 # Common criteria: the assembly has that variant
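In short, the parser now reads only the ARIBA debug report: a gene reference (`var_only == "0"`) counts as a hit only when at least 80% of the reference is assembled, while known variants (`var_only == "1"`) skip the coverage check and are judged on variant-specific criteria instead. The sketch below restates that decision flow outside the diff. It assumes the same debug-report column indices as the script, omits the folP-specific handling, and represents the "assembly has that variant" check as `has_known_var == "1"`, which is an inference since that line sits below the visible part of the diff.

```python
# Simplified, illustrative restatement of the updated hit logic in parse_other_resistance.py.
# Column indices follow the script: ref_name=1, gene=2, var_only=3, ref_len=7,
# ref_base_assembled=8, known_var_change=16, has_known_var=17.
GENE_COVERAGE_THRESHOLD = 0.8  # the coverage check now applies to genes only

def classify(fields, targets_dict):
    """Return (target, hit_label) for one debug-report row, or None if the row is not a hit."""
    ref_name, gene, var_only = fields[1], fields[2], fields[3]
    ref_len, ref_base_assembled = fields[7], fields[8]
    known_var_change, has_known_var = fields[16], fields[17]

    # The row must be described in the metadata, otherwise it is ignored.
    try:
        target = targets_dict[(ref_name, gene, var_only)][known_var_change]
    except KeyError:
        return None

    # Skip rows whose length/coverage fields are not numeric.
    if not ref_base_assembled.isdigit() or not ref_len.isdigit():
        return None

    # Gene detection: presence plus at least 80% reference coverage.
    if var_only == "0" and int(ref_base_assembled) / int(ref_len) >= GENE_COVERAGE_THRESHOLD:
        return target, ref_name

    # Variant detection: no coverage requirement; the assembly must carry the known variant
    # (assumed here to be signalled by has_known_var == "1"; folP-specific criteria omitted).
    if var_only == "1" and has_known_var == "1":
        return target, f"{ref_name} {known_var_change}"

    return None
```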
data/ariba_metadata-20230712.tsv → data/ariba_metadata.tsv (4 changes: 2 additions & 2 deletions)
@@ -60,8 +60,8 @@ parE_AE007317 1 1 D435N . FQ
parE_AE007317 1 1 D435H . FQ
parE_AE007317 1 1 P454S . FQ
tetO_Y07780 1 0 . . TET
-ermBups_HG799494 0 0 . . ERY
-ermbTr_CP002121 0 0 . . ERY
+ermBups_HG799494 0 0 . . ERY_CLI
+ermbTr_CP002121 0 0 . . ERY_CLI
rpoB_AE007317 1 1 D489E . RIF
rpoB_AE007317 1 1 H499N . RIF
rpoB_AE007317 1 1 D489N . RIF
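The last column of each row is the resistance target that the parser reports, so switching ERY to ERY_CLI presumably lets erm determinants count towards both erythromycin and clindamycin downstream. As a sketch of how rows like these feed the `(ref_name, gene, var_only) → {var_change: target}` lookup used in `parse_other_resistance.py`, a minimal loader could look like the following. The six-column interpretation (name, gene flag, var_only flag, variant, group, target) is an assumption based on the rows shown above, and the code is illustrative rather than taken from the PR.

```python
# Illustrative sketch: build the (ref_name, gene, var_only) -> {var_change: target}
# lookup that parse_other_resistance.py queries, from rows like the ones above.
# Assumed column meanings: name, gene flag, var_only flag, variant, group, target.
import csv
from collections import defaultdict

def load_targets(metadata_path):
    targets_dict = defaultdict(dict)
    with open(metadata_path) as tsv:
        for row in csv.reader(tsv, delimiter="\t"):
            if len(row) < 6:
                continue  # skip malformed or header-like rows
            ref_name, gene, var_only, var_change, _group, target = row[:6]
            # e.g. ("ermBups_HG799494", "0", "0")["."]   -> "ERY_CLI"
            #      ("parE_AE007317", "1", "1")["D435N"] -> "FQ"
            targets_dict[(ref_name, gene, var_only)][var_change] = target
    return targets_dict
```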
File renamed without changes.
modules/amr.nf (9 changes: 4 additions & 5 deletions)
@@ -85,13 +85,12 @@ process OTHER_RESISTANCE {
tuple val(sample_id), path(read1), path(read2), path(unpaired)

output:
tuple val(sample_id), path(report), path(report_debug), emit: reports
tuple val(sample_id), path(report_debug), emit: report

script:
-report='result/report.tsv'
report_debug='result/debug.report.tsv'
"""
-ariba run --nucmer_min_id 80 --assembled_threshold 0.80 "$ariba_database/$database" "$read1" "$read2" result
+ariba run --nucmer_min_id 80 "$ariba_database/$database" "$read1" "$read2" result
"""
}

@@ -103,7 +102,7 @@ process PARSE_OTHER_RESISTANCE {
tag "$sample_id"

input:
-tuple val(sample_id), path(report), path(report_debug)
+tuple val(sample_id), path(report_debug)
path metadata

output:
@@ -112,6 +111,6 @@
script:
output_file="other_amr_report.csv"
"""
parse_other_resistance.py "$report" "$report_debug" "$metadata" "$output_file"
parse_other_resistance.py "$report_debug" "$metadata" "$output_file"
"""
}
nextflow.config (4 changes: 2 additions & 2 deletions)
@@ -49,8 +49,8 @@ params {
depth = 20.00

// Default ARIBA reference sequences and metadata paths
ariba_ref = "$projectDir/data/ariba_ref_sequences-20230815.fasta"
ariba_metadata = "$projectDir/data/ariba_metadata-20230712.tsv"
ariba_ref = "$projectDir/data/ariba_ref_sequences.fasta"
ariba_metadata = "$projectDir/data/ariba_metadata.tsv"

// Toggle for removing .bam and .sam files mid-run to reduce storage requirement
// Warning: This will break the -resume function of Nextflow
workflows/pipeline.nf (4 changes: 2 additions & 2 deletions)
@@ -145,9 +145,9 @@ workflow PIPELINE {
PARSE_PBP_RESISTANCE(PBP_RESISTANCE.out.json)

// From Channel OVERALL_QC_PASSED_ASSEMBLIES_ch, infer resistance and determinants of other antimicrobials
-// Output into Channel PARSE_OTHER_RESISTANCE.out.result
+// Output into Channel PARSE_OTHER_RESISTANCE.out.report
OTHER_RESISTANCE(GET_ARIBA_DB.out.path, GET_ARIBA_DB.out.database, OVERALL_QC_PASSED_READS_ch)
-PARSE_OTHER_RESISTANCE(OTHER_RESISTANCE.out.reports, params.ariba_metadata)
+PARSE_OTHER_RESISTANCE(OTHER_RESISTANCE.out.report, params.ariba_metadata)

// Generate sample reports by merging outputs from all result-generating modules
GENERATE_SAMPLE_REPORT(