Skip to content

Commit

Permalink
Merge pull request #67 from HarryHung/feature/ariba-amr-update
Browse files Browse the repository at this point in the history
Update ARIBA Module AMR Detection Mechanism 

Former-commit-id: b04cc1f
  • Loading branch information
HarryHung authored Aug 23, 2023
2 parents c076b3d + 5daaeea commit 6981f2b
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 36 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,8 @@ The pipeline is compatible with [Launchpad](https://help.tower.nf/23.2/launch/la
## Other AMR
| Option | Values | Description |
| --- | ---| --- |
| `--ariba_ref` | Any valid path to a `.fa` or `.fasta` file<br />(Default: `"$projectDir/data/ariba_ref_sequences-20230712.fasta"`) | Path to the reference sequences for ARIBA. |
| `--ariba_metadata` | Any valid path to a `tsv` file<br />(Default: `"$projectDir/data/ariba_metadata-20230712.tsv"`) | Path to the metadata file for ARIBA. |
| `--ariba_ref` | Any valid path to a `.fa` or `.fasta` file<br />(Default: `"$projectDir/data/ariba_ref_sequences.fasta"`) | Path to the reference sequences for ARIBA. |
| `--ariba_metadata` | Any valid path to a `tsv` file<br />(Default: `"$projectDir/data/ariba_metadata.tsv"`) | Path to the metadata file for ARIBA. |

## Singularity
> ℹ️ This section is only valid when Singularity is used as the container engine
Expand Down Expand Up @@ -444,8 +444,8 @@ This project uses open-source components. You can find the homepage or source co
[resistanceDatabase](https://github.com/kumarnaren/resistanceDatabase)
- Narender Kumar ([@kumarnaren](https://github.com/kumarnaren))
- License (GPL-3.0): https://github.com/kumarnaren/resistanceDatabase/blob/main/LICENSE
- `sequences.fasta` is renamed to `ariba_ref_sequences-*.fasta` and used as-is
- `metadata.tsv` is renamed to `ariba_metadata-*.tsv` and modified
- `sequences.fasta` is renamed to `ariba_ref_sequences.fasta` and used as-is
- `metadata.tsv` is renamed to `ariba_metadata.tsv` and modified
- The files are used as the default inputs of `GET_ARIBA_DB` process of the `amr.nf` module

[Shovill](https://github.com/tseemann/shovill)
Expand Down
39 changes: 18 additions & 21 deletions bin/parse_other_resistance.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,18 @@
# Output AMR of a sample based on its ARIBA report and ARIBA metadata

import sys
from itertools import chain
from collections import defaultdict
import pandas as pd
import csv


# Check argv and save to global variables
if len(sys.argv) != 5:
sys.exit('Usage: get_other_resistance.py REPORT_PATH DEBUG_REPORT_PATH METADATA_PATH OUTPUT_FILE')
if len(sys.argv) != 4:
sys.exit('Usage: get_other_resistance.py DEBUG_REPORT_PATH METADATA_PATH OUTPUT_FILE')

REPORT_PATH = sys.argv[1]
DEBUG_REPORT_PATH = sys.argv[2]
METADATA_PATH = sys.argv[3]
OUTPUT_FILE = sys.argv[4]
DEBUG_REPORT_PATH = sys.argv[1]
METADATA_PATH = sys.argv[2]
OUTPUT_FILE = sys.argv[3]


def main():
Expand Down Expand Up @@ -57,35 +55,34 @@ def prepare_dicts():

# Finding hits in ARIBA results based on targets_dict and save hits to hits_dict
def find_hits(targets_dict, hits_dict):
with open(REPORT_PATH) as report, open(DEBUG_REPORT_PATH) as debug_report:
# Skip the header in report and debug report
next(report)
with open(DEBUG_REPORT_PATH) as debug_report:
# Skip the header in debug report
next(debug_report)

# Go through lines in both report and debug report to detect targets
for line in (line.strip() for line in chain(report, debug_report)):
# Go through lines in the debug report to detect targets
for line in (line.strip() for line in debug_report):
# Extract useful fields
fields = [str(field) for field in line.split("\t")]
ref_name, gene, var_only, ref_len, ref_base_assembled, known_var_change, has_known_var, ref_ctg_effect, ref_start, ref_end = fields[1], fields[2], fields[3], fields[7], fields[8], fields[16], fields[17], fields[19], fields[20], fields[21]

# If coverage (ref_base_assembled / ref_len) < 0.9 or either variable contains non-numeric value, skip the line
if not ref_base_assembled.isdigit() or not ref_len.isdigit() or int(ref_base_assembled)/int(ref_len) < 0.9:
continue

# If the known_var_change (. for genes, specific change for variants) is not found in the metadata of the (ref_name, gene, var_only) combination, skip the line
try:
target = targets_dict[(ref_name, gene, var_only)][known_var_change]
except KeyError:
continue

# Logic for gene detection. Found means hit.
if var_only == "0":
# If ref_base_assembled or ref_len variable contains non-numeric value, skip the line
if not ref_base_assembled.isdigit() or not ref_len.isdigit():
continue

# Logic for gene detection, check coverage.
if var_only == "0" and int(ref_base_assembled)/int(ref_len) >= 0.8:
hits_dict[target].add(f'{ref_name}')
# Logic for variant detection, further criteria required

# Logic for variant detection, coverage check is not needed, but check for other criteria
if var_only == "1":
# folP-specific criteria: ref_ctg_effect (effect of change between reference and contig) is one of the keywords and the change occurs within nt 168-201
if ref_name.lower().startswith("folp") and ref_ctg_effect.lower() in ('fshift', 'trunc', 'indel', 'ins', 'multiple') and (168 <= int(ref_start) <= 201 or 168 <= int(ref_end) <= 201):
if ref_name.lower().startswith("folp") and ref_ctg_effect.lower() in ('fshift', 'trunc', 'indel', 'indels', 'ins', 'multiple') and (168 <= int(ref_start) <= 201 or 168 <= int(ref_end) <= 201):
pos = ref_start if ref_start == ref_end else f'{ref_start}-{ref_end}'
hits_dict[target].add(f'{ref_name} {ref_ctg_effect} at {pos}')
# Common criteria: the assembly has that variant
Expand Down
4 changes: 2 additions & 2 deletions data/ariba_metadata-20230712.tsv → data/ariba_metadata.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ parE_AE007317 1 1 D435N . FQ
parE_AE007317 1 1 D435H . FQ
parE_AE007317 1 1 P454S . FQ
tetO_Y07780 1 0 . . TET
ermBups_HG799494 0 0 . . ERY
ermbTr_CP002121 0 0 . . ERY
ermBups_HG799494 0 0 . . ERY_CLI
ermbTr_CP002121 0 0 . . ERY_CLI
rpoB_AE007317 1 1 D489E . RIF
rpoB_AE007317 1 1 H499N . RIF
rpoB_AE007317 1 1 D489N . RIF
Expand Down
File renamed without changes.
9 changes: 4 additions & 5 deletions modules/amr.nf
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,12 @@ process OTHER_RESISTANCE {
tuple val(sample_id), path(read1), path(read2), path(unpaired)

output:
tuple val(sample_id), path(report), path(report_debug), emit: reports
tuple val(sample_id), path(report_debug), emit: report

script:
report='result/report.tsv'
report_debug='result/debug.report.tsv'
"""
ariba run --nucmer_min_id 80 --assembled_threshold 0.80 "$ariba_database/$database" "$read1" "$read2" result
ariba run --nucmer_min_id 80 "$ariba_database/$database" "$read1" "$read2" result
"""
}

Expand All @@ -103,7 +102,7 @@ process PARSE_OTHER_RESISTANCE {
tag "$sample_id"

input:
tuple val(sample_id), path(report), path(report_debug)
tuple val(sample_id), path(report_debug)
path metadata

output:
Expand All @@ -112,6 +111,6 @@ process PARSE_OTHER_RESISTANCE {
script:
output_file="other_amr_report.csv"
"""
parse_other_resistance.py "$report" "$report_debug" "$metadata" "$output_file"
parse_other_resistance.py "$report_debug" "$metadata" "$output_file"
"""
}
4 changes: 2 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ params {
depth = 20.00

// Default ARIBA referece sequences and metadata paths
ariba_ref = "$projectDir/data/ariba_ref_sequences-20230815.fasta"
ariba_metadata = "$projectDir/data/ariba_metadata-20230712.tsv"
ariba_ref = "$projectDir/data/ariba_ref_sequences.fasta"
ariba_metadata = "$projectDir/data/ariba_metadata.tsv"

// Toggle for removing .bam and .sam files mid-run to reduce storage requirement
// Warning: This will break the -resume function of Nextflow
Expand Down
4 changes: 2 additions & 2 deletions workflows/pipeline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ workflow PIPELINE {
PARSE_PBP_RESISTANCE(PBP_RESISTANCE.out.json)

// From Channel OVERALL_QC_PASSED_ASSEMBLIES_ch, infer resistance and determinants of other antimicrobials
// Output into Channel PARSE_OTHER_RESISTANCE.out.result
// Output into Channel PARSE_OTHER_RESISTANCE.out.report
OTHER_RESISTANCE(GET_ARIBA_DB.out.path, GET_ARIBA_DB.out.database, OVERALL_QC_PASSED_READS_ch)
PARSE_OTHER_RESISTANCE(OTHER_RESISTANCE.out.reports, params.ariba_metadata)
PARSE_OTHER_RESISTANCE(OTHER_RESISTANCE.out.report, params.ariba_metadata)

// Generate sample reports by merging outputs from all result-generating modules
GENERATE_SAMPLE_REPORT(
Expand Down

0 comments on commit 6981f2b

Please sign in to comment.