diff --git a/bin/get_lineage.sh b/bin/get_lineage.sh index 24ea2e4..09523ba 100755 --- a/bin/get_lineage.sh +++ b/bin/get_lineage.sh @@ -1,13 +1,16 @@ # Run PopPUNK to assign GPSCs to samples -# Add "prefix_" to all sample names in qfile to avoid poppunk_assign crashing due to sample name already exists in database -# Remove "prefix_" from all sample names in the result +# Replace all sample names in qfile with sequential names (gps_pipeline_poppunk_query_ followed by the line number) to avoid poppunk_assign crashing because a sample name already exists in the database +# Restore the original names of all samples in the result # Save results of individual sample into .csv with its name as filename -sed 's/^/prefix_/' "$QFILE" > safe_qfile.txt +awk -F '\t' '{ print "gps_pipeline_poppunk_query_" FNR "\t" $2 }' "$QFILE" > safe_qfile.txt + poppunk_assign --db "${POPPUNK_DIR}/${DB_NAME}" --external-clustering "${EXT_CLUSTERS_DIR}/${EXT_CLUSTERS_FILE}" --query safe_qfile.txt --output output --threads "$(nproc)" -sed 's/^prefix_//' output/output_external_clusters.csv > result.txt +tail -n +2 output/output_external_clusters.csv | sort -V > result.txt + +paste <(cut -f 1 "$QFILE") <(cut -f 2 -d ',' result.txt) > renamed_result.txt -awk -F , 'NR!=1 { print "\"GPSC\"\n" "\"" $2 "\"" > $1 ".csv" }' result.txt +awk -F '\t' '{ print "\"GPSC\"\n" "\"" $2 "\"" > $1 ".csv" }' renamed_result.txt diff --git a/main.nf b/main.nf index 9acd77a..a8d6a92 100644 --- a/main.nf +++ b/main.nf @@ -1,7 +1,7 @@ #!/usr/bin/env nextflow // Version of this release -pipelineVersion = '1.0.0-rc6' +pipelineVersion = '1.0.0-rc7' // Import workflow modules include { PIPELINE } from "$projectDir/workflows/pipeline"