From bb5d9c2a032a361734d56b51ee5b7edcc3f911b1 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Wed, 28 Aug 2024 15:30:25 -0400 Subject: [PATCH 01/16] adding 10x wrapper function --- pipelines/skylab/atac/atac.wdl | 36 ++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 45f6a7175d..9ed8ce5324 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -58,7 +58,7 @@ workflow ATAC { String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919" String samtools_docker = "samtools-dist-bwa:3.0.0" String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" - String snap_atac_docker = "snapatac2:1.0.9-2.6.3-1715865353" + String snap_atac_docker = "snapatac2:lk-PD-2738" # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { @@ -158,11 +158,13 @@ workflow ATAC { File bam_aligned_output_atac = select_first([BBTag.bb_bam, BWAPairedEndAlignment.bam_aligned_output]) File fragment_file_atac = select_first([BB_fragment.fragment_file, CreateFragmentFile.fragment_file]) File snap_metrics_atac = select_first([BB_fragment.Snap_metrics,CreateFragmentFile.Snap_metrics]) + File library_metrics = select_first([BB_fragment.atac_library_metrics, CreateFragmentFile.atac_library_metrics]) output { File bam_aligned_output = bam_aligned_output_atac File fragment_file = fragment_file_atac File snap_metrics = snap_metrics_atac + File library_metrics_file = library_metrics } } @@ -547,13 +549,38 @@ task CreateFragmentFile { import snapatac2.preprocessing as pp import snapatac2 as snap import anndata as ad + from collections import OrderedDict + import csv # extract CB or BB (if preindex is true) tag from bam file to create fragment file if preindex == "true": - pp.make_fragment_file("~{bam}", "~{bam_base_name}.fragments.tsv", is_paired=True, barcode_tag="BB") + 
data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", is_paired=True, barcode_tag="BB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) elif preindex == "false": - pp.make_fragment_file("~{bam}", "~{bam_base_name}.fragments.tsv", is_paired=True, barcode_tag="CB") - + data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", is_paired=True, barcode_tag="CB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) + + # Add NHashID to metrics + nhash_ID_value = "XXX" + data = OrderedDict({'NHash_ID': atac_nhash_id, **data}) + # Flatten the dictionary + flattened_data = [] + for category, metrics in data.items(): + if isinstance(metrics, dict): + for metric, value in metrics.items(): + flattened_data.append((metric, value)) + else: + flattened_data.append((category, metrics)) + + # Write to CSV + csv_file_path = "~{bam_base_name}_~{atac_nhash}.atac_metrics.csv" + with open(csv_file_path, mode='w', newline='') as file: + writer = csv.writer(file) + writer.writerow(['Metric', 'Value']) # Write header + writer.writerows(flattened_data) # Write data + + print(f"Dictionary successfully written to {csv_file_path}") + + + # calculate quality metrics; note min_num_fragments and min_tsse are set to 0 instead of default # those settings allow us to retain all barcodes @@ -580,5 +607,6 @@ task CreateFragmentFile { output { File fragment_file = "~{bam_base_name}.fragments.tsv" File Snap_metrics = "~{bam_base_name}.metrics.h5ad" + File atac_library_metrics = "~{bam_base_name}_~{atac_nhash}.atac_metrics.csv" } } From 6340acad47835658f264af02fe62c36067eb618d Mon Sep 17 00:00:00 2001 From: ekiernan Date: Wed, 28 Aug 2024 15:34:04 -0400 Subject: [PATCH 02/16] fixing nhash_id variable --- pipelines/skylab/atac/atac.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 9ed8ce5324..67166fbcf1 100644 --- a/pipelines/skylab/atac/atac.wdl +++ 
b/pipelines/skylab/atac/atac.wdl @@ -571,7 +571,7 @@ task CreateFragmentFile { flattened_data.append((category, metrics)) # Write to CSV - csv_file_path = "~{bam_base_name}_~{atac_nhash}.atac_metrics.csv" + csv_file_path = "~{bam_base_name}_~{atac_nhash_id}.atac_metrics.csv" with open(csv_file_path, mode='w', newline='') as file: writer = csv.writer(file) writer.writerow(['Metric', 'Value']) # Write header @@ -607,6 +607,6 @@ task CreateFragmentFile { output { File fragment_file = "~{bam_base_name}.fragments.tsv" File Snap_metrics = "~{bam_base_name}.metrics.h5ad" - File atac_library_metrics = "~{bam_base_name}_~{atac_nhash}.atac_metrics.csv" + File atac_library_metrics = "~{bam_base_name}_~{atac_nhash_id}.atac_metrics.csv" } } From ae011ccd51b4f83545c038a15fd6fc2d6fc9dcb0 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 29 Aug 2024 08:27:37 -0400 Subject: [PATCH 03/16] added h5ad output --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 67166fbcf1..c39af5fb25 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -554,7 +554,7 @@ task CreateFragmentFile { # extract CB or BB (if preindex is true) tag from bam file to create fragment file if preindex == "true": - data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", is_paired=True, barcode_tag="BB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) + data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", "temp_metrics.h5ad", is_paired=True, barcode_tag="BB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) elif preindex == "false": data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", is_paired=True, barcode_tag="CB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) From 9e91e40cb0d4e8e7a789edc4ba8814022b455e39 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 29 Aug 2024 
09:24:19 -0400 Subject: [PATCH 04/16] adding h5ad argument to second command --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index c39af5fb25..e1abc0aacb 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -556,7 +556,7 @@ task CreateFragmentFile { if preindex == "true": data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", "temp_metrics.h5ad", is_paired=True, barcode_tag="BB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) elif preindex == "false": - data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", is_paired=True, barcode_tag="CB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) + data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", "temp_metrics.h5ad", is_paired=True, barcode_tag="CB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) # Add NHashID to metrics nhash_ID_value = "XXX" From f553620e650aeac4d791de445ade178212b099de Mon Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 29 Aug 2024 09:24:53 -0400 Subject: [PATCH 05/16] removing temp metrics --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index e1abc0aacb..a07f4c2a70 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -584,7 +584,7 @@ task CreateFragmentFile { # calculate quality metrics; note min_num_fragments and min_tsse are set to 0 instead of default # those settings allow us to retain all barcodes - pp.import_data("~{bam_base_name}.fragments.tsv", file="temp_metrics.h5ad", chrom_sizes=chrom_size_dict, min_num_fragments=0) + atac_data = ad.read_h5ad("temp_metrics.h5ad") # Add nhash_id to h5ad file as unstructured metadata atac_data.uns['NHashID'] = atac_nhash_id From 9f62353ab347735f489109502742e6ce90eef3e2 Mon 
Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 29 Aug 2024 09:41:36 -0400 Subject: [PATCH 06/16] Update atac.wdl --- pipelines/skylab/atac/atac.wdl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index a07f4c2a70..b5d2b3f022 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -579,12 +579,6 @@ task CreateFragmentFile { print(f"Dictionary successfully written to {csv_file_path}") - - - - # calculate quality metrics; note min_num_fragments and min_tsse are set to 0 instead of default - # those settings allow us to retain all barcodes - atac_data = ad.read_h5ad("temp_metrics.h5ad") # Add nhash_id to h5ad file as unstructured metadata atac_data.uns['NHashID'] = atac_nhash_id From 2e597da2d3a2c41ab326d87e06ba05a7d012a61c Mon Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 29 Aug 2024 09:48:45 -0400 Subject: [PATCH 07/16] removing header in ATAC and starting documentation for metrics --- pipelines/skylab/atac/atac.wdl | 1 - website/docs/Pipelines/ATAC/library-metrics.md | 12 ++++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 website/docs/Pipelines/ATAC/library-metrics.md diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index b5d2b3f022..5fcb3ffa7d 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -574,7 +574,6 @@ task CreateFragmentFile { csv_file_path = "~{bam_base_name}_~{atac_nhash_id}.atac_metrics.csv" with open(csv_file_path, mode='w', newline='') as file: writer = csv.writer(file) - writer.writerow(['Metric', 'Value']) # Write header writer.writerows(flattened_data) # Write data print(f"Dictionary successfully written to {csv_file_path}") diff --git a/website/docs/Pipelines/ATAC/library-metrics.md b/website/docs/Pipelines/ATAC/library-metrics.md new file mode 100644 index 0000000000..a16db5b9d2 --- /dev/null +++ b/website/docs/Pipelines/ATAC/library-metrics.md @@ -0,0 +1,12 
@@ +--- +sidebar_position: 2 +--- + +# ATAC Library Metrics Overview + +The [ATAC pipeline](README.md) uses [SnapATAC2](https://github.com/kaizhang/SnapATAC2) to generate library-level metrics in CSV format. + + +| Metric | Description | +| --- | --- | + From 1941d99c4ce927e11c626e4b824ff32a90f04e8b Mon Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 29 Aug 2024 09:53:35 -0400 Subject: [PATCH 08/16] added metric definitions to the library overview --- .../docs/Pipelines/ATAC/library-metrics.md | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/website/docs/Pipelines/ATAC/library-metrics.md b/website/docs/Pipelines/ATAC/library-metrics.md index a16db5b9d2..184cfeb8eb 100644 --- a/website/docs/Pipelines/ATAC/library-metrics.md +++ b/website/docs/Pipelines/ATAC/library-metrics.md @@ -8,5 +8,28 @@ The [ATAC pipeline](README.md) uses [SnapATAC2](https://github.com/kaizhang/Snap | Metric | Description | -| --- | --- | +| --- | --- | +| NHash_ID | A unique identifier used to track and reference the specific sample or dataset. | +| Sequenced_reads | The total number of reads generated from the sequencing process, which includes both reads that are mapped and unmapped. | +| Sequenced_read_pairs | The total number of read pairs (two reads per pair) generated from the sequencing process. This is typically half of the total sequenced reads if all reads are paired. | +| Fraction_valid_barcode | The fraction of reads that contain a valid barcode, indicating the proportion of reads that are correctly assigned to a specific cell or sample. | +| Fraction_Q30_bases_in_read_1 | The proportion of bases in Read 1 that have a Phred quality score of 30 or higher, indicating high-confidence base calls. | +| Fraction_Q30_bases_in_read_2 | The proportion of bases in Read 2 that have a Phred quality score of 30 or higher, indicating high-confidence base calls. 
| +| Number_of_cells | The estimated number of cells captured and sequenced in the experiment, based on the barcodes identified. | +| Mean_raw_read_pairs_per_cell | The average number of raw read pairs associated with each cell, providing an indication of the sequencing depth per cell. | +| Median_high-quality_fragments_per_cell | The median number of high-quality (e.g., confidently mapped) fragments associated with each cell, representing typical fragment quality across cells. | +| Fraction_of_high-quality_fragments_in_cells | The fraction of high-quality fragments that are associated with identified cells, indicating the proportion of good-quality data that is cell-associated. | +| Fraction_of_transposition_events_in_peaks_in_cells | The fraction of transposition events within identified cells that occur within peaks, which are regions of accessible chromatin. | +| Fraction_duplicates | The fraction of sequenced fragments that are duplicates, which can result from PCR amplification or other factors, indicating the redundancy in the sequencing data. | +| Fraction_confidently_mapped | The fraction of sequenced fragments that are confidently mapped to the reference genome, indicating the proportion of reads that align well to the genome. | +| Fraction_unmapped | The fraction of sequenced fragments that could not be mapped to the reference genome, which can indicate sequencing errors, contamination, or regions not covered by the reference. | +| Fraction_nonnuclear | The fraction of sequenced fragments that are mapped to non-nuclear (e.g., mitochondrial or other organellar) DNA, providing insight into contamination or organellar activity. | +| Fraction_fragment_in_nucleosome_free_region | The fraction of sequenced fragments that map to nucleosome-free regions, which are indicative of accessible chromatin. 
| +| Fraction_fragment_flanking_single_nucleosome | The fraction of sequenced fragments that map to regions flanking single nucleosomes, indicating regions with partial chromatin accessibility. | +| TSS_enrichment_score | A measure of the enrichment of transposition events at transcription start sites (TSS), indicating the accessibility of promoters across the genome. | +| Fraction_of_high-quality_fragments_overlapping_TSS | The fraction of high-quality fragments that overlap transcription start sites (TSS), providing insight into promoter accessibility. | +| Number_of_peaks | The total number of peaks, or regions of accessible chromatin, identified in the dataset, representing potential regulatory elements. | +| Fraction_of_genome_in_peaks | The fraction of the genome that is covered by identified peaks, indicating the extent of chromatin accessibility across the genome. | +| Fraction_of_high-quality_fragments_overlapping_peaks | The fraction of high-quality fragments that overlap with identified peaks, providing an indication of the efficiency of the assay in capturing accessible regions. 
| + From 0523b89bfef6693bceeb9f0696fbf88052987e77 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 29 Aug 2024 12:33:18 -0400 Subject: [PATCH 09/16] updating Multiome documentation and outputs with new ATAC metrics --- pipelines/skylab/multiome/Multiome.wdl | 1 + website/docs/Pipelines/ATAC/README.md | 1 + website/docs/Pipelines/Multiome_Pipeline/README.md | 1 + 3 files changed, 3 insertions(+) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 21584c01dd..0ecb33aa27 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -179,6 +179,7 @@ workflow Multiome { File fragment_file_atac = JoinBarcodes.atac_fragment_tsv File fragment_file_index = JoinBarcodes.atac_fragment_tsv_tbi File snap_metrics_atac = JoinBarcodes.atac_h5ad_file + File atac_library_metrics = Atac.library_metrics # optimus outputs File genomic_reference_version_gex = Optimus.genomic_reference_version diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md index 1bb2dd6399..fc3a985ab4 100644 --- a/website/docs/Pipelines/ATAC/README.md +++ b/website/docs/Pipelines/ATAC/README.md @@ -93,6 +93,7 @@ To see specific tool parameters, select the task WDL link in the table; then vie | bam_aligned_output | ``.bam | BAM containing aligned reads from ATAC workflow. | | fragment_file | ``.fragments.tsv | TSV containing fragment start and stop coordinates per barcode. In order, the columns are "Chromosome", "Start", "Stop", "ATAC Barcode", and "Number Reads". | | snap_metrics | ``_``.atac_metrics.csv | CSV file containing library-level metrics. 
Read more in the [Library Metrics Overview](library-metrics.md) ## Versioning and testing diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md index d77c5ec3b0..afb2777668 100644 --- a/website/docs/Pipelines/Multiome_Pipeline/README.md +++ b/website/docs/Pipelines/Multiome_Pipeline/README.md @@ -107,6 +107,7 @@ The Multiome workflow calls two WARP subworkflows, one external subworkflow (opt | fragment_file_atac | `_atac.fragments.sorted.tsv.gz` | Sorted and bgzipped TSV file containing fragment start and stop coordinates per barcode. The columns are "Chromosome", "Start", "Stop", "ATAC Barcode", "Number of reads", and "GEX Barcode". | | fragment_file_index | `_atac.fragments.sorted.tsv.gz.tbi` | tabix index file for the fragment file. | | snap_metrics_atac | `_atac.metrics.h5ad` | h5ad (Anndata) file containing per-barcode metrics from SnapATAC2. Also contains the equivalent gene expression barcode for each ATAC barcode in the `gex_barcodes` column of the `h5ad.obs` property. See the [ATAC Count Matrix Overview](../ATAC/count-matrix-overview.md) for more details. | +| atac_library_metrics | `_.atac_metrics.csv` | CSV with library-level metrics produced by SnapATAC2. See the ATAC [Library Level Metrics Overview](../ATAC/library-metrics.md) for more details. | | genomic_reference_version_gex | `.txt` | File containing the Genome build, source and GTF annotation version. | | bam_gex | `_gex.bam` | BAM file containing aligned reads from Optimus workflow. | | matrix_gex | `_gex_sparse_counts.npz` | NPZ file containing raw gene by cell counts. 
| From 17b2f94507b9efcf6f1b8b50188888fc8aeaa67c Mon Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 29 Aug 2024 12:46:44 -0400 Subject: [PATCH 10/16] updated changelogs, pipeline versions, and final outputs --- pipelines/skylab/atac/atac.changelog.md | 5 +++++ pipelines/skylab/atac/atac.wdl | 2 +- pipelines/skylab/multiome/Multiome.changelog.md | 5 +++++ pipelines/skylab/multiome/Multiome.wdl | 4 ++-- pipelines/skylab/paired_tag/PairedTag.changelog.md | 5 +++++ pipelines/skylab/paired_tag/PairedTag.wdl | 4 +++- 6 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pipelines/skylab/atac/atac.changelog.md b/pipelines/skylab/atac/atac.changelog.md index ffe875fa0b..1207b32c1a 100644 --- a/pipelines/skylab/atac/atac.changelog.md +++ b/pipelines/skylab/atac/atac.changelog.md @@ -1,3 +1,8 @@ +# 2.3.0 +2024-08-29 (Date of Last Commit) + +* Updated the SnapATAC2 docker to include v2.7.0; the pipeline will now produce a library-level summary metric CSV for the BAM. + # 2.2.3 2024-08-02 (Date of Last Commit) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 5fcb3ffa7d..bc4d0c11f6 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -46,7 +46,7 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "2.2.3" + String pipeline_version = "2.3.0" # Determine docker prefix based on cloud provider String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index afc52d57f9..98904837e8 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,3 +1,8 @@ +# 5.6.0 +2024-08-02 (Date of Last Commit) + +* Updated the SnapATAC2 docker to include v2.7.0; the pipeline will now produce a library-level summary metric CSV for the BAM. 
+ # 5.5.0 2024-08-06 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 0ecb33aa27..d647e82944 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -9,7 +9,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow Multiome { - String pipeline_version = "5.5.0" + String pipeline_version = "5.6.0" input { @@ -179,7 +179,7 @@ workflow Multiome { File fragment_file_atac = JoinBarcodes.atac_fragment_tsv File fragment_file_index = JoinBarcodes.atac_fragment_tsv_tbi File snap_metrics_atac = JoinBarcodes.atac_h5ad_file - File atac_library_metrics = Atac.library_metrics + File atac_library_metrics = Atac.library_metrics_file # optimus outputs File genomic_reference_version_gex = Optimus.genomic_reference_version diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index e9da183ec0..ba4a05376c 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,3 +1,8 @@ +# 1.6.0 +2024-08-02 (Date of Last Commit) + +* Updated the SnapATAC2 docker to include v2.7.0; the pipeline will now produce a library-level summary metric CSV for the BAM. 
+ # 1.5.0 2024-08-06 (Date of Last Commit) diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index e35a153def..4206f4fabb 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow PairedTag { - String pipeline_version = "1.5.0" + String pipeline_version = "1.6.0" input { @@ -149,6 +149,7 @@ workflow PairedTag { File atac_fragment_out = select_first([ParseBarcodes.atac_fragment_tsv,Atac_preindex.fragment_file]) File atac_h5ad_out = select_first([ParseBarcodes.atac_h5ad_file, Atac_preindex.snap_metrics]) + output { String pairedtag_pipeline_version_out = pipeline_version @@ -157,6 +158,7 @@ workflow PairedTag { File bam_aligned_output_atac = Atac_preindex.bam_aligned_output File fragment_file_atac = atac_fragment_out File snap_metrics_atac = atac_h5ad_out + File atac_library_final = Atac_preindex.library_metrics_file # optimus outputs File genomic_reference_version_gex = Optimus.genomic_reference_version From 45bf059f0effc06e1a66624810411eb4ccc930c4 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Thu, 29 Aug 2024 16:47:21 +0000 Subject: [PATCH 11/16] Updated pipeline_versions.txt with all pipeline version information --- pipeline_versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index f8a8c4a1a9..a01deead2d 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -1,8 +1,8 @@ Pipeline Name Version Date of Last Commit Optimus 7.6.0 2024-08-06 -Multiome 5.5.0 2024-08-06 -PairedTag 1.5.0 2024-08-06 -atac 2.2.3 2024-08-02 +Multiome 5.6.0 2024-08-02 +PairedTag 1.6.0 2024-08-02 +atac 2.3.0 2024-08-29 SlideSeq 3.4.0 2024-08-06 snm3C 4.0.4 2024-08-06 MultiSampleSmartSeq2SingleNucleus 1.4.2 2024-08-25-02 From 63f5b1988d79b7d76a198252e6bc913a7d69302e Mon Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 29 Aug 2024 
12:58:18 -0400 Subject: [PATCH 12/16] Update README.md --- pipelines/skylab/paired_tag/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/paired_tag/README.md b/pipelines/skylab/paired_tag/README.md index b00f015d65..97a801a495 100644 --- a/pipelines/skylab/paired_tag/README.md +++ b/pipelines/skylab/paired_tag/README.md @@ -1,6 +1,6 @@ ## Announcing a new site for WARP documentation! -Paired-tag documentation has moved! Read more about the [Paired-Tag workflow](https://broadinstitute.github.io/warp/docs/Pipelines/PairedTag_Pipeline/README) on the new [WARP documentation site](https://broadinstitute.github.io/warp/)! +Paired-tag documentation has moved! Read more about the [Paired-Tag workflow](https://broadinstitute.github.io/warp/docs/Pipelines/PairedTag_Pipeline/README) on the new [WARP documentation site](https://broadinstitute.github.io/warp/)! ### Paired-Tag summary From 0eb8920bd4dd8d204ad7c5dd85be58dc14ce084c Mon Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 29 Aug 2024 14:06:55 -0400 Subject: [PATCH 13/16] fixed docker version --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index bc4d0c11f6..a1b19acdb9 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -58,7 +58,7 @@ workflow ATAC { String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919" String samtools_docker = "samtools-dist-bwa:3.0.0" String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" - String snap_atac_docker = "snapatac2:lk-PD-2738" + String snap_atac_docker = "snapatac2:1.1.0" # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. 
If not, raise an error if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { From aad3ddcc8dd8124d352780fc83a4643ec3331b38 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Fri, 6 Sep 2024 08:37:10 -0400 Subject: [PATCH 14/16] updating memory on CreateFragment to 64 GB --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index a1b19acdb9..b207e393fb 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -507,7 +507,7 @@ task CreateFragmentFile { File annotations_gtf Boolean preindex Int disk_size = 500 - Int mem_size = 16 + Int mem_size = 64 Int nthreads = 4 String cpuPlatform = "Intel Cascade Lake" String docker_path From d8aa1b20768d30866169c5b9fdf06cd15508f625 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Fri, 6 Sep 2024 12:37:34 +0000 Subject: [PATCH 15/16] Updated pipeline_versions.txt with all pipeline version information --- pipeline_versions.txt | 46 +++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index a01deead2d..4d12055066 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -1,42 +1,42 @@ Pipeline Name Version Date of Last Commit -Optimus 7.6.0 2024-08-06 -Multiome 5.6.0 2024-08-02 +MultiSampleSmartSeq2SingleNucleus 1.4.2 2024-08-25-02 +MultiSampleSmartSeq2 2.2.21 2023-04-19 PairedTag 1.6.0 2024-08-02 +Optimus 7.6.0 2024-08-06 atac 2.3.0 2024-08-29 -SlideSeq 3.4.0 2024-08-06 snm3C 4.0.4 2024-08-06 -MultiSampleSmartSeq2SingleNucleus 1.4.2 2024-08-25-02 -scATAC 1.3.2 2023-08-03 SmartSeq2SingleSample 5.1.20 2023-04-19 +Multiome 5.6.0 2024-08-02 +scATAC 1.3.2 2023-08-03 BuildIndices 3.0.0 2023-12-06 -MultiSampleSmartSeq2 2.2.21 2023-04-19 -CEMBA 1.1.6 2023-12-18 +SlideSeq 3.4.0 2024-08-06 BuildCembaReferences 1.0.0 2020-11-15 -UltimaGenomicsWholeGenomeCramOnly 1.0.20 2024-08-02 +CEMBA 1.1.6 
2023-12-18 GDCWholeGenomeSomaticSingleSample 1.3.2 2024-08-02 -ExomeGermlineSingleSample 3.1.22 2024-06-12 -UltimaGenomicsWholeGenomeGermline 1.0.20 2024-08-02 -WholeGenomeGermlineSingleSample 3.2.1 2024-06-12 -VariantCalling 2.2.1 2024-06-12 +UltimaGenomicsWholeGenomeCramOnly 1.0.20 2024-08-02 +JointGenotypingByChromosomePartOne 1.4.12 2023-12-18 +JointGenotypingByChromosomePartTwo 1.4.11 2023-12-18 UltimaGenomicsJointGenotyping 1.1.7 2023-12-18 JointGenotyping 1.6.10 2023-12-18 ReblockGVCF 2.2.1 2024-06-12 -JointGenotypingByChromosomePartTwo 1.4.11 2023-12-18 -JointGenotypingByChromosomePartOne 1.4.12 2023-12-18 -ExternalExomeReprocessing 3.2.2 2024-08-02 -ExternalWholeGenomeReprocessing 2.2.2 2024-08-02 -ExomeReprocessing 3.2.2 2024-08-02 -CramToUnmappedBams 1.1.3 2024-08-02 -WholeGenomeReprocessing 3.2.2 2024-08-02 -IlluminaGenotypingArray 1.12.21 2024-08-02 -Arrays 2.6.27 2024-08-02 -MultiSampleArrays 1.6.2 2024-08-02 +VariantCalling 2.2.1 2024-06-12 +WholeGenomeGermlineSingleSample 3.2.1 2024-06-12 +UltimaGenomicsWholeGenomeGermline 1.0.20 2024-08-02 +ExomeGermlineSingleSample 3.1.22 2024-06-12 ValidateChip 1.16.5 2024-08-02 +Arrays 2.6.27 2024-08-02 Imputation 1.1.13 2024-05-21 -RNAWithUMIsPipeline 1.0.16 2023-12-18 +MultiSampleArrays 1.6.2 2024-08-02 BroadInternalUltimaGenomics 1.0.21 2024-08-02 BroadInternalArrays 1.1.11 2024-08-02 BroadInternalImputation 1.1.12 2024-08-02 BroadInternalRNAWithUMIs 1.0.33 2024-08-02 +CramToUnmappedBams 1.1.3 2024-08-02 +ExternalWholeGenomeReprocessing 2.2.2 2024-08-02 +ExternalExomeReprocessing 3.2.2 2024-08-02 +WholeGenomeReprocessing 3.2.2 2024-08-02 +ExomeReprocessing 3.2.2 2024-08-02 +IlluminaGenotypingArray 1.12.21 2024-08-02 CheckFingerprint 1.0.20 2024-08-02 AnnotationFiltration 1.2.5 2023-12-18 +RNAWithUMIsPipeline 1.0.16 2023-12-18 From 1d67da79fc69cdffc234a55f251dc47a9253c078 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Fri, 6 Sep 2024 09:00:43 -0400 Subject: [PATCH 16/16] Update atac.changelog.md --- 
pipelines/skylab/atac/atac.changelog.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.changelog.md b/pipelines/skylab/atac/atac.changelog.md index 1207b32c1a..544fb8ea50 100644 --- a/pipelines/skylab/atac/atac.changelog.md +++ b/pipelines/skylab/atac/atac.changelog.md @@ -1,7 +1,9 @@ # 2.3.0 2024-08-29 (Date of Last Commit) -* Updated the SnapATAC2 docker to include v2.7.0; the pipeline will now produce a library-level summary metric CSV for the BAM. +* Updated the SnapATAC2 docker to include v2.7.0; the pipeline will now produce a library-level summary metric CSV for the BAM. + +* Increased the default memory for the CreateFragmentFile task from 16 GB to 64 GB # 2.2.3 2024-08-02 (Date of Last Commit)