Merge branch 'master' into advanced-update

nextflow-io · Nov 12, 2024 · e15f2b4 · e15f2b4
2 parents b3686de + ea3b640
commit e15f2b4
Show file tree

Hide file tree

Showing 270 changed files with 23,934 additions and 699 deletions.
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -0,0 +1,36 @@
+{
+    "name": "nfcore",
+    "image": "ghcr.io/nextflow-io/training:latest",
+    "remoteUser": "gitpod",
+    "features": {
+        "ghcr.io/devcontainers/features/docker-outside-of-docker:1": {}
+    },
+    "remoteEnv": {
+        "NXF_HOME": "/workspaces/.nextflow",
+        "HOST_PROJECT_PATH": "${localWorkspaceFolder}"
+    },
+    // Configure tool-specific properties.
+    "customizations": {
+        // Configure properties specific to VS Code.
+        "vscode": {
+            // Set *default* container specific settings.json values on container create.
+            "settings": {
+                "python.defaultInterpreterPath": "/opt/conda/bin/python"
+            },
+            // Add the IDs of extensions you want installed when the container is created.
+            "extensions": [
+                "ms-python.python",
+                "ms-python.vscode-pylance",
+                "nf-core.nf-core-extensionpack",
+                "nextflow.nextflow",
+                "codezombiech.gitignore"
+            ]
+        }
+    },
+    "portsAttributes": {
+        "3000": {
+            "label": "Application",
+            "onAutoForward": "openPreview"
+        }
+    }
+}
diff --git a/.github/gitpod.Dockerfile b/.github/gitpod.Dockerfile
@@ -15,14 +15,17 @@ RUN apt-get update --quiet && \
         curl \
         tree \
         graphviz \
-        software-properties-common
-
+        software-properties-common && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
 # Taken from: https://github.com/nf-core/tools/blob/master/nf_core/gitpod/gitpod.Dockerfile
 # Install Apptainer (Singularity)
 RUN add-apt-repository -y ppa:apptainer/ppa && \
     apt-get update --quiet && \
-    apt install -y apptainer
+    apt install -y apptainer && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
 # Install Conda
 RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
@@ -35,6 +38,11 @@ ENV PATH="/opt/conda/bin:$PATH"
 RUN mkdir -p /workspace/data \
     && chown -R gitpod:gitpod /opt/conda /workspace/data
 
+# Install Tower Agent
+RUN curl -fSL https://github.com/seqeralabs/tower-agent/releases/latest/download/tw-agent-linux-x86_64 > tw-agent && \
+    chmod +x tw-agent && \
+    mv tw-agent /usr/local/bin/tw-agent
+
 # Change user to gitpod
 USER gitpod
 
@@ -56,6 +64,7 @@ RUN conda config --add channels defaults && \
         black \
         prettier \
         pre-commit \
+        linkify-it-py \
         pytest-workflow && \
     conda clean --all --force-pkgs-dirs --yes
 

diff --git a/.gitpod.yml b/.gitpod.yml
@@ -23,14 +23,14 @@ tasks:
     - before: printf 'unset JAVA_TOOL_OPTIONS\n' >> $HOME/.bashrc && exit
 
     - name: Start web server
-      command: gp ports await 23000 && gp preview https://training.nextflow.io
+      command: gp ports await 23000 && gp preview https://training.nextflow.io/hello_nextflow
 
     - name: Load Nextflow Tutorial
       command: docker pull -q nextflow/rnaseq-nf
 
     - name: Start Nextflow Tutorial
       command: |
-          cd nf-training
+          cd hello-nextflow
           source $HOME/.bashrc
           export PS1='\[\e[3;36m\]${PWD/*\//} ->\[\e[0m\] '
           unset JAVA_TOOL_OPTIONS

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,23 @@
+repos:
+    - repo: https://github.com/pre-commit/mirrors-prettier
+      rev: "v3.1.0"
+      hooks:
+          - id: prettier
+            additional_dependencies:
+                - prettier@3.2.5
+
+    - repo: https://github.com/editorconfig-checker/editorconfig-checker.python
+      rev: "2.7.3"
+      hooks:
+          - id: editorconfig-checker
+            alias: ec
+
+    - repo: https://github.com/pre-commit/pre-commit-hooks
+      rev: v5.0.0
+      hooks:
+          - id: trailing-whitespace
+            exclude_types:
+                - svg
+          - id: end-of-file-fixer
+            exclude_types:
+                - svg
diff --git a/README.md b/README.md
@@ -9,20 +9,20 @@ We are excited to have you on the path to writing reproducible and scalable scie
 
 -   👉🏻 Written training material: <https://training.nextflow.io>
 
--   👩🏻‍💻 Instructions on loading this repository within a GitPod environment: <https://training.nextflow.io/basic_training/setup/>
+-   👩🏻‍💻 Instructions on loading this repository within a GitPod environment: <https://training.nextflow.io/envsetup/>
 
 -   📚 Nextflow documentation: <https://www.nextflow.io/docs/latest/>
 
 ## Contributions
 
-We welcome fixes and improvements from the community. Please fork the repository and create pull-requests with any improvements to the docs.
+We welcome fixes and improvements from the community. Please fork the repository and create pull-requests with any improvements you'd like to suggest to the docs.
 
 You can find instructions about how to develop the training material code in [`CONTRIBUTING.md`](CONTRIBUTING.md). If you want to contribute with a translation instead, check [`TRANSLATING.md`](TRANSLATING.md).
 
 ## Credits & Copyright
 
-All training material was originally written by [Seqera](https://seqera.io) but has been made open-source ([CC BY-NC-ND](https://creativecommons.org/licenses/by-nc-nd/4.0/)) for the community.
+This training material is developed and maintained by [Seqera](https://seqera.io) and released under an open-source license ([CC BY-NC-ND](https://creativecommons.org/licenses/by-nc-nd/4.0/)) for the benefit of the community. You are welcome to reuse these materials according to the terms of the license. If you are an instructor running your own trainings, we'd love to hear about how it goes and what we could do to make it easier.
 
 <a rel="license" href="http://creativecommons.org/licenses/by-nc-nd/4.0/"><img alt="Creative Commons License" src="docs/assets/img/cc_by-nc-nd.svg" /></a>
 
-> Copyright 2020-2023, Seqera. All examples and descriptions are licensed under the <a rel="license" href="http://creativecommons.org/licenses/by-nc-nd/4.0/">Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License</a>.
+> Copyright 2024, Seqera. All examples and descriptions are licensed under the <a rel="license" href="http://creativecommons.org/licenses/by-nc-nd/4.0/">Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License</a>.
diff --git a/docs/advanced/configuration.md b/docs/advanced/configuration.md
@@ -25,7 +25,7 @@ This gives us two complications:
 
 There may be some configuration values that you will want applied on all runs for a given system. These configuration values should be written to `~/.nextflow/config`.
 
-For example - you may have an account on a HPC system and you know that you will always want to submit jobs using the SLURM scheduler when using that machine and always use the Singularity container engine. In this case, your `~/.nextflow/config` file may include:
+For example - you may have an account on an HPC system and you know that you will always want to submit jobs using the SLURM scheduler when using that machine and always use the Singularity container engine. In this case, your `~/.nextflow/config` file may include:
 
 ```groovy
 process.executor = 'slurm'
@@ -36,10 +36,10 @@ These configuration values would be inherited by every run on that system withou
 
 ## Overriding for a run - `$PWD/nextflow.config`
 
-Move into the chapter example directory:
+Create a chapter example directory:
 
 ```
-cd configuration
+mkdir configuration && cd configuration
 ```
 
 ### Overriding Process Directives
@@ -72,7 +72,7 @@ Glob pattern matching can also be used:
 
 ```groovy
 process {
-    withLabel: '.*:INDEX' {
+    withName: '.*:INDEX' {
         cpus = 2
     }
 }

diff --git a/docs/advanced/grouping.md b/docs/advanced/grouping.md
@@ -155,7 +155,7 @@ MapReads( samples, reference )
 | view
 ```
 
-This is easy enough, but the `groupTuple` operator has to wait until all items are emitted from the incoming queue before it is able to reassemble the output queue. If even one read mapping job takes a long time, the processing of all other samples is held up. We need a way of signalling to nextflow how many items are in a given group so that items can be emitted as early as possible.
+This is easy enough, but the `groupTuple` operator has to wait until all items are emitted from the incoming queue before it is able to reassemble the output queue. If even one read mapping job takes a long time, the processing of all other samples is held up. We need a way of signalling to Nextflow how many items are in a given group so that items can be emitted as early as possible.
 
 By default, the `groupTuple` operator groups on the first item in the element, which at the moment is a `Map`. We can turn this map into a special class using the `groupKey` method, which takes our grouping object as a first parameter and the number of expected elements in the second parameter.
 

diff --git a/docs/advanced/index.md b/docs/advanced/index.md
@@ -1,10 +1,4 @@
----
-description: Overview of the Advanced Nextflow Training material
-hide:
-    - toc
----
-
-# Welcome
+# Advanced Training
 
 Welcome to our Nextflow workshop for intermediate and advanced users!
 
@@ -16,6 +10,30 @@ By the end of this workshop, you will have the skills and knowledge to create co
 
 Let's get started!
 
+[![Open in Gitpod](https://img.shields.io/badge/Gitpod-%20Open%20in%20Gitpod-908a85?logo=gitpod)](https://gitpod.io/#https://github.com/nextflow-io/training)
+
+## Learning objectives
+
+By the end of this course you should be able to:
+
+-   Describe commonly used and well-understood operators
+-   Apply good practices for the propagation of metadata
+-   Group and split channels
+-   Apply Groovy helper classes to Nextflow scripts
+-   Sensibly structure workflows
+-   Apply layers of configuration to a workflow
+
+## Audience & prerequisites
+
+Please note that this is **not** a beginner's workshop and familiarity with Nextflow, the command line, and common file formats is assumed.
+
+**Prerequisites**
+
+-   A GitHub account
+-   Experience with the command line
+-   Familiarity with Nextflow and Groovy
+-   An understanding of common file formats
+
 ## Follow the training video
 
 We run a free online training event for this course approximately every six months. Videos are streamed to YouTube and questions are handled in the nf-core Slack community. You can watch the recording of the most recent training ([September, 2023](https://nf-co.re/events/2023/training-sept-2023/)) below:

diff --git a/docs/advanced/introduction.md b/docs/advanced/introduction.md
@@ -0,0 +1,41 @@
+# Introduction
+
+Welcome to our Nextflow workshop for intermediate and advanced users!
+
+In this workshop, we will explore the advanced features of the Nextflow language and runtime, and learn how to use them to write efficient and scalable data-intensive workflows. We will cover topics such as parallel execution, error handling, and workflow customization.
+
+Please note that this is not an introductory workshop, and we will assume some basic familiarity with Nextflow.
+
+By the end of this workshop, you will have the skills and knowledge to create complex and powerful Nextflow pipelines for your own data analysis projects.
+
+Let's get started!
+
+## Learning objectives
+
+By the end of this course you should be able to:
+
+-   Describe commonly used and well-understood operators
+-   Apply good practices for the propagation of metadata
+-   Group and split channels
+-   Apply Groovy helper classes to Nextflow scripts
+-   Sensibly structure workflows
+-   Apply layers of configuration to a workflow
+
+## Audience & prerequisites
+
+Please note that this is **not** a beginner's workshop and familiarity with Nextflow, the command line, and common file formats is assumed.
+
+**Prerequisites**
+
+-   A GitHub account
+-   Experience with the command line
+-   Familiarity with Nextflow and Groovy
+-   An understanding of common file formats
+
+## Follow the training video
+
+We run a free online training event for this course approximately every six months. Videos are streamed to YouTube and questions are handled in the nf-core Slack community. You can watch the recording of the most recent training ([September, 2023](https://nf-co.re/events/2023/training-sept-2023/)) below:
+
+<div style="text-align: center;">
+    <iframe width="560" height="315" src="https://www.youtube.com/embed/nPAH9owvKvI?si=Kt3WmxF7rGhRp2L1" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen="" data-ruffle-polyfilled=""></iframe>
+</div>
diff --git a/docs/advanced/metadata.md b/docs/advanced/metadata.md
@@ -129,7 +129,7 @@ map { id, reads ->
 
     To quickly sanity-check a groovy expression, try the [Groovy web console](https://groovyconsole.appspot.com/)
 
-We are almost there, but we still don't have the "treatment" metadata captured in our meta map. The treament is encoded in this example in the name of the parent directory relative to the reads. Inside the map object, the reads are a list of two UnixPath objects. These objects implement the [`java.nio.Path`](https://docs.oracle.com/javase/7/docs/api/java/nio/file/Path.html) interface, which provides us many useful methods, including `getParent()`.
+We are almost there, but we still don't have the "treatment" metadata captured in our meta map. The treatment is encoded in this example in the name of the parent directory relative to the reads. Inside the map object, the reads are a list of two UnixPath objects. These objects implement the [`java.nio.Path`](https://docs.oracle.com/javase/7/docs/api/java/nio/file/Path.html) interface, which provides us many useful methods, including `getParent()`.
 
 We can call the `getParent()` method on each of the paths like so:
 

diff --git a/docs/advanced/operators.md b/docs/advanced/operators.md
@@ -252,7 +252,7 @@ branch { meta, reads ->
 }
 ```
 
-We may want to emit a slightly different element than the one passed as input. The `branch` operator can (optionally) return a _new_ element to an channel. For example, to add an extra key in the meta map of the tumor samples, we add a new line under the condition and return our new element. In this example, we modify the first element of the `List` to be a new list that is the result of merging the existing meta map with a new map containing a single key:
+We may want to emit a slightly different element than the one passed as input. The `branch` operator can (optionally) return a _new_ element to a channel. For example, to add an extra key in the meta map of the tumor samples, we add a new line under the condition and return our new element. In this example, we modify the first element of the `List` to be a new list that is the result of merging the existing meta map with a new map containing a single key:
 
 ```groovy linenums="1"
 branch { meta, reads ->
@@ -483,7 +483,7 @@ The input channel has two elements. For each element in the input channel, we re
 
 !!! exercise
 
-    The `flatten` operation only "unfolds" one layer from the retuned collection. Given this information, what do you expect the following workflow to return?
+    The `flatten` operation only "unfolds" one layer from the returned collection. Given this information, what do you expect the following workflow to return?
 
     ```
     workflow {
@@ -636,7 +636,7 @@ If the contents of the input channel is a file, its _contents_ are appended to t
 
     In the example below, we include a line of groovy to define a variable `article` which is used in the interpolated script string. This is a convenient way to avoid crowding the final string block with too much logic.
 
-    This line includes two Groovy synax features:
+    This line includes two Groovy syntax features:
 
     1. The [ternary operator](https://docs.groovy-lang.org/latest/html/documentation/core-operators.html#_ternary_operator) - a terse if/else block
     2. The [find operator](https://docs.groovy-lang.org/latest/html/documentation/core-operators.html#_find_operator) `=~`

diff --git a/docs/advanced/orientation.md b/docs/advanced/orientation.md
@@ -0,0 +1,81 @@
+# Orientation
+
+The Gitpod environment contains some test data that will be used in this workshop.
+
+!!! note
+
+    Follow [this link](../envsetup/index.md) if you have not yet set up your Gitpod environment.
+
+## Getting started
+
+You will complete this module in the `nf-training-advanced/` folder.
+
+In this folder you will find a series of folders that will be used during different sections of this training.
+
+```console
+nf-training-advanced
+├── groovy
+│   ├── main.nf
+│   ├── modules
+│   │   └── local
+│   │       └── fastp
+│   │           └── main.nf
+│   └── nextflow.config
+├── grouping
+│   ├── data
+│   │   ├── genome.fasta
+│   │   ├── genome.fasta.fai
+│   │   ├── intervals.bed
+│   │   ├── reads
+│   │   │   ├── treatmentA
+│   │   │   │   └── <data files>
+│   │   │   └── treatmentB
+│   │   │       └── <data files>
+│   │   ├── samplesheet.csv
+│   │   └── samplesheet.ugly.csv
+│   └── main.nf
+├── metadata
+│   ├── data
+│   │   ├── reads
+│   │   │   ├── treatmentA
+│   │   │   │   └── <data files>
+│   │   │   └── treatmentB
+│   │   │       └── <data files>
+│   │   ├── samplesheet.csv
+│   │   └── samplesheet.ugly.csv
+│   └── main.nf
+├── operators
+│   ├── data
+│   │   ├── reads
+│   │   │   └── <data files>
+│   │   ├── samplesheet.csv
+│   │   └── samplesheet.ugly.csv
+│   └── main.nf
+└── structure
+    ├── lib
+    │   └── Food.groovy
+    ├── main.nf
+    └── templates
+        ├── adder.py
+        └── demo_script.sh
+```
+
+## Selecting a Nextflow version
+
+By default, Nextflow will pull the latest stable version into your environment.
+
+However, Nextflow is constantly evolving as we make improvements and fix bugs.
+
+The latest releases can be viewed on GitHub [here](https://github.com/nextflow-io/nextflow).
+
+If you want to use a specific version of Nextflow, you can set the `NXF_VER` variable as shown below:
+
+```bash
+export NXF_VER=23.10.0
+```
+
+!!! note
+
+    This tutorial workshop requires `NXF_VER=23.10.0`, or later.
+
+Run `nextflow -version` to confirm that the change has taken effect.