Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Upgrade mehari to 0.30.0 #55

Merged
merged 27 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 39 additions & 13 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ defaults:
shell: bash -l {0}

env:
MEHARI_VERSION: "0.26.1"
SNAKEMAKE_OUTPUT_CACHE: "/github/workspace/snakemake_cache"
MEHARI_VERSION: "0.30.0"
SNAKEMAKE_OUTPUT_CACHE: "${{ github.workspace }}/snakemake_cache"

jobs:
build_data_release:
Expand All @@ -35,6 +35,19 @@ jobs:
id: date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

- name: Setup apptainer
run: |
sudo add-apt-repository -y ppa:apptainer/ppa
sudo apt-get update
sudo apt install -y apptainer squashfuse

# See https://github.com/apptainer/apptainer/pull/2262
- name: Disable apparmor namespace restrictions for apptainer
run: |
sudo sysctl -w kernel.apparmor_restrict_unprivileged_unconfined=0 \
-w kernel.apparmor_restrict_unprivileged_userns=0 \
-w kernel.unprivileged_userns_clone=1

- name: Cache data directory
id: cache-mehari-data-tx
uses: actions/cache@v4
Expand All @@ -47,22 +60,35 @@ jobs:
uses: actions/cache@v4
with:
path: |
/github/workspace/snakemake_cache
~/snakemake_cache
${{ github.workspace }}/snakemake_cache
key: ${{ matrix.genome_release }}-${{ matrix.source }}

- name: Run data build workflow
uses: snakemake/snakemake-github-action@v1
- name: Cache conda
uses: actions/cache@v3
env:
# Increase this value to reset the cache if environment.yaml has not changed
CACHE_NUMBER: 0
with:
directory: mehari-data-tx-workflow
snakefile: workflow/Snakefile
stagein: |
mkdir -p /github/workspace/snakemake_cache
mkdir -p ~/snakemake_cache
mkdir -p ${{ github.workspace }}/snakemake_cache
args: "--configfile config/config.yaml --sdm conda --show-failed-logs --cores 4 --jobs 4 results/${{ matrix.genome_release }}-${{ matrix.source }}/mehari/seqrepo/report/mehari_db_check.txt"
show-disk-usage-on-error: true
path: ~/conda_pkgs_dir
key:
${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('environment.yaml') }}

- name: Setup Miniconda and snakemake
uses: conda-incubator/setup-miniconda@v3.1.0
with:
auto-update-conda: true
conda-solver: "libmamba"
python-version: 3.12
environment-file: "environment.yaml"
conda-remove-defaults: "true"

- name: Run data build workflow
shell: bash -el {0}
run: |
mkdir -p ~/snakemake_cache
mkdir -p ${{ github.workspace }}/snakemake_cache
snakemake --verbose --configfile config/config.yaml --workflow-profile workflow/profiles/default --show-failed-logs --cores 4 --jobs 4 results/${{ matrix.genome_release }}-${{ matrix.source }}/mehari/seqrepo/report/mehari_db_check.txt

- name: List files
run: |
Expand Down
52 changes: 39 additions & 13 deletions .github/workflows/release-please.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ name: release-please
# shell: bash -l {0}

env:
MEHARI_VERSION: "0.26.1"
MEHARI_VERSION: "0.30.0"
SNAKEMAKE_OUTPUT_CACHE: "/github/workspace/snakemake_cache"

jobs:
Expand Down Expand Up @@ -53,6 +53,19 @@ jobs:
id: date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

- name: Setup apptainer
run: |
sudo add-apt-repository -y ppa:apptainer/ppa
sudo apt-get update
sudo apt install -y apptainer squashfuse

# See https://github.com/apptainer/apptainer/pull/2262
- name: Disable apparmor namespace restrictions for apptainer
run: |
sudo sysctl -w kernel.apparmor_restrict_unprivileged_unconfined=0 \
-w kernel.apparmor_restrict_unprivileged_userns=0 \
-w kernel.unprivileged_userns_clone=1

- name: Cache data directory
id: cache-mehari-data-tx
uses: actions/cache@v4
Expand All @@ -65,23 +78,36 @@ jobs:
uses: actions/cache@v4
with:
path: |
/github/workspace/snakemake_cache
~/snakemake_cache
${{ github.workspace }}/snakemake_cache
key: ${{ matrix.genome_release }}-${{ matrix.source }}

- name: Run data build workflow
if: ${{ needs.release-please.outputs.release_created }}
uses: snakemake/snakemake-github-action@v1
- name: Cache conda
uses: actions/cache@v3
env:
# Increase this value to reset the cache if environment.yaml has not changed
CACHE_NUMBER: 0
with:
directory: mehari-data-tx-workflow
snakefile: workflow/Snakefile
stagein: |
mkdir -p /github/workspace/snakemake_cache
mkdir -p ~/snakemake_cache
mkdir -p ${{ github.workspace }}/snakemake_cache
args: "--configfile config/config.yaml --sdm conda --show-failed-logs --cores 4 --jobs 4 results/${{ matrix.genome_release }}-${{ matrix.source }}/mehari/seqrepo/report/mehari_db_check.txt"
show-disk-usage-on-error: true
path: ~/conda_pkgs_dir
key:
${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('environment.yaml') }}

- name: Setup Miniconda and snakemake
uses: conda-incubator/setup-miniconda@v3.1.0
with:
auto-update-conda: true
conda-solver: "libmamba"
python-version: 3.12
environment-file: "environment.yaml"
conda-remove-defaults: "true"

- name: Run data build workflow
shell: bash -el {0}
run: |
mkdir -p ~/snakemake_cache
mkdir -p ${{ github.workspace }}/snakemake_cache
snakemake --verbose --configfile config/config.yaml --workflow-profile workflow/profiles/default --show-failed-logs --cores 4 --jobs 4 results/${{ matrix.genome_release }}-${{ matrix.source }}/mehari/seqrepo/report/mehari_db_check.txt


- name: upload release assets
if: ${{ needs.release-please.outputs.release_created }}
Expand Down
3 changes: 2 additions & 1 deletion config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -248,4 +248,5 @@ human-phenotype-ontology:

# mehari version to use (currently ignored)
mehari:
version: 0.26.6
version: 0.30.0
docker: "docker://ghcr.io/varfish-org/mehari:pr-603"
5 changes: 5 additions & 0 deletions environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- conda-forge
- bioconda
dependencies:
- snakemake =8.25.3
Comment on lines +4 to +5
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add mehari dependency

While the snakemake version is correctly specified, the mehari package dependency (version 0.30.0) should also be added here since it's a core dependency of this project.

Apply this diff:

 dependencies:
   - snakemake =8.25.3
+  - mehari =0.30.0
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
dependencies:
- snakemake =8.25.3
dependencies:
- snakemake =8.25.3
- mehari =0.30.0

2 changes: 1 addition & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from snakemake.utils import validate, min_version

min_version("8.16.0")
min_version("8.25.3")


configfile: "config/config.yaml"
Expand Down
2 changes: 1 addition & 1 deletion workflow/envs/mehari.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ channels:
- bioconda
- nodefaults
dependencies:
- mehari ==0.26.1
- mehari ==0.30.0
6 changes: 4 additions & 2 deletions workflow/profiles/default/config.v8+.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
software-deployment-method: conda
software-deployment-method:
- conda
- apptainer
cache: True
apptainer-args: "-B ${SNAKEMAKE_OUTPUT_CACHE}:${SNAKEMAKE_OUTPUT_CACHE}:rw"
resources:
ratelimit: 3

27 changes: 26 additions & 1 deletion workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,31 @@ def get_alias(wildcards: Wildcards) -> str:
return f"{wildcards.assembly}-{wildcards.source}"


def genome_release(wildcards: Wildcards) -> str:
def genome_assembly(wildcards: Wildcards) -> str:
return wildcards.assembly.lower()


def genome_assembly_version_parameter(_wildcards: Wildcards) -> str:
# TODO determine e.g. patch version for GRCh37/38
return ""


def transcript_source(wildcards: Wildcards) -> str:
return wildcards.source.lower()


def transcript_source_version_parameter(wildcards: Wildcards) -> str:
if wildcards.source.lower() == "ensembl":
release = config["reference"][wildcards.assembly]["ensembl"]["release"]
return f"--transcript-source-version {release}"
else:
return ""


def cdot_version(wildcards: Wildcards) -> str:
return config["sources"][get_alias(wildcards)]["cdot"]["release"]


def get_ensembl_sequence_param(param: str) -> Callable[[Wildcards], str]:
def inner(wildcards):
return config["reference"][wildcards.assembly]["ensembl"][param]
Expand Down Expand Up @@ -142,3 +163,7 @@ def get_genes_to_disease_download_url() -> str:
def get_known_issues(wildcards: Wildcards) -> list[str]:
alias = get_alias(wildcards)
return config["sources"][alias].get("known_issues", [])


def get_mehari_docker_url() -> str:
return config["mehari"]["docker"]
16 changes: 12 additions & 4 deletions workflow/rules/mehari.smk
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,28 @@ rule mehari_build_txs_db:
else ""
),
cdot=get_mehari_cdot_param_string,
genome_release=genome_release,
assembly=genome_assembly,
assembly_version=genome_assembly_version_parameter,
cdot_version=cdot_version,
transcript_source=transcript_source,
transcript_source_version=transcript_source_version_parameter,
log:
"logs/{assembly}-{source}/mehari/seqrepo/build_txs_db.log",
benchmark:
"benchmarks/{assembly}-{source}/mehari/seqrepo/build_txs_db.tsv"
conda:
"../envs/mehari.yaml"
container:
get_mehari_docker_url()
shell:
"""
mehari db create \
--path-out {output.txs} \
--path-seqrepo-instance {input.seqrepo_instance} \
{params.cdot} \
--genome-release {params.genome_release} \
--cdot-version {params.cdot_version} \
--assembly {params.assembly} \
{params.assembly_version} \
--transcript-source {params.transcript_source} \
{params.transcript_source_version} \
tedil marked this conversation as resolved.
Show resolved Hide resolved
--threads {threads} \
{params.mane} 2> {log}
"""
Expand Down
5 changes: 2 additions & 3 deletions workflow/rules/validate.smk
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,11 @@ rule check_mehari_db:
category="{assembly}-{source}",
),
params:
known_issues=get_known_issues,
cdot=get_mehari_check_cdot_param_string,
log:
"logs/{assembly}-{source}/mehari/seqrepo/check.log",
conda:
"../envs/mehari.yaml"
container:
get_mehari_docker_url()
shell:
"""(
mehari db check \
Expand Down
3 changes: 3 additions & 0 deletions workflow/schemas/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,11 @@ properties:
properties:
version:
type: string
docker:
type: string
required:
- version
- docker
required:
- reference
- sources
Expand Down
Loading