Skip to content

Commit

Permalink
Merge pull request #375 from vanheeringen-lab/develop
Browse files Browse the repository at this point in the history
Develop -> master
  • Loading branch information
Maarten-vd-Sande authored Jun 9, 2020
2 parents 6244ad7 + cbaa457 commit d4b79ab
Show file tree
Hide file tree
Showing 48 changed files with 544 additions and 501 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/continuousdeployment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
name: continuous-deployment

# on push events tagged with a version number
on:
push:
tags:
- 'v*' # any tag starting with "v", e.g. v1.0.0

jobs:
build-and-deploy:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2

- uses: actions/setup-python@v1
with:
python-version: 3.6

- name: Create Release
uses: actions/create-release@latest
env:
GITHUB_TOKEN: ${{ secrets.AccessToken }}
with:
tag_name: ${{ github.ref }}
release_name: Release ${{ github.ref }}
body: |
Changes in this Release:
- First Change
- Second Change
Probably link the CHANGELOG
draft: false
prerelease: false
1 change: 1 addition & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ jobs:
python docs/scripts/schemas.py
python docs/scripts/rule_description.py
./docs/scripts/gen_dags
cp CHANGELOG.md docs/content
sphinx-build docs/ build
touch build/.nojekyll
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
*.Rhistory
slurm*.out
report.html
build/
*.egg*
.idea/
.snakemake/
tinydata/
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Changelog
All notable changes to `seq2science` will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

All changes fall under one of these types: `added`, `changed`, `deprecated`, `removed`, `fixed`, `security`.

## [Unreleased]
43 changes: 26 additions & 17 deletions bin/seq2science
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
"""
This is the user's entry-point for the seq2science pipeline.
This is the user's entry-point for the seq2science tool.
"""
import sys
import argparse
Expand All @@ -23,6 +23,7 @@ except ImportError:

__version__ = "0.0.0"


def main():
# set helpful paths
base_dir = os.path.dirname(inspect.getfile(seq2science))
Expand All @@ -36,7 +37,7 @@ def main():
dir_path = args.dir if os.path.isabs(args.dir) else os.path.join(os.getcwd(), args.dir)
_init(args, workflows_dir, dir_path)
elif args.command == "run":
config_path = args.config if os.path.isabs(args.config) else os.path.join(os.getcwd(), args.config)
config_path = args.configfile if os.path.isabs(args.configfile) else os.path.join(os.getcwd(), args.configfile)
_run(args, base_dir, workflows_dir, config_path)
elif args.command == "clean":
_clean(base_dir)
Expand Down Expand Up @@ -97,7 +98,7 @@ def seq2science_parser(workflows_dir="./seq2science/workflows/"):

run.add_argument(
"-c",
"--config",
"--configfile",
default="./config.yaml",
metavar="FILE",
help="The path to the config file.",
Expand Down Expand Up @@ -178,25 +179,25 @@ def _run(args, base_dir, workflows_dir, config_path):

# parse the args
parsed_args = {"snakefile": os.path.join(workflows_dir, args.workflow, "Snakefile"),
"config": {"rule_dir": os.path.join(base_dir, "rules")},
"cores": args.cores,
"use_conda": True,
"conda_frontend": "mamba",
"conda_prefix": os.path.join(base_dir, ".snakemake"),
"dryrun": args.dryrun}

# get the additional snakemake options
snakemake_options = args.snakemakeOptions if args.snakemakeOptions is not None else dict()

snakemake_options.setdefault("config", {}).update({"rule_dir": os.path.join(base_dir, "rules")})
# parse the profile
snakemake_options["configfiles"] = [config_path]
if args.profile is not None:
config_file = snakemake.get_profile_file(args.profile, "config.yaml")
if config_file is None:
profile_file = snakemake.get_profile_file(args.profile, "config.yaml")
if profile_file is None:
print("Error: profile given but no config.yaml found.")
sys.exit(1)
parsed_args["configfiles"] += [config_file]
profile = yaml.safe_load(open(config_file).read())
if "cores" in profile:
snakemake_options["configfiles"] += [profile_file]
profile = yaml.safe_load(open(profile_file).read())
if "cores" in profile and parsed_args["cores"] is None:
parsed_args["cores"] = profile["cores"]

parsed_args.update(snakemake_options)
Expand All @@ -208,7 +209,7 @@ def _run(args, base_dir, workflows_dir, config_path):

# run snakemake
exit_code = snakemake.snakemake(**parsed_args)
sys.exit(exit_code)
sys.exit(0) if exit_code else sys.exit(1)


def _clean(base_dir):
Expand Down Expand Up @@ -247,18 +248,26 @@ class _StoreDictKeyPair(argparse.Action):
)

def __call__(self, parser, namespace, values, option_string=None):
# TODO: cleanup
my_dict = {}
for kv in values:
k, v = kv.split("=")
if v[0] == "{" and v[-1] == "}":
pairs = list(filter(None, re.split('{|:| |}', v)))
assert len(pairs) % 2 == 0
v = {pairs[i]: pairs[i+1] for i in range(0, len(pairs), 2)}
v = {k: int(v) if v.isdigit() else v for k, v in v.items()}

if ":" in v:
pair = list(filter(None, re.split('{|:| |}', v)))
assert len(pair) == 2
if pair[1].lower() == 'true':
pair[1] = True
v = {pair[0]: int(pair[1]) if isinstance(pair[1], str) and pair[1].isdigit() else pair[1]}
elif "[" in v:
v = re.sub("\[|\]", "", v).split(",")
try:
my_dict[k] = int(v)
except:
my_dict[k] = v
if k not in my_dict:
my_dict[k] = v
else:
my_dict[k].update(v)

setattr(namespace, self.dest, my_dict)

Expand Down
1 change: 1 addition & 0 deletions docs/content/extensive_docs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ The extensive docs serve to have all steps and configurable options documented.
cli.rst
all_rules.md
schemas.md
CHANGELOG.md
7 changes: 4 additions & 3 deletions requirements.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ channels:
dependencies:
- pkgs/main::python=3.6
- bioconda::snakemake>=5.18
- pip=20.0.2
- bioconda::sra-tools=2.9.1
- bioconda::entrez-direct=11.0
- bioconda::pysam=0.15.3
Expand All @@ -16,5 +15,7 @@ dependencies:
- bioconda::norns=0.1.5
- anaconda::biopython=1.74
- pkgs/main::filelock=3.0.12
- pip:
- git+https://github.com/daler/trackhub@params-overhaul
- pkgs/main::pyyaml
- pkgs/main::beautifulsoup4=4.9.0
- conda-forge::pretty_html_table=0.9.dev0
- bioconda::trackhub=0.1.2019.12.24
3 changes: 2 additions & 1 deletion seq2science/envs/macs2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ channels:
- conda-forge
- defaults
dependencies:
- bioconda::macs2=2.2.4
- bioconda::macs2=2.2.7
- bioconda::khmer=2.0
- pkgs/main::setuptools=47.1.1 # missing dependency of macs2's unique-kmers.py
2 changes: 1 addition & 1 deletion seq2science/envs/qc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ channels:
- defaults
dependencies:
- bioconda::fastqc=0.11.8
- bioconda::multiqc=1.8
- bioconda::multiqc=1.9
4 changes: 2 additions & 2 deletions seq2science/rules/DGE_analysis.smk
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ rule deseq2:
resources:
R_scripts=1 # conda's R can have issues when starting multiple times
script:
"../scripts/deseq2.R"
f"{config['rule_dir']}/../scripts/deseq2.R"


rule blind_clustering:
Expand All @@ -91,4 +91,4 @@ rule blind_clustering:
resources:
R_scripts=1 # conda's R can have issues when starting multiple times
script:
"../scripts/deseq2_clustering.R"
f"{config['rule_dir']}/../scripts/deseq2_clustering.R"
2 changes: 1 addition & 1 deletion seq2science/rules/alignment.smk
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ elif config['aligner'] == 'star' or config.get('quantifier', '') == 'star':
input:
genome = expand("{genome_dir}/{{assembly}}/{{assembly}}.fa", **config),
sizefile= expand("{genome_dir}/{{assembly}}/{{assembly}}.fa.sizes", **config),
gtf = expand("{genome_dir}/{{assembly}}/{{assembly}}.gtf", **config)
gtf = expand("{genome_dir}/{{assembly}}/{{assembly}}.annotation.gtf", **config)
output:
directory(expand("{genome_dir}/{{assembly}}/index/{aligner}", **config))
log:
Expand Down
13 changes: 5 additions & 8 deletions seq2science/rules/bam_cleaning.smk
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
import gzip


def get_blacklist_files(wildcards):
files = {}
# ideally get genome is a checkpoint, however there are quite some Snakemake
Expand Down Expand Up @@ -30,9 +27,9 @@ rule setup_blacklist:
newblacklist = ""
if config.get('remove_blacklist') and wildcards.assembly.lower() in \
["ce10", "dm3", "hg38", "hg19", "mm9", "mm10"]:
blacklist = f"{config['genome_dir']}/{wildcards.assembly}/{wildcards.assembly}.blacklist.bed.gz"
with gzip.GzipFile(blacklist) as file:
newblacklist += file.read().decode('utf8')
blacklist = f"{config['genome_dir']}/{wildcards.assembly}/{wildcards.assembly}.blacklist.bed"
with open(blacklist) as file:
newblacklist += file.read()

if any('.fa.sizes' in inputfile for inputfile in input):
with open(input.sizes, 'r') as file:
Expand Down Expand Up @@ -62,7 +59,7 @@ rule complement_blacklist:
shell:
"""
sortBed -faidx {input.sizes} -i {input.blacklist} |
complementBed -i /dev/stdin -g {input.sizes} > {output} 2> {log}
complementBed -i stdin -g {input.sizes} > {output} 2> {log}
"""


Expand All @@ -88,7 +85,7 @@ rule sieve_bam:
expand("{benchmark_dir}/sieve_bam/{{assembly}}-{{sample}}.benchmark.txt", **config)[0]
params:
minqual=f"-q {config['min_mapping_quality']}",
atacshift=lambda wildcards, input: f"| ../../scripts/atacshift.pl /dev/stdin {input.sizes}" if config['tn5_shift'] else "",
atacshift=lambda wildcards, input: f"| {config['rule_dir']}/../scripts/atacshift.pl /dev/stdin {input.sizes}" if config['tn5_shift'] else "",
blacklist=lambda wildcards, input: f"-L {input.blacklist}",
prim_align=f"-F 256" if config["only_primary_align"] else ""
conda:
Expand Down
6 changes: 3 additions & 3 deletions seq2science/rules/bigfiles.smk
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,11 @@ def get_bigpeak_type(wildcards):

def get_bigpeak_schema(wildcards):
if get_ftype(wildcards.peak_caller) == "narrowPeak":
return "../../schemas/bignarrowPeak.as"
return f"{config['rule_dir']}/../schemas/bignarrowPeak.as"
if get_ftype(wildcards.peak_caller) == "broadPeak":
if len(treps_from_brep[(wildcards.sample, wildcards.assembly)]) == 1:
return "../../schemas/bigbroadPeak.as"
return "../../schemas/bigBed.as"
return f"{config['rule_dir']}/../schemas/bigbroadPeak.as"
return f"{config['rule_dir']}/../schemas/bigBed.as"
raise NotImplementedError()


Expand Down
Loading

0 comments on commit d4b79ab

Please sign in to comment.