Skip to content

Commit

Permalink
Version 3.1.1 (#20)
Browse files Browse the repository at this point in the history
Fix program error in low-depth or no-data regions. Completes analysis even when the input is a small bamlet (result is still a no-call).
  • Loading branch information
xiao-chen-xc authored Apr 18, 2024
1 parent dba17f8 commit 8de77bb
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 19 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ Please note that the input BAM should be one that's aligned to the ENTIRE refere
Optional parameters:
- `-g`: Region(s) to analyze, separated by comma. All supported [regions](docs/regions.md) will be analyzed if not specified. Please use region name, i.e. first column in the doc.
- `-t`: Number of threads.
- `-p`: Prefix of output files when the input is a single sample, i.e. use with `-b`. If not provided, prefix will be extracted from the name of the input BAM.
- `-p`: Prefix of output files when the input is a single sample, i.e. use with `-b`. If not provided, prefix will be extracted from the header of the input BAM.
- `--genome`: Genome reference build. Default is `38`. If `37` or `19` is specified, Paraphase will run the analysis for GRCh37 or hg19, respectively (note that only 11 medically relevant [regions](docs/regions.md) are supported now for GRCh37/hg19).
- `--gene1only`: If specified, variants calls will be made against the main gene only for SMN1, PMS2, STRC, NCF1 and IKBKG, see more information [here](docs/vcf.md).
- `--novcf`: If specified, no VCF files will be produced.
Expand Down
2 changes: 1 addition & 1 deletion paraphase/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "3.1.0"
__version__ = "3.1.1"
16 changes: 12 additions & 4 deletions paraphase/genes/cfhclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,20 @@ def __init__(

def call(self):
haps = {}
haps.update(self.cfh["final_haplotypes"])
haps.update(self.cfhr3["final_haplotypes"])
fusions = {}
fusions.update(self.cfh["fusions_called"])
fusions.update(self.cfhr3["fusions_called"])
total_cn = None
if (
self.cfh["final_haplotypes"] is not None
and self.cfhr3["final_haplotypes"] is not None
):
haps.update(self.cfh["final_haplotypes"])
haps.update(self.cfhr3["final_haplotypes"])
if (
self.cfh["fusions_called"] is not None
and self.cfhr3["fusions_called"] is not None
):
fusions.update(self.cfh["fusions_called"])
fusions.update(self.cfhr3["fusions_called"])
if self.cfh["total_cn"] is not None and self.cfhr3["total_cn"] is not None:
total_cn = min(self.cfh["total_cn"], self.cfhr3["total_cn"])
if (
Expand Down
38 changes: 27 additions & 11 deletions paraphase/genome_depth.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

import numpy as np
import pysam
import logging

logging.basicConfig(level=logging.INFO)


class GenomeDepth:
Expand Down Expand Up @@ -42,10 +45,15 @@ def get_genome_depth(self):
nchr = nchr.replace("chr", "")
pos1 = int(at[1])
for pos in [pos1, pos1 + 1600]:
site_depth = self._bamh.count(
nchr, pos - 1, pos, read_callback="all"
)
depth.append(site_depth)
try:
site_depth = self._bamh.count(
nchr, pos - 1, pos, read_callback="all"
)
depth.append(site_depth)
except Exception:
logging.info(
f"This BAM does not have data at {nchr}:{pos}, which is used for determining sample coverage."
)
self.mdepth = np.median(depth)
self.mad = np.median([abs(a - self.mdepth) for a in depth]) / self.mdepth

Expand All @@ -64,15 +72,23 @@ def check_sex(self):
if self.chr_in_name is False:
nchr = nchr.replace("chr", "")
pos1 = int(at[1])
site_depth = self._bamh.count(
nchr, pos1 - 1, pos1, read_callback="all"
)
if "X" in nchr:
self.x.append(site_depth)
elif "Y" in nchr:
self.y.append(site_depth)
try:
site_depth = self._bamh.count(
nchr, pos1 - 1, pos1, read_callback="all"
)
if "X" in nchr:
self.x.append(site_depth)
elif "Y" in nchr:
self.y.append(site_depth)
except Exception:
logging.info(
f"This BAM does not have data at {nchr}:{pos1}, which is used for determining sample sex."
)
cov_x, cov_y = np.median(self.x), np.median(self.y)
self._bamh.close()

if np.isnan(cov_x) or np.isnan(cov_y) or cov_x == 0:
return None
if cov_y / cov_x < 0.05:
return "female"
elif cov_y / cov_x > 0.1:
Expand Down
4 changes: 2 additions & 2 deletions paraphase/paraphase.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from paraphase import genes
from .phaser import Phaser

logging.basicConfig(level=logging.DEBUG)
logging.basicConfig(level=logging.INFO)


class Paraphase:
Expand Down Expand Up @@ -590,7 +590,7 @@ def load_parameters(self):
"-p",
"--prefix",
help="Prefix of output files for a single sample. Used with -b.\n"
+ "If not provided, prefix will be extracted from the name of the input BAM.\n",
+ "If not provided, prefix will be extracted from the header of the input BAM.\n",
required=False,
)
parser.add_argument(
Expand Down

0 comments on commit 8de77bb

Please sign in to comment.