Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pRegion for all #172

Merged
merged 37 commits into from
Jul 5, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
404c264
use pRegion for HiC tracks to speed up
lldelisle Jan 9, 2020
28d5653
added a test with a bed with multiple chr
lldelisle Jan 9, 2020
7e38dfa
added pybedtools
lldelisle Jan 9, 2020
9d7a3a4
added pRegion everywhere there is an intervaltree
lldelisle Jan 9, 2020
1cea057
remove try/catch but use UCSC format
lldelisle Jan 10, 2020
34cb06a
remove the chr added to the region
lldelisle Jan 10, 2020
3c0a900
use one cool in the test of hic rasterize
lldelisle Jan 20, 2020
3b051fa
deal with cases where the matrix is empty on the region
lldelisle Jan 20, 2020
de5b0fd
change error to warning and trycatch the vmax as percentile
lldelisle Jan 21, 2020
329b153
linting
lldelisle Jan 21, 2020
20c929f
new test
lldelisle Jan 21, 2020
0a3a3da
Merge branch 'develop' of https://github.com/deeptools/pyGenomeTracks…
lldelisle Jun 3, 2020
bd88273
remove ununsed matplotlib
lldelisle Jun 3, 2020
815c8ea
put back HUGE_NUMBER
lldelisle Jun 3, 2020
afa1fe2
update comment
lldelisle Jun 3, 2020
1ec106d
forgot a , in merge
lldelisle Jun 3, 2020
ca58b09
update gtfTrack
lldelisle Jun 3, 2020
8aa4c21
fix issues when plotting matrix on the chr end
lldelisle Jun 3, 2020
836e54d
update output of hic_small_test
lldelisle Jun 3, 2020
16c76b1
add test for bed vs region
lldelisle Jun 3, 2020
9e9c062
remove y_axis when no data
lldelisle Jun 3, 2020
3c39a02
adapt pChrnameList to exceptions
lldelisle Jun 3, 2020
40bc04b
update and add new tests
lldelisle Jun 3, 2020
f68a2b7
remove debug prints
lldelisle Jun 3, 2020
4f559b7
require hicmatrix>=13
lldelisle Jun 3, 2020
4144cc7
update doc
lldelisle Jun 3, 2020
0823bef
linting
lldelisle Jun 3, 2020
2318fa8
remove redundant test
lldelisle Jun 3, 2020
a140022
mv self.img=None on top
lldelisle Jun 3, 2020
fbf9eb5
remove useless comments
lldelisle Jun 3, 2020
5357f7d
linting
lldelisle Jun 4, 2020
c9a32f7
change pRegion to plot_regions and intersect with all plot_regions ex…
lldelisle Jun 4, 2020
96fbd6b
update tests/doc to use imbricated bed
lldelisle Jun 4, 2020
254c911
linting
lldelisle Jun 4, 2020
e46f72f
make prettier the coding
lldelisle Jun 4, 2020
a68dbbe
Merge branch 'develop' of https://github.com/deeptools/pyGenomeTracks…
lldelisle Jun 4, 2020
63e103d
Merge branch 'develop' of https://github.com/deeptools/pyGenomeTracks…
lldelisle Jul 5, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ not set means that by default the parameter is commented.
parameter | x-axis | epilogos | links | domains | bed | narrow_peak | bigwig | bedgraph | bedgraph_matrix | hlines | hic_matrix
-- | - | - | - | - | - | - | - | - | - | - | -
where | bottom | | | | | | | | | |
fontsize | 15 | | | 12 | 12 | | | | | |
fontsize | 15 | | | | 12 | | | | | |
categories_file | | not set | | | | | | | | |
orientation | | not set | not set | not set | not set | not set | not set | not set | not set | not set | not set
links_type | | | arcs | | | | | | | |
Expand Down Expand Up @@ -504,7 +504,6 @@ use_middle | | | | | | | | false | | |
rasterize | | | | | | | | false | true | | true
pos_score_in_bin | | | | | | | | | center | |
plot_horizontal_lines | | | | | | | | | false | |
region | | | | | | | | | | | not set
depth | | | | | | | | | | | 100000
show_masked_bins | | | | | | | | | | | false
scale_factor | | | | | | | | | | | 1
Expand Down
3 changes: 1 addition & 2 deletions docs/content/all_default_properties.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
parameter | x-axis | epilogos | links | domains | bed | narrow_peak | bigwig | bedgraph | bedgraph_matrix | hlines | hic_matrix
-- | - | - | - | - | - | - | - | - | - | - | -
where | bottom | | | | | | | | | |
fontsize | 15 | | | 12 | 12 | | | | | |
fontsize | 15 | | | | 12 | | | | | |
categories_file | | not set | | | | | | | | |
orientation | | not set | not set | not set | not set | not set | not set | not set | not set | not set | not set
links_type | | | arcs | | | | | | | |
Expand Down Expand Up @@ -38,7 +38,6 @@ use_middle | | | | | | | | false | | |
rasterize | | | | | | | | false | true | | true
pos_score_in_bin | | | | | | | | | center | |
plot_horizontal_lines | | | | | | | | | false | |
region | | | | | | | | | | | not set
depth | | | | | | | | | | | 100000
show_masked_bins | | | | | | | | | | | false
scale_factor | | | | | | | | | | | 1
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@ dependencies:
- pysam >=0.14
- pytest
- gffutils >=0.9
- pybedtools >=0.8.1
- pip:
- "git+https://github.com/deeptools/pyGenomeTracks.git"
7 changes: 4 additions & 3 deletions pygenometracks/getAllDefaultsAndPossible.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@ def main():
track_type = "x-axis"
has_default = False
for p, value in track_class.DEFAULTS_PROPERTIES.items():
all_default_parameters[p] = all_default_parameters.get(p, {})
all_default_parameters[p][track_type] = value
has_default = True
if p != 'region':
all_default_parameters[p] = all_default_parameters.get(p, {})
all_default_parameters[p][track_type] = value
has_default = True
if has_default:
all_tracks_with_default += [track_type]

Expand Down
66 changes: 46 additions & 20 deletions pygenometracks/plotTracks.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,16 @@
import sys
import argparse
import matplotlib
matplotlib.use('Agg')
import warnings

from pygenometracks.tracksClass import PlotTracks
from pygenometracks._version import __version__
from .utilities import InputError

matplotlib.use('Agg')

# Used in case no end of a genomic interval was set:
HUGE_NUMBER = 1e15 # also used in HiCMatrixTrack
DEFAULT_BED_COLOR = '#1f78b4'
DEFAULT_BIGWIG_COLOR = '#33a02c'
DEFAULT_BEDGRAPH_COLOR = '#a6cee3'
Expand Down Expand Up @@ -240,7 +244,11 @@ def get_region(region_string):
"""
if region_string:
# separate the chromosome name and the location using the ':' character
chrom, position = region_string.strip().split(":")
try:
chrom, position = region_string.strip().split(":")
except ValueError:
# It can be a full chromosome:
return region_string.strip(), 0, HUGE_NUMBER

# clean up the position
for char in ",.;|!{}()":
Expand All @@ -254,7 +262,7 @@ def get_region(region_string):
try:
region_end = int(position_list[1])
except IndexError:
region_end = 1e15 # a huge number
region_end = HUGE_NUMBER
if region_start < 0:
region_start = 0
if region_end <= region_start:
Expand All @@ -269,35 +277,53 @@ def get_region(region_string):
def main(args=None):

args = parse_arguments().parse_args(args)
trp = PlotTracks(args.tracks.name, args.width, fig_height=args.height, fontsize=args.fontSize, dpi=args.dpi, track_label_width=args.trackLabelFraction)

# Identify the regions to plot:
if args.BED:
count = 0
regions = []
for line in args.BED.readlines():
count += 1
try:
chrom, start, end = line.strip().split('\t')[0:3]
except ValueError:
continue
try:
start, end = map(int, [start, end])
except ValueError as detail:
sys.stderr.write("Invalid value found at line\t{}\t. {}\n".format(line, detail))
name = args.outFileName.split(".")
file_suffix = name[-1]
file_prefix = ".".join(name[:-1])

warnings.warn("Invalid value found at line\t{}\t. {}\n".format(line, detail))
continue
regions.append((chrom, start, end))
else:
regions = [get_region(args.region)]

if len(regions) == 0:
raise InputError("There is no valid regions to plot.")

# Try to find a region to get the data:
pRegion = None
if len(set([r[0] for r in regions])) == 1:
chrom = regions[0][0]
start = min([r[1] for r in regions])
end = max([r[2] for r in regions])
pRegion = [chrom, start, end]

# Create all the tracks
trp = PlotTracks(args.tracks.name, args.width, fig_height=args.height,
fontsize=args.fontSize, dpi=args.dpi,
track_label_width=args.trackLabelFraction,
pRegion=pRegion)

# Plot them
if args.BED:
name = args.outFileName.split(".")
file_suffix = name[-1]
file_prefix = ".".join(name[:-1])
for chrom, start, end in regions:
file_name = "{}_{}-{}-{}.{}".format(file_prefix, chrom, start, end, file_suffix)
if end - start < 200000:
sys.stderr.write("A region shorter than 200kb has been "
"detected! This can be too small to return "
"a proper TAD plot!\n")
# start -= 100000
# start = max(0, start)
# end += 100000
warnings.warn("A region shorter than 200kb has been "
"detected! This can be too small to return "
"a proper TAD plot!\n")
sys.stderr.write("saving {}\n".format(file_name))
# print("{} {} {}".format(chrom, start, end))
trp.plot(file_name, chrom, start, end, title=args.title)
else:
region = get_region(args.region)
trp.plot(args.outFileName, *region, title=args.title)
trp.plot(args.outFileName, *regions[0], title=args.title)
6 changes: 5 additions & 1 deletion pygenometracks/readBed.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@ def __init__(self, file_handle):
self.file_handle = file_handle
self.line_number = 0
# guess file type
fields = self.get_no_comment_line()
try:
fields = self.get_no_comment_line()
except StopIteration:
# The file is empty
fields = "chrDoesNotExists\t0\t1"
fields = to_string(fields)
fields = fields.split('\t')

Expand Down
2 changes: 1 addition & 1 deletion pygenometracks/tests/generateAllOutput.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pgt --tracks ./pygenometracks/tests/test_data/epilogos.ini --region X:3100000-31

# test_hiCMatrixTracks:
pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic.ini --region X:2500000-3500000 --trackLabelFraction 0.23 --width 38 --dpi 130 -o ./pygenometracks/tests/test_data/master_plot_hic.png
pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic_rasterize_height.ini --region X:2500000-2600000 --trackLabelFraction 0.23 --width 38 --dpi 10 -o ./pygenometracks/tests/test_data/master_plot_hic_rasterize_height.pdf
pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic_rasterize_height.ini --BED ./pygenometracks/tests/test_data/regions_XY.bed --trackLabelFraction 0.23 --width 38 --dpi 10 -o ./pygenometracks/tests/test_data/master_plot_hic_rasterize_height.pdf

# test_make_tracks:
make_tracks_file --trackFiles pygenometracks/tests/test_data/Li_et_al_2015.h5 pygenometracks/tests/test_data/bigwig_chrx_2e6_5e6.bw pygenometracks/tests/test_data/tad_classification.bed pygenometracks/tests/test_data/epilog.qcat.bgz -o pygenometracks/tests/test_data/master_tracks.ini
Expand Down
Binary file not shown.
Binary file not shown.
2 changes: 2 additions & 0 deletions pygenometracks/tests/test_data/regions_XY.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
X 2500000 2600000
Y 0 1000000
34 changes: 33 additions & 1 deletion pygenometracks/tests/test_hiCMatrixTracks.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,40 @@ def test_plot_tracks_with_hic_rasterize_height():
outfile.name).split()
pygenometracks.plotTracks.main(args)
res = compare_images(os.path.join(ROOT,
'master_plot_hic_rasterize_height.pdf'),
'master_plot_hic_rasterize_height'
'_X-2500000-2600000.pdf'),
outfile.name, tolerance)
assert res is None, res

os.remove(outfile.name)


def test_plot_tracks_with_hic_rasterize_height_2chr():
    """Plot a rasterized Hi-C track for a BED file spanning two chromosomes.

    With ``--BED`` the output file name is only used as a template: one plot
    is written per BED line as ``<prefix>_<chrom>-<start>-<end>.<suffix>``.
    Each of these plots is compared with its reference image and removed.
    """
    outfile = NamedTemporaryFile(suffix='.pdf', prefix='pyGenomeTracks_test_',
                                 delete=False)
    # Only the name is needed; close the handle so the placeholder file can
    # be deleted afterwards on every platform.
    outfile.close()

    # --dpi used to be given twice (130 then 10); only the intended value,
    # the one used to generate the reference images, is kept.
    args = "--tracks {0} --BED {1} "\
           "--trackLabelFraction 0.23 --width 38 "\
           "--dpi 10 --outFileName {2}" \
           "".format(os.path.join(ROOT,
                                  'browser_tracks_hic_rasterize_height.ini'),
                     os.path.join(ROOT, 'regions_XY.bed'),
                     outfile.name).split()
    pygenometracks.plotTracks.main(args)

    # The placeholder itself is never written to in BED mode: remove it so
    # the test does not leave an empty temporary file behind.
    os.remove(outfile.name)

    # Strip the '.pdf' suffix to rebuild the per-region file names.
    output_prefix = outfile.name[:-4]
    for region_suffix in ['_X-2500000-2600000.pdf', '_Y-0-1000000.pdf']:
        produced_file = output_prefix + region_suffix
        expected_file = os.path.join(ROOT,
                                     'master_plot_hic_rasterize_height'
                                     + region_suffix)
        res = compare_images(expected_file, produced_file, tolerance)
        assert res is None, res

        os.remove(produced_file)
3 changes: 2 additions & 1 deletion pygenometracks/tracks/BedGraphMatrixTrack.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ class BedGraphMatrixTrack(BedGraphTrack):
'show_data_range': True,
'plot_horizontal_lines': False,
'orientation': None,
'rasterize': True}
'rasterize': True,
'region': None} # Cannot be set manually but is set by tracksClass
NECESSARY_PROPERTIES = ['file']
SYNONYMOUS_PROPERTIES = {'max_value': {'auto': None},
'min_value': {'auto': None}}
Expand Down
9 changes: 6 additions & 3 deletions pygenometracks/tracks/BedGraphTrack.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ class BedGraphTrack(GenomeTrack):
'summary_method': None,
'rasterize': False,
'number_of_bins': 700,
'type': 'fill'}
'type': 'fill',
'region': None} # Cannot be set manually but is set by tracksClass
NECESSARY_PROPERTIES = ['file']
SYNONYMOUS_PROPERTIES = {'max_value': {'auto': None},
'min_value': {'auto': None}}
Expand Down Expand Up @@ -95,10 +96,12 @@ def __init__(self, properties_dict):
try:
self.tbx = pysam.TabixFile(self.properties['file'])
except IOError:
self.interval_tree, ymin, ymax = file_to_intervaltree(self.properties['file'])
self.interval_tree, __, __ = file_to_intervaltree(self.properties['file'],
self.properties['region'])
# load the file as an interval tree
else:
self.interval_tree, ymin, ymax = file_to_intervaltree(self.properties['file'])
self.interval_tree, __, __ = file_to_intervaltree(self.properties['file'],
self.properties['region'])

self.num_fields = None

Expand Down
47 changes: 38 additions & 9 deletions pygenometracks/tracks/BedTrack.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,15 @@
import matplotlib.pyplot as plt
from intervaltree import IntervalTree, Interval
import numpy as np
import pybedtools
import sys
import tempfile

DEFAULT_BED_COLOR = '#1f78b4'
DISPLAY_BED_VALID = ['collapsed', 'triangles', 'interleaved', 'stacked']
DISPLAY_BED_SYNONYMOUS = {'interlaced': 'interleaved', 'domain': 'interleaved'}
DEFAULT_DISPLAY_BED = 'stacked'
AROUND_REGION = 100000


class BedTrack(GenomeTrack):
Expand Down Expand Up @@ -121,7 +125,8 @@ class BedTrack(GenomeTrack):
'arrow_interval': 2,
'arrowhead_included': False,
'color_utr': 'grey',
'height_utr': 1}
'height_utr': 1,
'region': None} # Cannot be set manually but is set by tracksClass
NECESSARY_PROPERTIES = ['file']
SYNONYMOUS_PROPERTIES = {'max_value': {'auto': None},
'min_value': {'auto': None},
Expand Down Expand Up @@ -154,7 +159,7 @@ def __init__(self, *args, **kwarg):
# this is bed3, bed4, bed5, bed6, bed8, bed9 or bed12
self.len_w = None # this is the length of the letter 'w' given the font size
self.interval_tree = {} # interval tree of the bed regions
self.interval_tree, min_score, max_score = self.process_bed()
self.interval_tree, min_score, max_score = self.process_bed(self.properties['region'])
if self.colormap is not None:
if self.properties['min_value'] is not None:
min_score = self.properties['min_value']
Expand Down Expand Up @@ -225,15 +230,39 @@ def get_length_w(self, fig_width, region_start, region_end):

return self.len_w

def process_bed(self):

if self.properties['file'].endswith('gtf') or \
self.properties['file'].endswith('gtf.gz'):
bed_file_h = ReadGtf(self.properties['file'],
def process_bed(self, pRegion=None):
is_gtf = self.properties['file'].endswith('gtf') or \
self.properties['file'].endswith('gtf.gz')
file_to_open = self.properties['file']
# Check if we can restrict the interval tree to a region:
if pRegion is not None and not self.properties['global_max_row']:
# I increase the region to get the intervals:
pRegion[1] = max([0, pRegion[1] - AROUND_REGION])
pRegion[2] += AROUND_REGION
# We use pybedtools to overlap:
original_file = pybedtools.BedTool(file_to_open)
# We will overlap with both versions of the chromosome name:
chrom = self.change_chrom_names(pRegion[0])
bothRegions = ("{0} {1} {2}\n{3} {1} {2}"
.format(*pRegion,
chrom))
region = pybedtools.BedTool(bothRegions, from_string=True)
# Bedtools will emit a warning because the region uses an inconsistent
# nomenclature (chromosome names both with and without 'chr')
sys.stderr = open(tempfile.NamedTemporaryFile().name, 'w')
try:
file_to_open = original_file.intersect(region, wa=True).fn
except pybedtools.helpers.BEDToolsError:
file_to_open = self.properties['file']
sys.stderr.close()
sys.stderr = sys.__stderr__

if is_gtf:
bed_file_h = ReadGtf(file_to_open,
self.properties['prefered_name'],
self.properties['merge_transcripts'])
else:
bed_file_h = ReadBed(opener(self.properties['file']))
bed_file_h = ReadBed(opener(file_to_open))
self.bed_type = bed_file_h.file_type

if self.properties['color'] == 'bed_rgb' and \
Expand Down Expand Up @@ -263,7 +292,7 @@ def process_bed(self):

if valid_intervals == 0:
self.log.warning("No valid intervals were found in file "
"{}".format(self.properties['file_name']))
"{}\n".format(self.properties['file']))

return interval_tree, min_score, max_score

Expand Down
3 changes: 2 additions & 1 deletion pygenometracks/tracks/EpilogosTrack.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ class EpilogosTrack(BedGraphTrack):
file_type = {}
""".format(TRACK_TYPE)
DEFAULTS_PROPERTIES = {'categories_file': None,
'orientation': None}
'orientation': None,
'region': None} # Cannot be set manually but is set by tracksClass
NECESSARY_PROPERTIES = ['file']
SYNONYMOUS_PROPERTIES = {}
POSSIBLE_PROPERTIES = {'orientation': [None, 'inverted']}
Expand Down
Loading