diff --git a/docs/content/all_default_properties_rst.txt b/docs/content/all_default_properties_rst.txt index c9fff667..7b8d962a 100644 --- a/docs/content/all_default_properties_rst.txt +++ b/docs/content/all_default_properties_rst.txt @@ -3,7 +3,7 @@ parameter :doc:`tracks/x_axis` :doc:`tracks/epilogos` ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= overlay_previous no no no no no no no no no no no no no where bottom left -fontsize 15 12 12 12 12 +fontsize 15 12 12 12 categories_file not set orientation not set not set not set not set not set not set not set not set not set not set not set links_type arcs @@ -47,12 +47,10 @@ y_axis_values second_file* not set not set operation* file file grid false false -use_middle false rasterize false true true pos_score_in_bin center plot_horizontal_lines false colormap viridis RdYlBu_r -region not set depth 100000 show_masked_bins false scale_factor 1 diff --git a/docs/content/all_possible_properties.txt b/docs/content/all_possible_properties.txt index f1639a4a..1bf04eac 100644 --- a/docs/content/all_possible_properties.txt +++ b/docs/content/all_possible_properties.txt @@ -102,10 +102,6 @@ - for *bigwig, bedgraph*: true, false -- **use_middle**: - - - for *bedgraph*: true, false - - **rasterize**: - for *bedgraph, bedgraph_matrix, hic_matrix*: true, false diff --git a/docs/content/examples.rst b/docs/content/examples.rst index 94d02d1e..3bde59ec 100644 --- a/docs/content/examples.rst +++ b/docs/content/examples.rst @@ -106,12 +106,18 @@ The output file of some 4C-seq pipeline are bedgraph where the coordinates are t .. literalinclude:: ../../pygenometracks/tests/test_data/bedgraph_useMid.ini :language: INI + +We can generate two zooms using a bed instead of regions: + +.. literalinclude:: ../../pygenometracks/tests/test_data/regions_imbricated_chr2.bed .. code:: bash - $ pyGenomeTracks --tracks bedgraph_useMid.ini --region chr2:73,800,000-75,744,000 --trackLabelFraction 0.2 --width 38 --dpi 130 -o master_bedgraph_useMid.pdf + $ pyGenomeTracks --tracks bedgraph_useMid.ini --BED regions_imbricated_chr2.bed --trackLabelFraction 0.2 --width 38 --dpi 130 -o master_bedgraph_useMid.png + +.. image:: ../../pygenometracks/tests/test_data/master_bedgraph_useMid_chr2-73800000-75744000.png -The output is available `here `_. +.. image:: ../../pygenometracks/tests/test_data/master_bedgraph_useMid_chr2-74000000-74800000.png Examples with peaks diff --git a/docs/content/tracks/auto/bedgraph_deduced_from_code.txt b/docs/content/tracks/auto/bedgraph_deduced_from_code.txt index a4f835fe..35c47bb7 100644 --- a/docs/content/tracks/auto/bedgraph_deduced_from_code.txt +++ b/docs/content/tracks/auto/bedgraph_deduced_from_code.txt @@ -46,8 +46,6 @@ Optional: - **grid**: `false` (default) or true. -- **use_middle**: `false` (default) or true. - - **rasterize**: `false` (default) or true. diff --git a/docs/content/tracks/auto/domains_deduced_from_code.txt b/docs/content/tracks/auto/domains_deduced_from_code.txt index 0edb6be3..ac43aab7 100644 --- a/docs/content/tracks/auto/domains_deduced_from_code.txt +++ b/docs/content/tracks/auto/domains_deduced_from_code.txt @@ -10,8 +10,6 @@ Optional: - **overlay_previous**: `no` (default) or yes or share-y. -- **fontsize**: `12` (default) or any float above 0 - - **orientation**: by default this option is not set but you can also put: inverted. - **line_width**: `0.5` (default) or any float above 0 diff --git a/docs/content/tracks/auto/hic_matrix_deduced_from_code.txt b/docs/content/tracks/auto/hic_matrix_deduced_from_code.txt index 5b429b9e..0e1581dc 100644 --- a/docs/content/tracks/auto/hic_matrix_deduced_from_code.txt +++ b/docs/content/tracks/auto/hic_matrix_deduced_from_code.txt @@ -22,8 +22,6 @@ Optional: - **colormap**: `RdYlBu_r` (default) -- **region**: by default this option is not set - - **depth**: `100000` (default) or any integer above 1 - **show_masked_bins**: `false` (default) or true. diff --git a/environment.yml b/environment.yml index 07265480..da9796ed 100644 --- a/environment.yml +++ b/environment.yml @@ -11,10 +11,11 @@ dependencies: - intervaltree >=2.1.0 - pybigwig >=0.3.16 - future >=0.17.0 - - hicmatrix >=12 + - hicmatrix >=13 - pysam >=0.14 - pytest - gffutils >=0.9 + - pybedtools >=0.8.1 - tqdm >=4.20 - pip: - "git+https://github.com/deeptools/pyGenomeTracks.git" diff --git a/pygenometracks/getAllDefaultsAndPossible.py b/pygenometracks/getAllDefaultsAndPossible.py index 554bed32..46bd95f6 100644 --- a/pygenometracks/getAllDefaultsAndPossible.py +++ b/pygenometracks/getAllDefaultsAndPossible.py @@ -65,9 +65,10 @@ def main(): all_default_parameters[p][track_type] = value has_default = False for p, value in track_class.DEFAULTS_PROPERTIES.items(): - all_default_parameters[p] = all_default_parameters.get(p, {}) - all_default_parameters[p][track_type] = value - has_default = True + if p != 'region': + all_default_parameters[p] = all_default_parameters.get(p, {}) + all_default_parameters[p][track_type] = value + has_default = True if has_default: all_tracks_with_default += [track_type] diff --git a/pygenometracks/plotTracks.py b/pygenometracks/plotTracks.py index 05168eb4..b1e050d6 100644 --- a/pygenometracks/plotTracks.py +++ b/pygenometracks/plotTracks.py @@ -143,14 +143,14 @@ import sys import argparse -import matplotlib -matplotlib.use('Agg') +import warnings from pygenometracks.tracksClass import PlotTracks from pygenometracks._version import __version__ from .utilities import InputError DEFAULT_FIGURE_WIDTH = 40 # in centimeters +HUGE_NUMBER = 1e15 # Which should be above any chromosome size def parse_arguments(args=None): @@ -244,7 +244,11 @@ def get_region(region_string): """ if region_string: # separate the chromosome name and the location using the ':' character - chrom, position = region_string.strip().split(":") + try: + chrom, position = region_string.strip().split(":") + except ValueError: + # It can be a full chromosome: + return region_string.strip(), 0, HUGE_NUMBER # clean up the position for char in ",.;|!{}()": @@ -258,7 +262,7 @@ def get_region(region_string): try: region_end = int(position_list[1]) except IndexError: - region_end = 1e15 # a huge number + region_end = HUGE_NUMBER if region_start < 0: region_start = 0 if region_end <= region_start: @@ -273,14 +277,11 @@ def get_region(region_string): def main(args=None): args = parse_arguments().parse_args(args) - trp = PlotTracks(args.tracks.name, args.width, fig_height=args.height, - fontsize=args.fontSize, dpi=args.dpi, - track_label_width=args.trackLabelFraction) + # Identify the regions to plot: if args.BED: - count = 0 + regions = [] for line in args.BED.readlines(): - count += 1 try: chrom, start, end = line.strip().split('\t')[0:3] except ValueError: @@ -288,27 +289,38 @@ def main(args=None): try: start, end = map(int, [start, end]) except ValueError as detail: - sys.stderr.write("Invalid value found at line\t{}\t. {}\n".format(line, detail)) - name = args.outFileName.split(".") - file_suffix = name[-1] - file_prefix = ".".join(name[:-1]) + warnings.warn("Invalid value found at line\t{}\t. {}\n".format(line, detail)) + continue + regions.append((chrom, start, end)) + else: + regions = [get_region(args.region)] + if len(regions) == 0: + raise InputError("There is no valid regions to plot.") + + # Create all the tracks + trp = PlotTracks(args.tracks.name, args.width, fig_height=args.height, + fontsize=args.fontSize, dpi=args.dpi, + track_label_width=args.trackLabelFraction, + plot_regions=regions) + + # Plot them + if args.BED: + name = args.outFileName.split(".") + file_suffix = name[-1] + file_prefix = ".".join(name[:-1]) + for chrom, start, end in regions: file_name = "{}_{}-{}-{}.{}".format(file_prefix, chrom, start, end, file_suffix) if end - start < 200000: - sys.stderr.write("A region shorter than 200kb has been " - "detected! This can be too small to return " - "a proper TAD plot!\n") - # start -= 100000 - # start = max(0, start) - # end += 100000 + warnings.warn("A region shorter than 200kb has been " + "detected! This can be too small to return " + "a proper TAD plot!\n") sys.stderr.write("saving {}\n".format(file_name)) - # print("{} {} {}".format(chrom, start, end)) trp.plot(file_name, chrom, start, end, title=args.title, h_align_titles=args.trackLabelHAlign, decreasing_x_axis=args.decreasingXAxis) else: - region = get_region(args.region) - trp.plot(args.outFileName, *region, title=args.title, + trp.plot(args.outFileName, *regions[0], title=args.title, h_align_titles=args.trackLabelHAlign, decreasing_x_axis=args.decreasingXAxis) trp.close_files() diff --git a/pygenometracks/tests/generateAllOutput.sh b/pygenometracks/tests/generateAllOutput.sh index dee0be99..605b2887 100644 --- a/pygenometracks/tests/generateAllOutput.sh +++ b/pygenometracks/tests/generateAllOutput.sh @@ -18,8 +18,7 @@ bin/pgt --tracks ./pygenometracks/tests/test_data/bed_colormap_genes.ini --regio bin/pgt --tracks ./pygenometracks/tests/test_data/bedgraph.ini --region X:2850000-3150000 --trackLabelFraction 0.2 --dpi 130 -o ./pygenometracks/tests/test_data/master_bedgraph.png # test bedGraphTrack: -bin/pgt --tracks ./pygenometracks/tests/test_data/bedgraph_useMid.ini --region chr2:73,800,000-75,744,000 --trackLabelFraction 0.2 --width 38 --dpi 130 -o ./pygenometracks/tests/test_data/master_bedgraph_useMid.png -bin/pgt --tracks ./pygenometracks/tests/test_data/bedgraph_useMid.ini --region chr2:74,000,000-74,800,000 --trackLabelFraction 0.2 --width 38 --dpi 130 -o ./pygenometracks/tests/test_data/master_bedgraph_useMid_zoom.png +bin/pgt --tracks ./pygenometracks/tests/test_data/bedgraph_useMid.ini --BED ./pygenometracks/tests/test_data/regions_imbricated_chr2.bed --trackLabelFraction 0.2 --width 38 --dpi 130 -o ./pygenometracks/tests/test_data/master_bedgraph_useMid.png bin/pgt --tracks ./pygenometracks/tests/test_data/bedgraph_useMid.ini --region chr2:73,800,000-75,744,000 --trackLabelFraction 0.2 --width 38 --dpi 130 -o ./pygenometracks/tests/test_data/master_bedgraph_useMid.pdf bin/pgt --tracks ./pygenometracks/tests/test_data/operation_bdg.ini --region X:2700000-3100000 --trackLabelFraction 0.2 --dpi 130 -o ./pygenometracks/tests/test_data/master_operation_bdg.png @@ -35,11 +34,14 @@ bin/pgt --tracks ./pygenometracks/tests/test_data/epilogos.ini --region X:310000 # test_hiCMatrixTracks: bin/pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic.ini --region X:2500000-3500000 --trackLabelFraction 0.23 --width 38 --dpi 130 -o ./pygenometracks/tests/test_data/master_plot_hic.png -bin/pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic_rasterize_height.ini --region X:2500000-2600000 --trackLabelFraction 0.23 --width 38 --dpi 10 -o ./pygenometracks/tests/test_data/master_plot_hic_rasterize_height.pdf +bin/pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic_rasterize_height.ini --BED ./pygenometracks/tests/test_data/regions_XY.bed --trackLabelFraction 0.23 --width 38 --dpi 10 -o ./pygenometracks/tests/test_data/master_plot_hic_rasterize_height.pdf bin/pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic_log-log.ini --region X:2500000-3500000 --trackLabelFraction 0.23 --width 38 --dpi 130 -o ./pygenometracks/tests/test_data/master_plot_hic_log-log.png bin/pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic.ini --region X:2500000-3500000 --trackLabelFraction 0.23 --width 38 --dpi 130 --decreasingXAxis -o ./pygenometracks/tests/test_data/master_plot_hic_dec.png -bin/pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic_small_test_2.ini --region chr1:0-100000 --trackLabelFraction 0.23 --width 38 --dpi 130 -o ./pygenometracks/tests/test_data/master_hic_small_test_2.png +bin/pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic_small_test.ini --BED ./pygenometracks/tests/test_data/regions_chr1XY.bed --trackLabelFraction 0.23 --width 38 -o ./pygenometracks/tests/test_data/master_plot_hic_small_test.png +bin/pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic_small_test.ini --region 1:0-200000 --trackLabelFraction 0.23 --width 38 -o ./pygenometracks/tests/test_data/master_plot_hic_small_test.png +bin/pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic_small_test.ini --region chrM:0-20000 --trackLabelFraction 0.23 --width 38 -o ./pygenometracks/tests/test_data/master_plot_hic_small_test_chrM.png bin/pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic_small_test.ini --region chr1:0-5000 --trackLabelFraction 0.23 --width 38 --dpi 130 -o ./pygenometracks/tests/test_data/master_hic_small_test_small_region.png +bin/pgt --tracks ./pygenometracks/tests/test_data/browser_tracks_hic_small_test_2.ini --region chr1:0-100000 --trackLabelFraction 0.23 --width 38 --dpi 130 -o ./pygenometracks/tests/test_data/master_hic_small_test_2.png # test_logScale: bin/pgt --tracks ./pygenometracks/tests/test_data/log1p.ini --region X:2700000-3100000 --trackLabelFraction 0.2 --dpi 130 -o ./pygenometracks/tests/test_data/master_log1p.png diff --git a/pygenometracks/tests/test_bedGraphTrack.py b/pygenometracks/tests/test_bedGraphTrack.py index ba8389ca..c408bc5d 100644 --- a/pygenometracks/tests/test_bedGraphTrack.py +++ b/pygenometracks/tests/test_bedGraphTrack.py @@ -134,38 +134,51 @@ tolerance = 13 # default matplotlib pixed difference tolerance -def test_plot_bedgraph_tracks(): +def test_plot_bedgraph_tracks_with_bed(): + extension = '.png' - outfile = NamedTemporaryFile(suffix='.png', prefix='pyGenomeTracks_test_', delete=False) - args = "--tracks {0} --region chr2:73,800,000-75,744,000 "\ - "--trackLabelFraction 0.2 --width 38 --dpi 130 " \ - "--outFileName {1}" \ - "".format(os.path.join(ROOT, "bedgraph_useMid.ini"), - outfile.name).split() - pygenometracks.plotTracks.main(args) - print("saving test to {}".format(outfile.name)) - res = compare_images(os.path.join(ROOT, 'master_bedgraph_useMid.png'), - outfile.name, tolerance) - assert res is None, res - - os.remove(outfile.name) - - -def test_plot_bedgraph_tracks_zoom(): - - outfile = NamedTemporaryFile(suffix='.png', prefix='pyGenomeTracks_test_', delete=False) - args = "--tracks {0} --region chr2:74,000,000-74,800,000 "\ + outfile = NamedTemporaryFile(suffix=extension, prefix='pyGenomeTracks_test_', + delete=False) + args = "--tracks {0} --BED {1} "\ "--trackLabelFraction 0.2 --width 38 --dpi 130 " \ - "--outFileName {1}" \ + "--outFileName {2}" \ "".format(os.path.join(ROOT, "bedgraph_useMid.ini"), + os.path.join(ROOT, 'regions_imbricated_chr2.bed'), outfile.name).split() pygenometracks.plotTracks.main(args) - print("saving test to {}".format(outfile.name)) - res = compare_images(os.path.join(ROOT, 'master_bedgraph_useMid_zoom.png'), - outfile.name, tolerance) - assert res is None, res - - os.remove(outfile.name) + for region in ['chr2:73800000-75744000', 'chr2:74000000-74800000']: + region_str = region.replace(':', '-') + output_file = outfile.name[:-4] + '_' + region_str + extension + res = compare_images(os.path.join(ROOT, + 'master_bedgraph_useMid_' + + region_str + extension), + output_file, tolerance) + assert res is None, res + + os.remove(output_file) + + +def test_plot_bedgraph_tracks_individual(): + extension = '.png' + + for region in ['chr2:73800000-75744000', 'chr2:74000000-74800000']: + outfile = NamedTemporaryFile(suffix=extension, prefix='pyGenomeTracks_test_', + delete=False) + args = "--tracks {0} --region {1} "\ + "--trackLabelFraction 0.2 --width 38 --dpi 130 " \ + "--outFileName {2}" \ + "".format(os.path.join(ROOT, "bedgraph_useMid.ini"), + region, + outfile.name).split() + pygenometracks.plotTracks.main(args) + region_str = region.replace(':', '-') + res = compare_images(os.path.join(ROOT, + 'master_bedgraph_useMid_' + + region_str + extension), + outfile.name, tolerance) + assert res is None, res + + os.remove(outfile.name) def test_plot_bedgraph_tracks_rasterize(): diff --git a/pygenometracks/tests/test_data/browser_tracks_hic_rasterize_height.ini b/pygenometracks/tests/test_data/browser_tracks_hic_rasterize_height.ini index e9d46f00..6bca07c6 100644 --- a/pygenometracks/tests/test_data/browser_tracks_hic_rasterize_height.ini +++ b/pygenometracks/tests/test_data/browser_tracks_hic_rasterize_height.ini @@ -1,6 +1,6 @@ [hic matrix] -file = Li_et_al_2015.h5 +file = Li_et_al_2015.cool title = depth = 200000; transform = log1p; min_value = 5; height = 5 depth = 200000 min_value = 5 diff --git a/pygenometracks/tests/test_data/master_bedgraph_useMid.png b/pygenometracks/tests/test_data/master_bedgraph_useMid.png deleted file mode 100644 index d8c3149f..00000000 Binary files a/pygenometracks/tests/test_data/master_bedgraph_useMid.png and /dev/null differ diff --git a/pygenometracks/tests/test_data/master_bedgraph_useMid_chr2-73800000-75744000.png b/pygenometracks/tests/test_data/master_bedgraph_useMid_chr2-73800000-75744000.png new file mode 100644 index 00000000..e4d331ba Binary files /dev/null and b/pygenometracks/tests/test_data/master_bedgraph_useMid_chr2-73800000-75744000.png differ diff --git a/pygenometracks/tests/test_data/master_bedgraph_useMid_chr2-74000000-74800000.png b/pygenometracks/tests/test_data/master_bedgraph_useMid_chr2-74000000-74800000.png new file mode 100644 index 00000000..b8276f2e Binary files /dev/null and b/pygenometracks/tests/test_data/master_bedgraph_useMid_chr2-74000000-74800000.png differ diff --git a/pygenometracks/tests/test_data/master_bedgraph_useMid_pdf.png b/pygenometracks/tests/test_data/master_bedgraph_useMid_pdf.png new file mode 100644 index 00000000..42c40e2c Binary files /dev/null and b/pygenometracks/tests/test_data/master_bedgraph_useMid_pdf.png differ diff --git a/pygenometracks/tests/test_data/master_bedgraph_useMid_zoom.png b/pygenometracks/tests/test_data/master_bedgraph_useMid_zoom.png deleted file mode 100644 index 9d296191..00000000 Binary files a/pygenometracks/tests/test_data/master_bedgraph_useMid_zoom.png and /dev/null differ diff --git a/pygenometracks/tests/test_data/master_plot_hic_rasterize_height.pdf b/pygenometracks/tests/test_data/master_plot_hic_rasterize_height_X-2500000-2600000.pdf similarity index 99% rename from pygenometracks/tests/test_data/master_plot_hic_rasterize_height.pdf rename to pygenometracks/tests/test_data/master_plot_hic_rasterize_height_X-2500000-2600000.pdf index 41562985..1b15f730 100644 Binary files a/pygenometracks/tests/test_data/master_plot_hic_rasterize_height.pdf and b/pygenometracks/tests/test_data/master_plot_hic_rasterize_height_X-2500000-2600000.pdf differ diff --git a/pygenometracks/tests/test_data/master_plot_hic_rasterize_height_Y-0-1000000.pdf b/pygenometracks/tests/test_data/master_plot_hic_rasterize_height_Y-0-1000000.pdf new file mode 100644 index 00000000..7734e447 Binary files /dev/null and b/pygenometracks/tests/test_data/master_plot_hic_rasterize_height_Y-0-1000000.pdf differ diff --git a/pygenometracks/tests/test_data/master_plot_hic_rasterize_height_pdf.png b/pygenometracks/tests/test_data/master_plot_hic_rasterize_height_pdf.png deleted file mode 100644 index 64effd42..00000000 Binary files a/pygenometracks/tests/test_data/master_plot_hic_rasterize_height_pdf.png and /dev/null differ diff --git a/pygenometracks/tests/test_data/master_plot_hic_small_test.png b/pygenometracks/tests/test_data/master_plot_hic_small_test.png new file mode 100644 index 00000000..53dcfcf4 Binary files /dev/null and b/pygenometracks/tests/test_data/master_plot_hic_small_test.png differ diff --git a/pygenometracks/tests/test_data/master_plot_hic_small_test_chr1-0-500000.png b/pygenometracks/tests/test_data/master_plot_hic_small_test_chr1-0-500000.png new file mode 100644 index 00000000..d15892f0 Binary files /dev/null and b/pygenometracks/tests/test_data/master_plot_hic_small_test_chr1-0-500000.png differ diff --git a/pygenometracks/tests/test_data/master_plot_hic_small_test_chrM.png b/pygenometracks/tests/test_data/master_plot_hic_small_test_chrM.png new file mode 100644 index 00000000..b705d689 Binary files /dev/null and b/pygenometracks/tests/test_data/master_plot_hic_small_test_chrM.png differ diff --git a/pygenometracks/tests/test_data/master_plot_hic_small_test_chrX-2500000-2600000.png b/pygenometracks/tests/test_data/master_plot_hic_small_test_chrX-2500000-2600000.png new file mode 100644 index 00000000..fea77fc9 Binary files /dev/null and b/pygenometracks/tests/test_data/master_plot_hic_small_test_chrX-2500000-2600000.png differ diff --git a/pygenometracks/tests/test_data/master_plot_hic_small_test_chrY-0-1000000.png b/pygenometracks/tests/test_data/master_plot_hic_small_test_chrY-0-1000000.png new file mode 100644 index 00000000..a1d1d5bc Binary files /dev/null and b/pygenometracks/tests/test_data/master_plot_hic_small_test_chrY-0-1000000.png differ diff --git a/pygenometracks/tests/test_data/regions_XY.bed b/pygenometracks/tests/test_data/regions_XY.bed new file mode 100644 index 00000000..16267802 --- /dev/null +++ b/pygenometracks/tests/test_data/regions_XY.bed @@ -0,0 +1,2 @@ +X 2500000 2600000 +Y 0 1000000 diff --git a/pygenometracks/tests/test_data/regions_chr1XY.bed b/pygenometracks/tests/test_data/regions_chr1XY.bed new file mode 100644 index 00000000..a1f74c4b --- /dev/null +++ b/pygenometracks/tests/test_data/regions_chr1XY.bed @@ -0,0 +1,3 @@ +chr1 0 500000 +chrX 2500000 2600000 +chrY 0 1000000 diff --git a/pygenometracks/tests/test_data/regions_imbricated_chr2.bed b/pygenometracks/tests/test_data/regions_imbricated_chr2.bed new file mode 100644 index 00000000..adef7d87 --- /dev/null +++ b/pygenometracks/tests/test_data/regions_imbricated_chr2.bed @@ -0,0 +1,3 @@ +track type=bed name=regions_to_plot +chr2 73800000 75744000 +chr2 74000000 74800000 \ No newline at end of file diff --git a/pygenometracks/tests/test_hiCMatrixTracks.py b/pygenometracks/tests/test_hiCMatrixTracks.py index 4f09201f..adffaacf 100644 --- a/pygenometracks/tests/test_hiCMatrixTracks.py +++ b/pygenometracks/tests/test_hiCMatrixTracks.py @@ -119,7 +119,7 @@ browser_tracks_with_hic = """ [hic matrix] -file = Li_et_al_2015.h5 +file = Li_et_al_2015.cool title = depth = 200000; transform = log1p; min_value = 5; height = 5 depth = 200000 min_value = 5 @@ -298,19 +298,138 @@ def test_plot_hic_logmlog(): os.remove(outfile.name) -def test_plot_tracks_with_hic_rasterize_height(): +def test_plot_tracks_with_hic_rasterize_height_2chr(): + extension = '.pdf' - outfile = NamedTemporaryFile(suffix='.pdf', prefix='pyGenomeTracks_test_', + outfile = NamedTemporaryFile(suffix=extension, prefix='pyGenomeTracks_test_', delete=False) - args = "--tracks {0} --region X:2500000-2600000 "\ - "--trackLabelFraction 0.23 --width 38 " \ - "--dpi 10 --outFileName {1}" \ + args = "--tracks {0} --BED {1} "\ + "--trackLabelFraction 0.23 --width 38 "\ + "--dpi 10 --outFileName {2}" \ "".format(os.path.join(ROOT, 'browser_tracks_hic_rasterize_height.ini'), + os.path.join(ROOT, 'regions_XY.bed'), + outfile.name).split() + pygenometracks.plotTracks.main(args) + for region in ['X:2500000-2600000', 'Y:0-1000000']: + region_str = region.replace(':', '-') + output_file = outfile.name[:-4] + '_' + region_str + extension + res = compare_images(os.path.join(ROOT, + 'master_plot_hic_rasterize_height_' + + region_str + extension), + output_file, tolerance) + assert res is None, res + + os.remove(output_file) + + +def test_plot_tracks_with_hic_rasterize_height_2chr_individual(): + for region in ['X:2500000-2600000', 'Y:0-1000000']: + outfile = NamedTemporaryFile(suffix='.pdf', prefix='pyGenomeTracks_test_', + delete=False) + args = "--tracks {0} --region {1} "\ + "--trackLabelFraction 0.23 --width 38 "\ + "--dpi 10 --outFileName {2}" \ + "".format(os.path.join(ROOT, + 'browser_tracks_hic_rasterize_height.ini'), + region, + outfile.name).split() + pygenometracks.plotTracks.main(args) + res = compare_images(os.path.join(ROOT, + 'master_plot_hic_rasterize_height_' + + region.replace(':', '-') + '.pdf'), + outfile.name, tolerance) + assert res is None, res + + +def test_plot_tracks_with_hic_small_test(): + extension = '.png' + + outfile = NamedTemporaryFile(suffix=extension, prefix='pyGenomeTracks_test_', + delete=False) + args = "--tracks {} --BED {} "\ + "--trackLabelFraction 0.23 --width 38 " \ + "--outFileName {}" \ + "".format(os.path.join(ROOT, + 'browser_tracks_hic_small_test.ini'), + os.path.join(ROOT, 'regions_chr1XY.bed'), + outfile.name).split() + pygenometracks.plotTracks.main(args) + for region in ['chr1:0-500000', 'chrX:2500000-2600000', 'chrY:0-1000000']: + region_str = region.replace(':', '-') + output_file = outfile.name[:-4] + '_' + region_str + extension + res = compare_images(os.path.join(ROOT, + 'master_plot_hic_small_test_' + + region_str + extension), + output_file, tolerance) + assert res is None, res + + os.remove(output_file) + + +# The tests with individual chromosome does not give the same result: +# For the empty the problem is the colorbar which is different +# when you do not load all data and the transformation of nan to 0 +# when the matrix is not empty + +def test_plot_tracks_with_hic_small_test_individual(): + extension = '.png' + for region in ['chr1:0-500000']: # , 'chrX:2500000-2600000', 'chrY:-0-1000000']: + + outfile = NamedTemporaryFile(suffix=extension, prefix='pyGenomeTracks_test_', + delete=False) + args = "--tracks {} --region {} "\ + "--trackLabelFraction 0.23 --width 38 " \ + "--outFileName {}" \ + "".format(os.path.join(ROOT, + 'browser_tracks_hic_small_test.ini'), + region, + outfile.name).split() + pygenometracks.plotTracks.main(args) + region_str = region.replace(':', '-') + res = compare_images(os.path.join(ROOT, + 'master_plot_hic_small_test_' + + region_str + extension), + outfile.name, tolerance) + assert res is None, res + + os.remove(outfile.name) + + +def test_plot_tracks_with_hic_small_other_chr_name(): + region = '1:0-200000' + outfile = NamedTemporaryFile(suffix='.png', prefix='pyGenomeTracks_test_', + delete=False) + args = "--tracks {} --region {} "\ + "--trackLabelFraction 0.23 --width 38 " \ + "--outFileName {}" \ + "".format(os.path.join(ROOT, + 'browser_tracks_hic_small_test.ini'), + region, + outfile.name).split() + pygenometracks.plotTracks.main(args) + res = compare_images(os.path.join(ROOT, + 'master_plot_hic_small_test.png'), + outfile.name, tolerance) + assert res is None, res + + os.remove(outfile.name) + + +def test_plot_tracks_with_hic_small_above_chr_length(): + region = 'chrM:0-20000' + outfile = NamedTemporaryFile(suffix='.png', prefix='pyGenomeTracks_test_', + delete=False) + args = "--tracks {} --region {} "\ + "--trackLabelFraction 0.23 --width 38 " \ + "--outFileName {}" \ + "".format(os.path.join(ROOT, + 'browser_tracks_hic_small_test.ini'), + region, outfile.name).split() pygenometracks.plotTracks.main(args) res = compare_images(os.path.join(ROOT, - 'master_plot_hic_rasterize_height.pdf'), + 'master_plot_hic_small_test_chrM.png'), outfile.name, tolerance) assert res is None, res diff --git a/pygenometracks/tracks/BedGraphMatrixTrack.py b/pygenometracks/tracks/BedGraphMatrixTrack.py index 5594fe30..07b5961c 100644 --- a/pygenometracks/tracks/BedGraphMatrixTrack.py +++ b/pygenometracks/tracks/BedGraphMatrixTrack.py @@ -52,6 +52,7 @@ class BedGraphMatrixTrack(BedGraphTrack): 'plot_horizontal_lines': False, 'orientation': None, 'rasterize': True, + 'region': None, # Cannot be set manually but is set by tracksClass 'colormap': DEFAULT_BEDGRAPHMATRIX_COLORMAP} NECESSARY_PROPERTIES = ['file'] SYNONYMOUS_PROPERTIES = {'max_value': {'auto': None}, diff --git a/pygenometracks/tracks/BedGraphTrack.py b/pygenometracks/tracks/BedGraphTrack.py index 3d2dbe6d..30349a7d 100644 --- a/pygenometracks/tracks/BedGraphTrack.py +++ b/pygenometracks/tracks/BedGraphTrack.py @@ -1,5 +1,5 @@ from . GenomeTrack import GenomeTrack -from .. utilities import file_to_intervaltree, plot_coverage, InputError, transform +from .. utilities import file_to_intervaltree, plot_coverage, InputError, transform, change_chrom_names import numpy as np import pyBigWig import tempfile @@ -94,6 +94,7 @@ class BedGraphTrack(GenomeTrack): 'rasterize': False, 'number_of_bins': 700, 'type': 'fill', + 'region': None, # Cannot be set manually but is set by tracksClass 'transform': 'no', 'log_pseudocount': 0, 'y_axis_values': 'transformed', @@ -189,10 +190,13 @@ def load_file(self): try: self.tbx = pysam.TabixFile(self.properties['file']) except IOError: - self.interval_tree, __, __ = file_to_intervaltree(self.properties['file']) + self.interval_tree, __, __ = file_to_intervaltree(self.properties['file'], + self.properties['region']) # load the file as an interval tree else: - self.interval_tree, __, __ = file_to_intervaltree(self.properties['file']) + self.interval_tree, __, __ = file_to_intervaltree(self.properties['file'], + self.properties['region']) + self.num_fields = None def _get_row_data(self, row, tbx_var='self.tbx'): @@ -247,7 +251,7 @@ def get_scores(self, chrom_region, start_region, end_region, if tbx is not None: if chrom_region not in tbx.contigs: chrom_region_before = chrom_region - chrom_region = self.change_chrom_names(chrom_region) + chrom_region = change_chrom_names(chrom_region) if chrom_region not in tbx.contigs: self.log.warning("*Warning*\nNeither " + chrom_region_before + " nor " @@ -265,7 +269,7 @@ def get_scores(self, chrom_region, start_region, end_region, inttree = eval(inttree_var) if chrom_region not in list(inttree): chrom_region_before = chrom_region - chrom_region = self.change_chrom_names(chrom_region) + chrom_region = change_chrom_names(chrom_region) if chrom_region not in list(inttree): self.log.warning("*Warning*\nNeither " + chrom_region_before + " nor " diff --git a/pygenometracks/tracks/BedTrack.py b/pygenometracks/tracks/BedTrack.py index f90f94ef..1d983f8f 100644 --- a/pygenometracks/tracks/BedTrack.py +++ b/pygenometracks/tracks/BedTrack.py @@ -3,7 +3,7 @@ # To remove next 1.0 from .. readGtf import ReadGtf # End to remove -from .. utilities import opener, get_length_w, count_lines +from .. utilities import opener, get_length_w, count_lines, temp_file_from_intersect, change_chrom_names import matplotlib from matplotlib import font_manager from matplotlib.patches import Rectangle, Polygon @@ -17,6 +17,7 @@ DISPLAY_BED_VALID = ['collapsed', 'triangles', 'interleaved', 'stacked'] DISPLAY_BED_SYNONYMOUS = {'interlaced': 'interleaved', 'domain': 'interleaved'} DEFAULT_DISPLAY_BED = 'stacked' +AROUND_REGION = 100000 class BedTrack(GenomeTrack): @@ -124,6 +125,7 @@ class BedTrack(GenomeTrack): 'arrowhead_included': False, 'color_utr': 'grey', 'height_utr': 1, + 'region': None, # Cannot be set manually but is set by tracksClass 'arrow_length': None, 'all_labels_inside': False, 'labels_in_margin': False} @@ -162,7 +164,7 @@ def __init__(self, *args, **kwarg): # this is bed3, bed4, bed5, bed6, bed8, bed9 or bed12 self.len_w = None # this is the length of the letter 'w' given the font size self.interval_tree = {} # interval tree of the bed regions - self.interval_tree, min_score, max_score = self.process_bed() + self.interval_tree, min_score, max_score = self.process_bed(self.properties['region']) if self.colormap is not None: if self.properties['min_value'] is not None: min_score = self.properties['min_value'] @@ -217,7 +219,13 @@ def set_properties_defaults(self): # to set the distance between rows self.row_scale = 2.3 - def get_bed_handler(self): + def get_bed_handler(self, plot_regions=None): + if not self.properties['global_max_row']: + # I do the intersection: + file_to_open = temp_file_from_intersect(self.properties['file'], + plot_regions, AROUND_REGION) + else: + file_to_open = self.properties['file'] # To remove in next 1.0 if self.properties['file'].endswith('gtf') or \ self.properties['file'].endswith('gtf.gz'): @@ -228,21 +236,21 @@ def get_bed_handler(self): " use file_type = gtf." "".format(self.properties['section_name'], self.TRACK_TYPE)) - bed_file_h = ReadGtf(self.properties['file'], + bed_file_h = ReadGtf(file_to_open, self.properties['prefered_name'], self.properties['merge_transcripts']) total_length = bed_file_h.length else: # end of remove - total_length = count_lines(opener(self.properties['file']), + total_length = count_lines(opener(file_to_open), asBed=True) - bed_file_h = ReadBed(opener(self.properties['file'])) + bed_file_h = ReadBed(opener(file_to_open)) return(bed_file_h, total_length) - def process_bed(self): + def process_bed(self, plot_regions=None): - bed_file_h, total_length = self.get_bed_handler() + bed_file_h, total_length = self.get_bed_handler(plot_regions) self.bed_type = bed_file_h.file_type if self.properties['color'] == 'bed_rgb' and \ @@ -277,7 +285,7 @@ def process_bed(self): if valid_intervals == 0: self.log.warning("No valid intervals were found in file " - "{}".format(self.properties['file'])) + "{}\n".format(self.properties['file'])) return interval_tree, min_score, max_score @@ -351,7 +359,7 @@ def get_y_pos(self, free_row): def plot(self, ax, chrom_region, start_region, end_region): if chrom_region not in self.interval_tree.keys(): chrom_region_before = chrom_region - chrom_region = self.change_chrom_names(chrom_region) + chrom_region = change_chrom_names(chrom_region) if chrom_region not in self.interval_tree.keys(): self.log.warning("*Warning*\nNeither " + chrom_region_before + " nor " + chrom_region + " existss as a " diff --git a/pygenometracks/tracks/BigWigTrack.py b/pygenometracks/tracks/BigWigTrack.py index 5371cd99..88a3038c 100644 --- a/pygenometracks/tracks/BigWigTrack.py +++ b/pygenometracks/tracks/BigWigTrack.py @@ -1,6 +1,6 @@ from . GenomeTrack import GenomeTrack import numpy as np -from .. utilities import plot_coverage, InputError, transform +from .. utilities import plot_coverage, InputError, transform, change_chrom_names import pyBigWig DEFAULT_BIGWIG_COLOR = '#33a02c' @@ -148,7 +148,7 @@ def plot(self, ax, chrom_region, start_region, end_region): if chrom_region not in self.bw.chroms().keys(): chrom_region_before = chrom_region - chrom_region = self.change_chrom_names(chrom_region) + chrom_region = change_chrom_names(chrom_region) if chrom_region not in self.bw.chroms().keys(): self.log.warning("*Warning*\nNeither " + chrom_region_before + " nor " + chrom_region + " existss as a " @@ -210,7 +210,7 @@ def plot(self, ax, chrom_region, start_region, end_region): chrom_region2 = chrom_region if chrom_region2 not in self.bw2.chroms().keys(): chrom_region_before2 = chrom_region2 - chrom_region2 = self.change_chrom_names(chrom_region2) + chrom_region2 = change_chrom_names(chrom_region2) if chrom_region2 not in self.bw2.chroms().keys(): self.log.warning("*Warning*\nNeither " + chrom_region_before2 + " nor " diff --git a/pygenometracks/tracks/EpilogosTrack.py b/pygenometracks/tracks/EpilogosTrack.py index 90ea0960..06e66fea 100644 --- a/pygenometracks/tracks/EpilogosTrack.py +++ b/pygenometracks/tracks/EpilogosTrack.py @@ -39,7 +39,8 @@ class EpilogosTrack(BedGraphTrack): file_type = {} """.format(TRACK_TYPE) DEFAULTS_PROPERTIES = {'categories_file': None, - 'orientation': None} + 'orientation': None, + 'region': None} # Cannot be set manually but is set by tracksClass NECESSARY_PROPERTIES = ['file'] SYNONYMOUS_PROPERTIES = {} POSSIBLE_PROPERTIES = {'orientation': [None, 'inverted']} diff --git a/pygenometracks/tracks/GenomeTrack.py b/pygenometracks/tracks/GenomeTrack.py index 22658c55..3ba744c5 100644 --- a/pygenometracks/tracks/GenomeTrack.py +++ b/pygenometracks/tracks/GenomeTrack.py @@ -403,22 +403,6 @@ def process_color(self, param, colormap_possible=False, self.properties[param] = valid_colormap return True - @staticmethod - def change_chrom_names(chrom): - """ - Changes UCSC chromosome names to ensembl chromosome names - and vice versa. - """ - # TODO: mapping from chromosome names like mithocondria is missing - if chrom.startswith('chr'): - # remove the chr part from chromosome name - chrom = chrom[3:] - else: - # prefix with 'chr' the chromosome name - chrom = 'chr' + chrom - - return chrom - @staticmethod def check_chrom_str_bytes(iteratable_obj, p_obj): # determine type diff --git a/pygenometracks/tracks/GtfTrack.py b/pygenometracks/tracks/GtfTrack.py index c91b3cbf..a8ff9369 100644 --- a/pygenometracks/tracks/GtfTrack.py +++ b/pygenometracks/tracks/GtfTrack.py @@ -2,12 +2,14 @@ from . BedTrack import BedTrack from .. readGtf import ReadGtf from matplotlib import font_manager +from .. utilities import temp_file_from_intersect import numpy as np DEFAULT_BED_COLOR = '#1f78b4' DISPLAY_BED_VALID = ['collapsed', 'triangles', 'interleaved', 'stacked'] DISPLAY_BED_SYNONYMOUS = {'interlaced': 'interleaved', 'domain': 'interleaved'} DEFAULT_DISPLAY_BED = 'stacked' +AROUND_REGION = 100000 class GtfTrack(BedTrack): @@ -104,6 +106,7 @@ class GtfTrack(BedTrack): 'color_utr': 'grey', 'height_utr': 1, 'arrow_length': None, + 'region': None, # Cannot be set manually but is set by tracksClass 'all_labels_inside': False, 'labels_in_margin': False} NECESSARY_PROPERTIES = ['file'] @@ -147,8 +150,15 @@ def set_properties_defaults(self): # to set the distance between rows self.row_scale = 2.3 - def get_bed_handler(self): - bed_file_h = ReadGtf(self.properties['file'], + def get_bed_handler(self, plot_regions=None): + if not self.properties['global_max_row']: + # I do the intersection: + file_to_open = temp_file_from_intersect(self.properties['file'], + plot_regions, AROUND_REGION) + else: + file_to_open = self.properties['file'] + + bed_file_h = ReadGtf(file_to_open, self.properties['prefered_name'], self.properties['merge_transcripts']) total_length = bed_file_h.length diff --git a/pygenometracks/tracks/HiCMatrixTrack.py b/pygenometracks/tracks/HiCMatrixTrack.py index 62e71f33..c7796883 100644 --- a/pygenometracks/tracks/HiCMatrixTrack.py +++ b/pygenometracks/tracks/HiCMatrixTrack.py @@ -7,6 +7,7 @@ import numpy as np from matplotlib.ticker import LogFormatter from . GenomeTrack import GenomeTrack +from .. utilities import change_chrom_names import logging import itertools @@ -52,7 +53,7 @@ class HiCMatrixTrack(GenomeTrack): # rasterize = false file_type = {} """.format(TRACK_TYPE) - DEFAULTS_PROPERTIES = {'region': None, + DEFAULTS_PROPERTIES = {'region': None, # Cannot be set manually but is set by tracksClass 'depth': 100000, 'orientation': None, 'show_masked_bins': False, @@ -84,27 +85,81 @@ def __init__(self, *args, **kwargs): def set_properties_defaults(self): super(HiCMatrixTrack, self).set_properties_defaults() + # Put default img to None for y axis + self.img = None region = None if self.properties['region'] is not None: - if self.properties['region'][2] == 1e15: - region = [str(self.properties['region'][0])] - elif len(self.properties['region']) == 3: - start = int(self.properties['region'][1]) - self.properties['depth'] - if start < 0: - start = 0 - end = int(self.properties['region'][2]) + self.properties['depth'] - - region = [str(self.properties['region'][0]) + ':' + str(start) + '-' + str(end)] - # try to open with end region + depth to avoid triangle effect in the plot - # if it fails open it with given end region. + # We need to restrict it to a single region because + # HiCMatrix does not accept more + # We check if everything is on a single chrom: + if len(set([r[0] for r in self.properties['region']])) == 1: + chrom = self.properties['region'][0][0] + start = min([r[1] for r in self.properties['region']]) + end = max([r[2] for r in self.properties['region']]) + # I extend of depth to avoid triangle effect in the plot + start = max(0, start - self.properties['depth']) + end += self.properties['depth'] + region = ["{}:{}-{}".format(chrom, start, end)] + # Cooler and thus HiCMatrix with cool file will raise an error if: + # - the file is a cool file and: + # - the region goes over the chromosome size + # or + # - the chromosome is not part of the matrix + + # We need to change the log level because we don't want + # the user to see all the errors raised during the try except + logging.getLogger('hicmatrix').setLevel(logging.CRITICAL) try: - self.hic_ma = HiCMatrix.hiCMatrix(self.properties['file'], pChrnameList=region) - except Exception: - region = [str(self.properties['region'][0]) + ':' + str(start) + '-' + str(self.properties['region'][2])] - self.hic_ma = HiCMatrix.hiCMatrix(self.properties['file'], pChrnameList=region) + self.hic_ma = HiCMatrix.hiCMatrix(self.properties['file'], + pChrnameList=region) + except ValueError as ve: + if region is not None: + if "Unknown sequence label" in str(ve): + rs = region[0].split(':') + chrom_region = rs[0] + chrom_region_before = chrom_region + chrom_region = change_chrom_names(chrom_region) + if len(rs) == 2: + region = ["{}:{}".format(chrom_region, rs[1])] + else: + region = [chrom_region] + try: + self.hic_ma = HiCMatrix.hiCMatrix(self.properties['file'], + pChrnameList=region) + except ValueError as ve2: + if "Unknown sequence label" in str(ve2): + self.log.warning("*Warning*\nNeither " + chrom_region_before + + " nor " + chrom_region + " exists as a " + "chromosome name on the matrix. " + "This will generate an empty track!!\n") + self.hic_ma = HiCMatrix.hiCMatrix() + self.hic_ma.matrix = scipy.sparse.csr_matrix((0, 0)) + elif "Genomic region out of bounds" in str(ve2): + region = [chrom_region] + self.hic_ma = HiCMatrix.hiCMatrix(self.properties['file'], + pChrnameList=region) + else: + raise ve2 + elif "Genomic region out of bounds" in str(ve): + region = [region[0].split(':')[0]] + self.hic_ma = HiCMatrix.hiCMatrix(self.properties['file'], + pChrnameList=region) + else: + raise ve + else: + raise ve + # We put back the log to warning + logging.getLogger('hicmatrix').setLevel(logging.WARNING) if len(self.hic_ma.matrix.data) == 0: - raise Exception("Matrix {} is empty".format(self.properties['file'])) + if region is None: + # This is not due to a restriction of the matrix + raise Exception("Matrix {} is empty".format(self.properties['file'])) + else: + return + # We need to get the size before masking bins because + # HiCMatrix v13 give smaller chromosome_sizes after: + self.chrom_sizes = self.hic_ma.get_chromosome_sizes() if self.properties['show_masked_bins']: pass else: @@ -166,28 +221,38 @@ def set_properties_defaults(self): self.cmap.set_bad('black') def plot(self, ax, chrom_region, region_start, region_end): + if len(self.hic_ma.matrix.data) == 0: + self.log.warning("*Warning*\nThere is no data for the region " + "considered on the matrix. " + "This will generate an empty track!!\n") + self.img = None + return log.debug('chrom_region {}, region_start {}, region_end {}'.format(chrom_region, region_start, region_end)) - chrom_sizes = self.hic_ma.get_chromosome_sizes() - if chrom_region not in chrom_sizes: + if chrom_region not in self.chrom_sizes: chrom_region_before = chrom_region - chrom_region = self.change_chrom_names(chrom_region) - if chrom_region not in chrom_sizes: + chrom_region = change_chrom_names(chrom_region) + if chrom_region not in self.chrom_sizes: self.log.warning("*Warning*\nNeither " + chrom_region_before - + " nor " + chrom_region + " existss as a " + + " nor " + chrom_region + " exists as a " "chromosome name on the matrix. " "This will generate an empty track!!\n") + self.img = None return - chrom_region = self.check_chrom_str_bytes(chrom_sizes, chrom_region) - if region_end > chrom_sizes[chrom_region]: - raise Exception("*Error*\nThe region to plot extends beyond the chromosome size. Please check.\n" - "{} size: {}. Region to plot {}-{}\n".format(chrom_region, chrom_sizes[chrom_region], - region_start, region_end)) + chrom_region = self.check_chrom_str_bytes(self.chrom_sizes, chrom_region) + if region_end > self.chrom_sizes[chrom_region]: + self.log.warning("*Warning*\nThe region to plot extends beyond the chromosome size. Please check.\n" + "{} size: {}. Region to plot {}-{}\n".format(chrom_region, self.chrom_sizes[chrom_region], + region_start, region_end)) - # if self.properties['file'].endswith('.cool'): - # # load now the region to be plotted - # pass + # A chromosome may disappear if it was full of Nan and nan bins were masked: + if chrom_region not in self.hic_ma.get_chromosome_sizes(): + self.log.warning("*Warning*\nThere is no data for the region " + "considered on the matrix. " + "This will generate an empty track!!\n") + self.img = None + return # expand region to plus depth on both sides # to avoid a 45 degree 'cut' on the edges @@ -246,7 +311,10 @@ def plot(self, ax, chrom_region, region_start, region_end): else: # try to use a 'aesthetically pleasant' max value - vmax = np.percentile(matrix.diagonal(1), 80) + try: + vmax = np.percentile(matrix.diagonal(1), 80) + except Exception: + vmax = None if self.properties['min_value'] is not None: vmin = self.properties['min_value'] @@ -277,6 +345,8 @@ def plot(self, ax, chrom_region, region_start, region_end): ax.set_ylim(0, depth) def plot_y_axis(self, cbar_ax, plot_ax): + if self.img is None: + return if self.properties['transform'] in ['log', 'log1p']: # get a useful log scale diff --git a/pygenometracks/tracks/LinksTrack.py b/pygenometracks/tracks/LinksTrack.py index 76bf630f..d27f1d74 100644 --- a/pygenometracks/tracks/LinksTrack.py +++ b/pygenometracks/tracks/LinksTrack.py @@ -5,9 +5,11 @@ import numpy as np import matplotlib.pyplot as plt from matplotlib.patches import Arc, Polygon +from .. utilities import opener, to_string, change_chrom_names, temp_file_from_intersect from tqdm import tqdm DEFAULT_LINKS_COLOR = 'blue' +HUGE_NUMBER = 1e15 # Which should be above any chromosome size class LinksTrack(GenomeTrack): @@ -70,6 +72,7 @@ class LinksTrack(GenomeTrack): 'alpha': 0.8, 'max_value': None, 'min_value': None, + 'region': None, # Cannot be set manually but is set by tracksClass 'ylim': None, 'compact_arcs_level': '0', 'use_middle': False} @@ -98,7 +101,7 @@ class LinksTrack(GenomeTrack): def set_properties_defaults(self): super(LinksTrack, self).set_properties_defaults() self.max_height = None - self.interval_tree, min_score, max_score, has_score = self.process_link_file() + self.interval_tree, min_score, max_score, has_score = self.process_link_file(self.properties['region']) if self.properties['line_width'] is None and not has_score: self.log.warning("*WARNING* for section {}" " no line_width has been set but some " @@ -156,7 +159,7 @@ def plot(self, ax, chrom_region, region_start, region_end): if chrom_region not in list(self.interval_tree): chrom_region_before = chrom_region - chrom_region = self.change_chrom_names(chrom_region) + chrom_region = change_chrom_names(chrom_region) if chrom_region not in list(self.interval_tree): self.log.warning("*Warning*\nNeither " + chrom_region_before + " nor " + chrom_region + " existss as a " @@ -318,81 +321,92 @@ def plot_loops(self, ax, loop): if y2 > self.max_height: self.max_height = y2 - def process_link_file(self): + def process_link_file(self, plot_regions): # the file format expected is similar to file format of links in # circos: # chr1 100 200 chr1 250 300 0.5 # where the last value is a score. + + if plot_regions is None: + file_to_open = self.properties['file'] + else: + # To be sure we do not miss links we will intersect with bed with + # only chromosomes used in plot_regions + plot_regions_adapted = [(chrom, 0, HUGE_NUMBER) for chrom, __, __ in plot_regions] + file_to_open = temp_file_from_intersect(self.properties['file'], + plot_regions_adapted) + valid_intervals = 0 interval_tree = {} line_number = 0 has_score = True max_score = float('-inf') min_score = float('inf') - with open(self.properties['file'], 'r') as file_h: - for line in tqdm(file_h.readlines()): - line_number += 1 - if line.startswith('browser') or line.startswith('track') or line.startswith('#'): - continue - try: - chrom1, start1, end1, chrom2, start2, end2 = line.strip().split('\t')[:6] - except Exception as detail: - raise InputError('File not valid. The format is chrom1 start1, end1, ' - 'chrom2, start2, end2\nError: {}\n in line\n {}'.format(detail, line)) - try: - score = line.strip().split('\t')[6] - except IndexError: - has_score = False - score = np.nan + file_h = opener(file_to_open) + for line in tqdm(file_h.readlines()): + line_number += 1 + line = to_string(line) + if line.startswith('browser') or line.startswith('track') or line.startswith('#'): + continue + try: + chrom1, start1, end1, chrom2, start2, end2 = line.strip().split('\t')[:6] + except Exception as detail: + raise InputError('File not valid. The format is chrom1 start1, end1, ' + 'chrom2, start2, end2\nError: {}\n in line\n {}'.format(detail, line)) + if chrom1 != chrom2: + self.log.warning("Only links in same chromosome are used. Skipping line\n{}\n".format(line)) + continue + try: + score = line.strip().split('\t')[6] + except IndexError: + has_score = False + score = np.nan + + try: + start1 = int(start1) + end1 = int(end1) + start2 = int(start2) + end2 = int(end2) + except ValueError as detail: + raise InputError("Error reading line: {}. One of the fields is not " + "an integer.\nError message: {}".format(line_number, detail)) + + assert start1 <= end1, "Error in line #{}, end1 larger than start1 in {}".format(line_number, line) + assert start2 <= end2, "Error in line #{}, end2 larger than start2 in {}".format(line_number, line) + + if has_score: try: - start1 = int(start1) - end1 = int(end1) - start2 = int(start2) - end2 = int(end2) + score = float(score) except ValueError as detail: - raise InputError("Error reading line: {}. One of the fields is not " - "an integer.\nError message: {}".format(line_number, detail)) - - assert start1 <= end1, "Error in line #{}, end1 larger than start1 in {}".format(line_number, line) - assert start2 <= end2, "Error in line #{}, end2 larger than start2 in {}".format(line_number, line) - - if has_score: - try: - score = float(score) - except ValueError as detail: - self.log.warning("Warning: reading line: {}. The score is not valid {} will not be used. " - "\nError message: {}".format(line_number, score, detail)) - score = np.nan - has_score = False - else: - if score < min_score: - min_score = score - if score > max_score: - max_score = score - - if chrom1 != chrom2: - self.log.warning("Only links in same chromosome are used. Skipping line\n{}\n".format(line)) - continue - - if chrom1 not in interval_tree: - interval_tree[chrom1] = IntervalTree() - - if start2 < start1: - start1, start2 = start2, start1 - end1, end2 = end2, end1 - - if self.properties['use_middle']: - mid1 = (start1 + end1) / 2 - mid2 = (start2 + end2) / 2 - interval_tree[chrom1].add(Interval(mid1, mid2, [start1, end1, start2, end2, score])) + self.log.warning("Warning: reading line: {}. The score is not valid {} will not be used. " + "\nError message: {}".format(line_number, score, detail)) + score = np.nan + has_score = False else: - # each interval spans from the smallest start to the largest end - interval_tree[chrom1].add(Interval(start1, end2, [start1, end1, start2, end2, score])) - valid_intervals += 1 + if score < min_score: + min_score = score + if score > max_score: + max_score = score + + if chrom1 not in interval_tree: + interval_tree[chrom1] = IntervalTree() + + if start2 < start1: + start1, start2 = start2, start1 + end1, end2 = end2, end1 + + if self.properties['use_middle']: + mid1 = (start1 + end1) / 2 + mid2 = (start2 + end2) / 2 + interval_tree[chrom1].add(Interval(mid1, mid2, [start1, end1, start2, end2, score])) + else: + # each interval spans from the smallest start to the largest end + interval_tree[chrom1].add(Interval(start1, end2, [start1, end1, start2, end2, score])) + valid_intervals += 1 if valid_intervals == 0: - self.log.warning("No valid intervals were found in file {}".format(self.properties['file'])) + self.log.warning("No valid intervals were found in file {}\n".format(self.properties['file'])) file_h.close() return(interval_tree, min_score, max_score, has_score) diff --git a/pygenometracks/tracks/NarrowPeakTrack.py b/pygenometracks/tracks/NarrowPeakTrack.py index c02f0b26..fd1a2ca8 100644 --- a/pygenometracks/tracks/NarrowPeakTrack.py +++ b/pygenometracks/tracks/NarrowPeakTrack.py @@ -43,6 +43,7 @@ class NarrowPeakTrack(BedGraphTrack): 'use_summit': True, 'width_adjust': 1.5, 'type': 'peak', + 'region': None, # Cannot be set manually but is set by tracksClass 'line_width': 1} NECESSARY_PROPERTIES = ['file'] SYNONYMOUS_PROPERTIES = {'max_value': {'auto': None}} diff --git a/pygenometracks/tracks/TADsTrack.py b/pygenometracks/tracks/TADsTrack.py index 1550fcb6..75c7b081 100644 --- a/pygenometracks/tracks/TADsTrack.py +++ b/pygenometracks/tracks/TADsTrack.py @@ -29,8 +29,7 @@ class TADsTrack(BedTrack): file_type = {} """.format(TRACK_TYPE) - DEFAULTS_PROPERTIES = {'fontsize': 12, - 'orientation': None, + DEFAULTS_PROPERTIES = {'orientation': None, 'color': DEFAULT_BED_COLOR, 'border_color': 'black', 'line_width': 0.5, @@ -39,7 +38,8 @@ class TADsTrack(BedTrack): 'merge_transcripts': False, # End to remove 'max_value': None, - 'min_value': None} + 'min_value': None, + 'region': None} # Cannot be set manually but is set by tracksClass NECESSARY_PROPERTIES = ['file'] SYNONYMOUS_PROPERTIES = {'max_value': {'auto': None}, 'min_value': {'auto': None}} @@ -63,3 +63,8 @@ class TADsTrack(BedTrack): def __init__(self, *args, **kwarg): super(TADsTrack, self).__init__(*args, **kwarg) self.properties['display'] = 'triangles' + + def set_properties_defaults(self): + self.properties['fontsize'] = 12 + super(TADsTrack, self).set_properties_defaults() + self.properties['global_max_row'] = False diff --git a/pygenometracks/tracksClass.py b/pygenometracks/tracksClass.py index bc10db07..c8677c30 100644 --- a/pygenometracks/tracksClass.py +++ b/pygenometracks/tracksClass.py @@ -12,7 +12,7 @@ import matplotlib.gridspec import matplotlib.cm import mpl_toolkits.axisartist as axisartist -from . utilities import file_to_intervaltree +from . utilities import file_to_intervaltree, change_chrom_names from collections import OrderedDict from pygenometracks.tracks.GenomeTrack import GenomeTrack from pygenometracks.utilities import InputError @@ -66,7 +66,7 @@ class PlotTracks(object): def __init__(self, tracks_file, fig_width=DEFAULT_FIGURE_WIDTH, fig_height=None, fontsize=None, dpi=None, track_label_width=None, - pRegion=None): + plot_regions=None): self.fig_width = fig_width self.fig_height = fig_height self.dpi = dpi @@ -75,7 +75,7 @@ def __init__(self, tracks_file, fig_width=DEFAULT_FIGURE_WIDTH, self.track_list = None start = self.print_elapsed(None) self.available_tracks = self.get_available_tracks() - self.parse_tracks(tracks_file) + self.parse_tracks(tracks_file, plot_regions=plot_regions) if fontsize: fontsize = fontsize else: @@ -98,11 +98,8 @@ def __init__(self, tracks_file, fig_width=DEFAULT_FIGURE_WIDTH, log.info("initialize {}".format(properties['section_name'])) # the track_class is obtained from the available tracks track_class = self.available_tracks[properties['file_type']] - if properties['file_type'] == 'hic_matrix': - properties['region'] = pRegion - self.track_obj_list.append(track_class(properties)) - else: - self.track_obj_list.append(track_class(properties)) + properties['region'] = plot_regions + self.track_obj_list.append(track_class(properties)) log.info("time initializing track(s):") self.print_elapsed(start) @@ -302,7 +299,7 @@ def plot_vlines(self, axis_list, chrom_region, start_region, end_region): if chrom_region not in list(self.vlines_intval_tree): chrom_region_before = chrom_region - chrom_region = GenomeTrack.change_chrom_names(chrom_region) + chrom_region = change_chrom_names(chrom_region) if chrom_region not in list(self.vlines_intval_tree): log.warning("*Warning*\nNeither " + chrom_region_before + " nor " @@ -326,11 +323,14 @@ def plot_vlines(self, axis_list, chrom_region, start_region, end_region): return - def parse_tracks(self, tracks_file): + def parse_tracks(self, tracks_file, plot_regions=None): """ Parses a configuration file :param tracks_file: file path containing the track configuration + :param plot_regions: a list of tuple [(chrom1, start1, end1), (chrom2, start2, end2)] + on which the data should be loaded + here the vlines :return: array of dictionaries and vlines_file. One dictionary per track """ @@ -522,7 +522,8 @@ def parse_tracks(self, tracks_file): self.track_list = track_list if self.vlines_properties: self.vlines_intval_tree, __, __ = \ - file_to_intervaltree(self.vlines_properties['file']) + file_to_intervaltree(self.vlines_properties['file'], + plot_regions) def close_files(self): """ diff --git a/pygenometracks/utilities.py b/pygenometracks/utilities.py index a738b7d6..4f715599 100644 --- a/pygenometracks/utilities.py +++ b/pygenometracks/utilities.py @@ -3,6 +3,8 @@ import numpy as np from tqdm import tqdm from intervaltree import IntervalTree, Interval +import pybedtools +import tempfile import warnings @@ -56,17 +58,51 @@ def opener(filename): return f -def file_to_intervaltree(file_name): +def temp_file_from_intersect(file_name, plot_regions=None, around_region=0): + """ + intersect file_name with the plot_regions +/- around_region + :param file_name: string file name + :param plot_regions:a list of tuple [(chrom1, start1, end1), (chrom2, start2, end2)] + with the region to restrict the data to. + :param around_region: integer with the bp to extend to plot_regions + :return: temporary file with the intersection + """ + file_to_open = file_name + # Check if we can restrict the interval tree to a region: + if plot_regions is not None: + # We use pybedtools to overlap: + original_file = pybedtools.BedTool(file_name) + # We extend the start and end: + plot_regions_ext = [(chrom, max(0, start - around_region), end + around_region) for chrom, start, end in plot_regions] + # We will overlap with both version of chromosome name: + plot_regions_as_bed = '\n'.join([f'{chrom} {start} {end}\n{change_chrom_names(chrom)} {start} {end}' for chrom, start, end in plot_regions_ext]) + regions = pybedtools.BedTool(plot_regions_as_bed, from_string=True) + # Bedtools will put a warning because we are using inconsistent + # nomenclature (with and without chr) + sys.stderr = open(tempfile.NamedTemporaryFile().name, 'w') + try: + file_to_open = original_file.intersect(regions, wa=True, u=True).fn + except pybedtools.helpers.BEDToolsError: + file_to_open = file_name + sys.stderr.close() + sys.stderr = sys.__stderr__ + return file_to_open + + +def file_to_intervaltree(file_name, plot_regions=None): """ converts a BED like file into a bx python interval tree :param file_name: string file name + :param plot_regions:a list of tuple [(chrom1, start1, end1), (chrom2, start2, end2)] + with the region to restrict the data to. :return: interval tree dictionary. They key is the chromosome/contig name and the value is an IntervalTree. Each of the intervals have as 'value' the fields[3:] if any. """ + file_to_open = temp_file_from_intersect(file_name, plot_regions, 0) # iterate over a BED like file # saving the data into an interval tree # for quick retrieval - file_h = opener(file_name) + file_h = opener(file_to_open) line_number = 0 valid_intervals = 0 prev_chrom = None @@ -130,7 +166,7 @@ def file_to_intervaltree(file_name): valid_intervals += 1 if valid_intervals == 0: - sys.stderr.write("No valid intervals were found in file {}".format(file_name)) + sys.stderr.write("No valid intervals were found in file {}\n".format(file_name)) file_h.close() return interval_tree, min_value, max_value @@ -262,3 +298,19 @@ def count_lines(file_h, asBed=False): n += 1 file_h.close() return(n) + + +def change_chrom_names(chrom): + """ + Changes UCSC chromosome names to ensembl chromosome names + and vice versa. + """ + # TODO: mapping from chromosome names like mithocondria is missing + if chrom.startswith('chr'): + # remove the chr part from chromosome name + chrom = chrom[3:] + else: + # prefix with 'chr' the chromosome name + chrom = 'chr' + chrom + + return chrom diff --git a/requirements.txt b/requirements.txt index efb6a1e5..a781c25b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,8 @@ matplotlib =3.1.1 intervaltree >=2.1.0 pybigwig >=0.3.16 future >=0.17.0 -hicmatrix >=12 +hicmatrix >=13 pysam >=0.14 gffutils >=0.9 +pybedtools >=0.8.1 tqdm >=4.20 diff --git a/setup.py b/setup.py index 44f08df5..6bf3dbee 100644 --- a/setup.py +++ b/setup.py @@ -98,10 +98,11 @@ def checkProgramIsInstalled(self, program, args, where_to_download, "intervaltree >=2.1.0", "pyBigWig >=0.3.16", "future >=0.17.0", - "hicmatrix >=12", + "hicmatrix >=13", "pysam >=0.14", "pytest", "gffutils >=0.9", + "pybedtools >=0.8.1", "tqdm >=4.20" ]