From feaf7f45456fe49285fbfbd8e7cf1d2256dde65e Mon Sep 17 00:00:00 2001 From: Samarendra Date: Fri, 16 Aug 2024 16:54:25 +0200 Subject: [PATCH] adding an exception case to get_path For the GRCh38 paths in the CHM13-based graph of HPRC, the nodes sharing an SN tag do not form a path. Hence conversion now uses an exception case that returns the sorted node list instead of warning and returning an empty list. --- gaftools/cli/view.py | 5 ++--- gaftools/gfa.py | 23 ++++++++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/gaftools/cli/view.py b/gaftools/cli/view.py index 5eaefa6..3091181 100644 --- a/gaftools/cli/view.py +++ b/gaftools/cli/view.py @@ -65,8 +65,7 @@ def run(gaf_path, gfa=None, output=None, index=None, nodes=[], regions=[], forma } ref_contig = [contig for contig in gfa_file.contigs if gfa_file.contigs[contig] == 0] for contig in gfa_file.contigs: - contig_len[contig] = gfa_file.get_contig_length(contig) - print(contig_len) + contig_len[contig] = gfa_file.get_contig_length(contig, throw_warning=False) del gfa_file else: assert format == "unstable" @@ -79,7 +78,7 @@ def run(gaf_path, gfa=None, output=None, index=None, nodes=[], regions=[], forma gfa_file = GFA(graph_file=gfa, low_memory=True) contigs = list(gfa_file.contigs.keys()) for contig in contigs: - path = gfa_file.get_path(contig) + path = gfa_file.get_path(contig, throw_warning=False) for node in path: reference[contig].append(gfa_file[node]) del gfa_file diff --git a/gaftools/gfa.py b/gaftools/gfa.py index 1b91a50..20a4e85 100644 --- a/gaftools/gfa.py +++ b/gaftools/gfa.py @@ -678,7 +678,10 @@ def return_gfa_path(self, list_of_nodes): return ",".join(path) - def get_path(self, chrom): + # TODO: Need to deal with non-path contigs like GRCh38-based paths in the CHM13-based minigraph rGFAs of HPRC. + # The nodes have the same SN tag but do not form a path. Need to work with such cases. + # Currently just create an exception case to use. 
+ def get_path(self, chrom, throw_warning=True): """ takes a chromosome name (matching the SN tag) and returns the path of that chromosome """ @@ -699,16 +702,22 @@ def get_path(self, chrom): if self.list_is_path(sorted_nodes): return sorted_nodes else: - logging.warning( - f"The sorted nodes with SN tag {chrom} did not create a linear path. Stopping! Returning empty list" - ) - return list() + if throw_warning: + logging.warning( + f"The sorted nodes with SN tag {chrom} did not create a linear path. Stopping! Returning empty list" + ) + return list() + else: + logging.warning( + f"The sorted nodes with SN tag {chrom} did not create a linear path. The sorted node list is returned for conversion." + ) + return sorted_nodes - def get_contig_length(self, chrom): + def get_contig_length(self, chrom, throw_warning=True): """ returns the length of the chromosome or contig name """ - sorted_nodes = self.get_path(chrom) + sorted_nodes = self.get_path(chrom, throw_warning) if not sorted_nodes: logging.error( "Was not able to return the length of the chromosome, check warning message(s)"