From 39c5c1d780a64447296d9985965facfce7ba69c8 Mon Sep 17 00:00:00 2001 From: Ung0d Date: Wed, 14 Jun 2023 05:45:29 +0000 Subject: [PATCH] another hotfix for a rare aligned insertions bug --- learnMSA/_version.py | 2 +- learnMSA/msa_hmm/AlignInsertions.py | 9 +++++---- test/data/egf.out.fasta | 24 ++++++++++++------------ 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/learnMSA/_version.py b/learnMSA/_version.py index cd7e2da..fecd8c0 100644 --- a/learnMSA/_version.py +++ b/learnMSA/_version.py @@ -1 +1 @@ -__version__ = "1.2.3" \ No newline at end of file +__version__ = "1.2.4" \ No newline at end of file diff --git a/learnMSA/msa_hmm/AlignInsertions.py b/learnMSA/msa_hmm/AlignInsertions.py index 49f2d88..b237c1d 100644 --- a/learnMSA/msa_hmm/AlignInsertions.py +++ b/learnMSA/msa_hmm/AlignInsertions.py @@ -16,9 +16,7 @@ def find_long_insertions_and_write_slice(fasta_file, lens, starts, name, directo """ at_least_t = lens >= t lengths = lens[at_least_t] - if lengths.size > k: - if verbose: - print(f"Long insertions found at {name}: {lengths.size}.") + if lengths.size > 1: which = np.squeeze(np.argwhere(at_least_t)) start = starts[at_least_t] filename = f"{directory}/slice_{name}" @@ -38,7 +36,10 @@ def find_long_insertions_and_write_slice(fasta_file, lens, starts, name, directo slice_file.write(">"+fasta_file.seq_ids[which[j]]+"\n") slice_file.write(segment+"\n") which = np.delete(which, to_delete) - return (which, filename) + if which.size > k: + if verbose: + print(f"Long insertions found at {name}: {which.size}.") + return (which, filename) def make_aligned_insertions(am, directory, method="famsa", threads=0, verbose=True): diff --git a/test/data/egf.out.fasta b/test/data/egf.out.fasta index e5c1a96..5f70f65 100644 --- a/test/data/egf.out.fasta +++ b/test/data/egf.out.fasta @@ -1,24 +1,24 @@ >1ixa -..vdgdqCESN...PCLNGGSC..-..KD...DIN..S.YECWCPFGFEGKNcel.................... +..vdgdqCESN...PCLNGGSCK.--.DDIN..S.YECWCPFGFEGKNcel.................... >1apo -..kdgdqCEGH...PCLNQGHC..-..KD...GIG..D.YTCTCAEGFEGKNcefstr................. +..kdgdqCEGH...PCLNQGHCK.--.DGIG..D.YTCTCAEGFEGKNcefstr................. >1urk -......qVPSNc..DCLNGGTC..-..VSnkyFSN..I.HWCNCPKKFGGQHceidk.................. +.......QVPSnc.DCLNGGTCVsNK.-YFS..NiHWCNCPKKFGGQHceidk.................. >1fsb -....tasCQDM...SCSKQGEC..-..LE...TIG..N.YTCSCYPGFYGPEceyvre................. +....tasCQDM...SCSKQGECL.--.ETIG..N.YTCSCYPGFYGPEceyvre................. >1esl -......aCTNT...SCSGHGEC..-..VE...TIN..N.YTCKCDPGFSGLKceqiv.................. +......aCTNT...SCSGHGECV.--.ETIN..N.YTCKCDPGFSGLKceqiv.................. >1hre -gtshlvkCAEKektFCVNGGEC..FmvKD...LSNpsR.YLCKCQPGFTGARctenvpmkvqnqekaeelyqk.. +gtshlvkCAEKektFCVNGGECF.MVkDLSNpsR.YLCKCQPGFTGARctenvpmkvqnqekaeelyqk.. >1epi -..nsypgCPSSydgYCLNGGVC..Mh.IE...SLD..S.YTCNCVIGYSGDRcqtrdlrwwelr........... +..nsypgCPSSydgYCLNGGVCM.HI.ESLD..S.YTCNCVIGYSGDRcqtrdlrwwelr........... >4tgf -vvshfndCPDShtqFCFH-GTCrfL..-V...QED..K.PACVCHSGYVGARcehadlla............... +vvshfndCPDShtqFCFH-GTCRfLV.-QED..K.PACVCHSGYVGARcehadlla............... >1hcgb -.......CSLD...NGDCDQFC..-..HE...EQN..S.VVCSCARGYTLADngkaciptgpypcgkqtler... +.......CSLD...NGDCDQFCH.--.EEQN..S.VVCSCARGYTLADngkaciptgpypcgkqtler... >1dan1 -....gdqCASS...PCQNGGSC..-..KD...QLQ..S.YICFCLPAFEGRNcethkd................. +....gdqCASS...PCQNGGSCK.--.DQLQ..S.YICFCLPAFEGRNcethkd................. >1dan2 -...dqliCVNE...NGGCEQYC..-..SD...HTG..TkRSCRCHEGYSLLAdgvsctptveypcgkipile... +...dqliCVNE...NGGCEQYCS.--.DHTG..TkRSCRCHEGYSLLAdgvsctptveypcgkipile... >1rfnb -.....mtCNIK...NGRCEQFC..-..KN...SAD..NkVVCSCTEGYRLAEnqkscepavpfpcgrvsvsqtsk +.....mtCNIK...NGRCEQFCK.NS.-ADN..K.VVCSCTEGYRLAEnqkscepavpfpcgrvsvsqtsk