Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rename output gene multfastas and pass all sequences through #64

Merged
merged 2 commits into from
Dec 13, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,8 @@ example_2.faa.tmp.filtered.fa
pan_genome.fa
query_1.fa.tmp.filtered.fa
query_2.fa.tmp.filtered.fa
query_3.fa.tmp.filtered.fa
query_3.fa.tmp.filtered.fa
accessory.header.embl
blast_identity_frequency.Rtab
core_accessory.header.embl
reannotated_groups_file
2 changes: 1 addition & 1 deletion lib/Bio/PanGenome/External/Muscle.pm
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ sub _build__memory_required_in_mb {
# Build the shell command line that runs the program named by $self->exec
# on one FASTA file.
#
# Arguments:
#   $fasta_file  - value placed after '-in'
#   $output_file - value placed after '-out'
# Returns the command as a single space-joined string.
#
# The file names are interpolated as-is (no shell quoting is applied here).
#
# NOTE(review): two join(...) statements appear below, which looks like the
# removed and the added line of a rendered diff. As written, only the first
# one is returned; the second one is never reached. Confirm which line the
# real file keeps.
sub _command_to_run {
my ( $self, $fasta_file, $output_file ) = @_;
return
join( " ", ( $self->exec, '-in', $fasta_file, '-out', $output_file, '-quiet', '-maxhours', 7, ) );
# Unreachable as written: the same command with stdout and stderr redirected to /dev/null.
join( " ", ( $self->exec, '-in', $fasta_file, '-out', $output_file, '-quiet', '-maxhours', 7, '> /dev/null 2>&1') );
}

sub run {
Expand Down
4 changes: 3 additions & 1 deletion lib/Bio/PanGenome/External/Revtrans.pm
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,16 @@ sub _command_to_run {
$self->nucleotide_filename,
$self->protein_filename,
'-mtx', 11,
'-readthroughstop',
'-allinternal',
'-match', 'name',
'>', $self->output_filename
)
);
}

sub run {
my ($self) = @_;
my @commands_to_run;
my $cmd = $self->_command_to_run;
system($cmd);
1;
Expand Down
3 changes: 1 addition & 2 deletions lib/Bio/PanGenome/Output/GroupsMultifastaNucleotide.pm
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,8 @@ sub _group_file_name
{
# Build the path of the FASTA file for one group inside $self->output_directory.
#
# Arguments:
#   $group_name      - key used to look up the group's consensus gene name
#   $num_group_genes - number of genes in the group (only used by the
#                      count-prefixed file name, see note below)
# Returns "<output_directory>/<file name>".
my ($self,$group_name,$num_group_genes) = @_;
# NOTE(review): if the lookup has no entry for $group_name this would be undef
# and the substitution below would warn - confirm every group is present.
my $annotated_group_name = $self->annotate_groups->_groups_to_consensus_gene_names->{$group_name};
my $num_group_genes_leading_zeros = sprintf("%05d", $num_group_genes);
# Replace every non-word character with '_' so the name is safe as a file name.
$annotated_group_name =~ s!\W!_!gi;
# NOTE(review): $filename is declared twice, which looks like the removed and the
# added line of a rendered diff. As written, the second declaration wins, so the
# resulting name is "<annotated name>.fa" without the zero-padded count prefix.
my $filename = join('-', ($num_group_genes_leading_zeros,$annotated_group_name)).'.fa';
my $filename = $annotated_group_name.'.fa';
my $group_file_name = join('/',($self->output_directory, $filename ));
return $group_file_name;
}
Expand Down
31 changes: 18 additions & 13 deletions lib/Bio/PanGenome/Output/GroupsMultifastaProtein.pm
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ package Bio::PanGenome::Output::GroupsMultifastaProtein;

Take a multifasta nucleotide file and output it as proteins.
use Bio::PanGenome::Output::GroupsMultifastaProtein;

my $obj = Bio::PanGenome::Output::GroupsMultifastaProtein->new(
nucleotide_fasta_file => 'example.fa'
);
Expand All @@ -29,29 +29,34 @@ sub _build_output_filename
{
# Derive the output path from $self->nucleotide_fasta_file: same directory and
# base name, with the trailing extension replaced by $self->_suffix.
my ($self) = @_;
# The suffix pattern is a dot followed by any run of non-dot characters.
# $suffix is captured but not used below.
# NOTE(review): fileparse is presumably File::Basename's - the import is not visible here.
my ( $filename, $directories, $suffix ) = fileparse($self->nucleotide_fasta_file, qr/\.[^.]*/);

return join('',($directories, $filename.$self->_suffix));
}

sub _fastatranslate_filename
# Read all the sequences for a gene into memory to sort them - the files are very small, so this shouldn't be a problem
sub _fastatranslate
{
my ($self) = @_;
return $self->output_filename.".intermediate";
}
my $input_fasta_file_obj = Bio::SeqIO->new(-file => $self->nucleotide_fasta_file, -format => 'Fasta' );
my $output_protein_file_obj = Bio::SeqIO->new(-file =>">".$self->output_filename, -format => 'Fasta', -alphabet => 'protein' );

sub _fastatranslate_cmd
{
my ($self) = @_;
return 'fastatranslate --geneticcode 11 -f '. $self->nucleotide_fasta_file.' > '.$self->_fastatranslate_filename;
my %protein_sequence_objs;
while (my $seq = $input_fasta_file_obj->next_seq){
$protein_sequence_objs{$seq->display_id} = $seq->translate(-codontable_id => 11 );
}

for my $sequence_name ( sort keys %protein_sequence_objs)
{
$output_protein_file_obj->write_seq($protein_sequence_objs{$sequence_name});
}

return 1;
}

# Produce the protein version of the nucleotide FASTA file at $self->output_filename.
# Always returns 1; the exit statuses of the system() calls and of unlink() are
# not checked.
#
# NOTE(review): this body appears to contain both the removed shell pipeline and
# the added call to _fastatranslate from a rendered diff. As written, both run,
# one after the other. Confirm which one the real file keeps.
sub convert_nucleotide_to_protein
{
my ($self) = @_;
# Run the command returned by _fastatranslate_cmd through the shell.
system($self->_fastatranslate_cmd());
# Pass the intermediate file through fasta_grep, strip the first '*' on each
# line with sed, and write the result to output_filename.
my $cmd = 'fasta_grep -f '.$self->_fastatranslate_filename.' | sed \'s/*//\' > '.$self->output_filename;
system($cmd);
# Remove the intermediate file once the pipeline has run.
unlink($self->_fastatranslate_filename);
# In-process translation step.
$self->_fastatranslate();
1;
}

Expand Down
2 changes: 1 addition & 1 deletion t/Bio/PanGenome/CommandLine/CreatePanGenome.t
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ cleanup_files();

%scripts_and_expected_files = (
' -j Local --dont_create_rplots --output_multifasta_files t/data/query_1.gff t/data/query_2.gff t/data/query_6.gff ' =>
[ 'pan_genome_sequences/00002-speH.fa.aln', 't/data/00002-speH.fa.aln' ],
[ 'pan_genome_sequences/speH.fa.aln', 't/data/speH.fa.aln' ],
);
mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ my %scripts_and_expected_files = (
[ 't/data/nuc_multifasta.fa.aln', 't/data/expected_nuc_multifasta.fa.aln' ],
);



unlink('t/data/nuc_multifasta.fa.aln');
mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
unlink('t/data/nuc_multifasta.fa.aln');
Expand Down
12 changes: 6 additions & 6 deletions t/Bio/PanGenome/Output/GroupsMultifastasNucleotide.t
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ ok(
);
ok( $obj->create_files(), 'Create multiple fasta files' );

is(read_file('pan_genome_sequences/00003-hly.fa'), read_file('t/data/pan_genome_sequences/00003-hly.fa' ), 'Check multifasta content is correct for 3-hly.fa ');
is(read_file('pan_genome_sequences/00002-speH.fa'), read_file('t/data/pan_genome_sequences/00002-speH.fa' ), 'Check multifasta content is correct for 2-speH.fa ');
is(read_file('pan_genome_sequences/00002-argF.fa'), read_file('t/data/pan_genome_sequences/00002-argF.fa' ), 'Check multifasta content is correct for 2-argF.fa ');
is(read_file('pan_genome_sequences/00001-group_7.fa'), read_file('t/data/pan_genome_sequences/00001-group_7.fa' ), 'Check multifasta content is correct for 1-group_7.fa ');
is(read_file('pan_genome_sequences/00001-group_6.fa'), read_file('t/data/pan_genome_sequences/00001-group_6.fa' ), 'Check multifasta content is correct for 1-group_6.fa ');
is(read_file('pan_genome_sequences/00001-yfnB.fa'), read_file('t/data/pan_genome_sequences/00001-yfnB.fa' ), 'Check multifasta content is correct for 1-yfnB.fa ');
# Compare each generated per-gene multifasta with its expected fixture.
# The descriptions name the file actually checked; the files no longer carry
# a gene-count prefix, so the descriptions must not mention one either.
is(read_file('pan_genome_sequences/hly.fa'), read_file('t/data/pan_genome_sequences/hly.fa' ), 'Check multifasta content is correct for hly.fa');
is(read_file('pan_genome_sequences/speH.fa'), read_file('t/data/pan_genome_sequences/speH.fa' ), 'Check multifasta content is correct for speH.fa');
is(read_file('pan_genome_sequences/argF.fa'), read_file('t/data/pan_genome_sequences/argF.fa' ), 'Check multifasta content is correct for argF.fa');
is(read_file('pan_genome_sequences/group_7.fa'), read_file('t/data/pan_genome_sequences/group_7.fa' ), 'Check multifasta content is correct for group_7.fa');
is(read_file('pan_genome_sequences/group_6.fa'), read_file('t/data/pan_genome_sequences/group_6.fa' ), 'Check multifasta content is correct for group_6.fa');
is(read_file('pan_genome_sequences/yfnB.fa'), read_file('t/data/pan_genome_sequences/yfnB.fa' ), 'Check multifasta content is correct for yfnB.fa');
remove_tree('pan_genome_sequences');

done_testing();
171 changes: 140 additions & 31 deletions t/data/expected_nuc_multifasta.fa.aln
Original file line number Diff line number Diff line change
Expand Up @@ -2,50 +2,159 @@
ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTAT
TGGCGCGTTTCTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGG
CTTTTTCGCTGGGCTTCGCCGCAGGGATCATGCTGCTCATCTCGCTGATG
GAGATGCTGCCCGCCGCGTTAGATACCGAGGGGATGTCGCCTGTACTGGG
CTACGGGATGTTTATTATCGGCCTGTTGGGCTACTTCGGGCTGGATCGTC
TGCTTCCTCACGCTCATCCGCAGGATCTGGTGCAAAAAAGGCAGCAGCCG
CTTCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATTAG
CCTGCACAACTTTCCGGAAGGAATCGCCACCTTTGTCACTGCCAGCAGCA
ATCTTGAACTGGGTTTCGGCATCGCACTGGCGGTGGCGTTGCACAATATT
CCTGAAGGGCTGGCGGTTGCCGGCCCGGTTTATGCCGCGACGGGCTCAAA
ACGTACCGCGATTTTTTGGGCCGGTATCTCCGGCATGGCGGAAATTCTC-
--GGCGTGCTGGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATCGTT
ATGGCGGCTATCATGGCAGCAGTCGCCGGCATTATGGTGGCGCTCTCCGT
GAGATGCTGCCCGCCGCG---------------TTAGATACCGAGGGGAT
GTCGCCTGTACTGGGCTAC------GGGATGTTTATTATCGGCCTGTTGG
GCTACTTCGGGCTGGATCGTCTGCTTCCTCACGCTCATCCGCAGGATCTG
GTGCAAAAAAGGCAGCAGCCGCTTCCCGGCTCGATAAAACGCACTGCGAT
TTTATTGACGCTCGGCATTAGCCTGCACAACTTTCCGGAAGGAATCGCCA
CCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGCATCGCA---
--------------------------------------------------
-CTGGCGGTGGCGTTGCACAATATTCCTGAAGGG------CTGGCGGTTG
CCGGCCCGGTTTATGCCGCGACGGGCTCAAAACGTACCGCGATTTTTTGG
GCC---------------GGTATCTCCGGCATGGCGGAAATTCTCGGCGT
GCTGGCG---TGGCTGATTTTGGGCAGCCTGGTTTCACCGATC-------
--------------------------------GTTATGGCGGCTATCATG
GCAGCA---------------GTCGCCGGCATTATGGTGGCGCTCTCCGT
CGATGAACTGATGCCGTTGGCAAAAGAGATCGATCCTAACAATAACCCCA
GCTATGGTGTGCTTTGCGGTATGTCCATCATGGGGCTCAGTCTCGTCATT
TTGCAGACGATAGGTATCGGT
>DDDD#77_01105
>HHHH
ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTAT
TGGCGCGTTTCTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGG
CTTTTTCGCTGGGCTTCGCCGCAGGGATCATGCTGCTCATCTCGCTGATG
GAGATGCTGCCCGCCGCGTTAGATACCGAGGGGATGTCGCCTGTACTGGG
CTACGGGATGTTTATTATCGGCCTGTTGGGCTACTTCGGGCTGGATCGTC
TGCTTCCTCACGCTCATCCGCAGGATCTGGTGCAAAAAAGGCAGCAGCCG
CTTCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATTAG
CCTGCACAACTTTCCGGAAGGAATCGCCACCTTTGTCACTGCCAGCAGCA
ATCTTGAACTGGGTTTCGGCATCGCACTGGCGGTGGCGTTGCACAATATT
CCTGAAGGGCTGGCGGTTGCCGGCCCGGTTTATGCCGCGACGGGCTCAAA
ACGTACCGCGATTTTTTGGGCCGGTATCTCCGGCATGGCGGAAATTCTTG
GCGGCGTGCTGGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATCGTT
GAGATGCTGCCCGCCGCG---------------TTAGATACCGAGGGGAT
GTCGCCTGTACTGGGCTAC------GGGATGTTTATTATCGGCCTGTTGG
GCTACTTCGGGCTGGATCGTCTGCTTCCTCACGCT---------------
------------------TCCGCAGGATCTGGTGCAAAAAAGGCAGCAGC
CGCTTCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATT
AGCCTGCACAACTTTCCGGAAGGAATCGCCACCTTTGTCACTGCCAG---
------------------------------------CAGCAATCTTGAAC
TGGGTTTCGGCATCGCACTGGCGGTGGCGTTGCACAATATTCCTGAAGGG
CTGGCGGTTGCCGGCCCGGTTTATGCCGCGACGGGCTCAAAACGTACCGC
GATTTTTTGGGCCGGTATCTC------CGGCATGGCGGAAATTCTTGGCG
GCGTGCT---GGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATCGTT
ATGGCGGCTATCATGGCAGCAGTCGCCGGCATTATGGTGGCGCTCTCCGT
CGATGAACTGATGCCGTTGGCAAAAGAGATCGATCCTAACAATAACCCCA
G------------------------------------------CTATGGT
GTGCTTTGCGGTATGTCCATCATGGGGCTCAG------------------
---TCTCGTCATTTTGCA---
>FFFF
ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTAT
TGGCGCGTTTCTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGG
CTTTTTCGCTGGGCTTCGCCGCAGGGATCATGCTGCTCATCTCGCTGATG
GAGATGCTGCCCGCCGCG---------------TTAGATACCGAGGGGAT
GTCGCCTGTACTGGGCTAC------GGGATGTTTATTATCGGCCTGTTGG
GCTACTTCGGGCTGGATCGTCTGCTTCCTCACGCTCATCCGCAGGATCTG
GTGCAAAAAAGGCAGCAGCCGCTTCCCGGCTCGATAAAACGCACTGCGAT
TTTATTGACGCTCGGCATTAGCCTGCACAACTTTCCGGAAGGAATCGCCA
CCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGCATCGCA---
--------------------------------------------------
-CTGGCGGTGGCGTTGCACAATATTCCTGAAGGG------CTGGCGGTTG
CCGGCCCGGTTTATGCCGCGACGGGCTCAAAACGTACCGCGATTTTTTGG
GCC---------------GGTATCTCCGGCATGGCGGAAATTCTTGGCGG
CGTGCTGGCGTGGCTGATTTTGGGCAGCCTG---GCACCGATC-------
--------------------------------GTTATGGCGGCTATCATG
GCAGCA---------------GTCGCCGGCATTATGGTGGCGCTCTCCGT
CGATGAACTGATGCCGTTGGCAAAAGAGATCGATCCTAACAATAACCCCA
GCTATGGTGTGCTTTGCGGTATGTCCATCATGGGGCTCAGTCTCGTCATT
TTGCAGACGATAGGTATCGGT
>FFFF
>DDDD#77_01105
ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTAT
TGGCGCGTTTCTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGG
CTTTTTCGCTGGGCTTCGCCGCAGGGATCATGCTGCTCATCTCGCTGATG
GAGATGCTGCCCGCCGCGTTAGATACCGAGGGGATGTCGCCTGTACTGGG
CTACGGGATGTTTATTATCGGCCTGTTGGGCTACTTCGGGCTGGATCGTC
TGCTTCCTCACGCTCATCCGCAGGATCTGGTGCAAAAAAGGCAGCAGCCG
CTTCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATTAG
CCTGCACAACTTTCCGGAAGGAATCGCCACCTTTGTCACTGCCAGCAGCA
ATCTTGAACTGGGTTTCGGCATCGCACTGGCGGTGGCGTTGCACAATATT
CCTGAAGGGCTGGCGGTTGCCGGCCCGGTTTATGCCGCGACGGGCTCAAA
ACGTACCGCGATTTTTTGGGCCGGTATCTCCGGCATGGCGGAAATTCTTG
GCGGCGTGCTGGCGTGGCTGATTTTGGGCAGCCTG---GCACCGATCGTT
ATGGCGGCTATCATGGCAGCAGTCGCCGGCATTATGGTGGCGCTCTCCGT
GAGATGCTGCCCGCCGCG---------------TTAGATACCGAGGGGAT
GTCGCCTGTACTGGGCTAC------GGGATGTTTATTATCGGCCTGTTGG
GCTACTTCGGGCTGGATCGTCTGCTTCCTCACGCTCATCCGCAGGATCTG
GTGCAAAAAAGGCAGCAGCCGCTTCCCGGCTCGATAAAACGCACTGCGAT
TTTATTGACGCTCGGCATTAGCCTGCACAACTTTCCGGAAGGAATCGCCA
CCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGCATCGCA---
--------------------------------------------------
-CTGGCGGTGGCGTTGCACAATATTCCTGAAGGG------CTGGCGGTTG
CCGGCCCGGTTTATGCCGCGACGGGCTCAAAACGTACCGCGATTTTTTGG
GCC---------------GGTATCTCCGGCATGGCGGAAATTCTTGGCGG
CGTGCTGGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATC-------
--------------------------------GTTATGGCGGCTATCATG
GCAGCA---------------GTCGCCGGCATTATGGTGGCGCTCTCCGT
CGATGAACTGATGCCGTTGGCAAAAGAGATCGATCCTAACAATAACCCCA
GCTATGGTGTGCTTTGCGGTATGTCCATCATGGGGCTCAGTCTCGTCATT
TTGCAGACGATAGGTATCGGT
>GGGG
ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTAT
TGGCGCGTTTCTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGG
CTTTTTCGCTGGGCTTCGCCGCAGGGATCATGCTGCTCATCTCGCTGATG
GAGATGCTGCCCGCCGCG---------------TTAGATACCGAGGGGAT
GTCGCCTGTACTGGGCTAC------GGGATGTTTATTATCGGCCTGTTGG
GCTACTTCGGGCTGGATCGTCTGCTTCCTCACGCTCATCCGCAGGATCTG
GTGCAAAAAAGGCAGCAGCCGCTTCCCGGCTCGATAAAACGCACTGCGAT
TTTATTGACGCTCGGCATTAGCCTGCACAACTTTCCGGAAGGAATCGCCA
CCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGCATCGCACTG
GGGTGGCGTTGCACAATATTCCTGAAGGGCTGGCGGTTGCCGGCCCGGTT
TATGCCGCGACGGGCTCAAAACGTACCGCGATTT------TTTGGGCCGG
TATCTCCGGCATGGCGGAAATTCTTGGCGGCGTGCTGGCGTGGCTGATTT
TGGGCAGCCTGGTTTCACCGATCGTTATGGCGGCTATCATGGCAGCAGTC
GCCGGCA---TTATGGTGGCGCTCT---CCGATGAACTGATGC-------
--------------------------------CGTTGGCAAAAGAGATCG
ATCCTA------------------ACAATAACCCCAGCTATGGTGTGCTT
T------------------------------------------GCGGTAT
GTCCATCATGGGGCTCAGTCTCGTCA------------------------
---TTTTGCAGACGATAG---
>CCCC#76_00877
ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTAT
TGGCGCGTTTCTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGG
CTTTTTCGCTGGGCTTCGCCGCAGGGATCATGCTGCTCATCTCGCTGATG
GAGATGCTGCCCGCCGCG---------------TTAGATACCGAGGGGAT
GTCGCCTGTACTGGGCTAC------GGGAGTTTATTATCGGCCTGTTGGG
CTACTTCGGGCTGGATCGTCTGCTTCCTCACGCTCATCCGCAGGATCTGG
TGCAAAAAAGGCAGCAGCCGCTTCCCG------GCTCGATAAAACGCACT
GCGATTTTATTGACGCTCGGCATTAGCCTGCACAACTTTCCGG---AAGG
AATCACCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGCATCG
CACTGGCGGTGGCGTTGCACAATATTCCTGAAGGGCTGGCGGTTGCCGGC
CCGGTTTATGCCGCGACGGGCTCAAAACGTACCG------CGATTTTTTG
GGCCGGTATCTCCGGCATGGCGGAAATTCTTGGCGGCGTGCTGGCGTGGC
TGATTTTGGGCAGCCTGGTTTCACCGATCGTTATGGCGGCTATCATGGCA
GCAGTCG---CCGGCATTATGGTGGCGCTCTCCGTCGATGAAC-------
--------------------------------TGATGCCGTTGGCAAAAG
AGATCG------------------ATCCTAACAATAACCCCAGCTATGGT
G------------------------------------------TGCTTTG
CGGTATGTCCATCATGGGGCTCAGTC------------------------
---TCGTCATTTTGCAGA---
>AAAA#74_01075
TGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCA---CCTTT
ATTGGCGCGTTTCTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCT
GGCTTTTTCGCTGGGCTTCGCCGCAGGGATCATGCTGCTCATCTCGCTGA
TGGAGATGCTGCCCGCCG---------------------CGTTAGATACC
GAGGGGATGTCGCCTGTAC------TGGGCTACGGGATGTTTATTATCGG
CCTGTTGGGCTACTTCGGGCTGGATCGTCTGCTTCCTCACGCTCATCCGC
AGGATCTGGTGCAAAAAAGGCAGCAGC------CGCTTCCCGGCTCGATA
AAACGCACTGCGATTTTATTGACGCTCGGCATTAGCCTGCACAACTTTCC
GGAAGGAATCGCCACCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTT
TCGGCATCGCACTGGCGGTGGCGTTGCACAATATTCCTGAAGGGCTGGCG
GTTGCCGGCCCGGTTTATGCCGCGACGGGCTCAA------AACGTACCGC
GATTTTTTGGGCCGGTATCTCCGGCATGGCGGAAATTCTTGGCGGCGTGC
TGGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATCGTTATGGCGGCT
ATCATGG---CAGCAGTCGCCGGCATTATGGTGGCGCTCTCCG-------
--------------------------------TCGATGAACTGATGCCGT
TGGCAA------------------AAGAGATCGATCCTAACAATAACCCC
A------------------------------------------GCTATGG
TGTGCTTTGCGGTATGTCCATCATGG------------------------
---GGCTCAGTCTCGTCA---
>BBBB#75_01314
ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTAT
TGGCGCGTTTCTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGG
CTTTTTCGCTGGGCTTCGCCGCAGGGATCATGCTGCTCATCTCGCTGATG
GATGCTGCCCGCCGCGTTAGATACCGAGGGGATGTCGCCTGTACTGGGCT
ACGGGATGTTTATTATCGGCCTGTTGGGCTACTTCGGGCTGGATCGTCTG
CTTCCTCA------------------------------------------
------------CGCTCATCCGCAGGATCTGGTGCAAAAAAGGCAGCAGC
CGCTTCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATT
AGCCTGCACAACTTTCCGGAAGGAATCGCCACCTTTGTCACTGCCAG---
------------------------------------CAGCAATCTTGAAC
TGGGTTTCGGCATCGCACTGGCGGTGGCGTTGCACAATATTCCTGAAGGG
CTGGCGGTTGCCGGCCCGGTTTATGCCGCGACGGGCTCAAAACGTACCGC
GATTTTTTGGGCCGGTATCTC------CGGCATGGCGGAAATTCTTGGCG
GCGTGCT---GGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATCGTT
ATGGCGGCTATCATGGCAGCAGTCGCCGGCATTATGGTGGCGCTCTCCGT
CGATGAACTGATGCCGTTGGCAAAAGAGATCGATCCTAACAATAACCCCA
G------------------------------------------CTATGGT
GTGCTTTGCGGTATGTCCATCATGGGGCTCAG------------------
---TCTCGTCATTTTGCA---
56 changes: 48 additions & 8 deletions t/data/expected_nuc_multifasta.faa
Original file line number Diff line number Diff line change
@@ -1,8 +1,48 @@
>AAAA#74_01075 [translate(3)]
SVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTEGMSPVLGYGMFIIGLLGYFGLDRLLPHAHPQDLVQKRQQPLPGSIKRTAILLTLGISLHNFPEGIATFVTASSNLELGFGIALAVALHNIPEGLAVAGPVYAATGSKRTAIFWAGISGMAEILGGVLAWLILGSLVSPIVMAAIMAAVAGIMVALSVDELMPLAKEIDPNNNPSYGVLCGMSIMGLSLVILQTIGIG
>DDDD#77_01105 [translate(1)]
MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTEGMSPVLGYGMFIIGLLGYFGLDRLLPHAHPQDLVQKRQQPLPGSIKRTAILLTLGISLHNFPEGIATFVTASSNLELGFGIALAVALHNIPEGLAVAGPVYAATGSKRTAIFWAGISGMAEILGGVLAWLILGSLVSPIVMAAIMAAVAGIMVALSVDELMPLAKEIDPNNNPSYGVLCGMSIMGLSLVILQTIGIG
>EEEE [translate(1)]
MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTEGMSPVLGYGMFIIGLLGYFGLDRLLPHAHPQDLVQKRQQPLPGSIKRTAILLTLGISLHNFPEGIATFVTASSNLELGFGIALAVALHNIPEGLAVAGPVYAATGSKRTAIFWAGISGMAEILGVLAWLILGSLVSPIVMAAIMAAVAGIMVALSVDELMPLAKEIDPNNNPSYGVLCGMSIMGLSLVILQTIGIG
>FFFF [translate(1)]
MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTEGMSPVLGYGMFIIGLLGYFGLDRLLPHAHPQDLVQKRQQPLPGSIKRTAILLTLGISLHNFPEGIATFVTASSNLELGFGIALAVALHNIPEGLAVAGPVYAATGSKRTAIFWAGISGMAEILGGVLAWLILGSLAPIVMAAIMAAVAGIMVALSVDELMPLAKEIDPNNNPSYGVLCGMSIMGLSLVILQTIGIG
>AAAA#74_01075
CQYHLF*PYWRALPPLLARFLAFLAKNRLTACWLFRWASPQGSCCSSR*WRCCPPR*IPR
GCRLYWATGCLLSACWATSGWIVCFLTLIRRIWCKKGSSRFPAR*NALRFY*RSALACTT
FRKESPPLSLPAAILNWVSASHWRWRCTIFLKGWRLPARFMPRRAQNVPRFFGPVSPAWR
KFLAACWRG*FWAAWFHRSLWRLSWQQSPALWWRSPSMN*CRWQKRSILTITPAMVCFAV
CPSWGSVSSFCRR*VSV
>BBBB#75_01314
MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMDAARRVRYRG
DVACTGLRDVYYRPVGLLRAGSSASSRSSAGSGAKKAAAASRLDKTHCDFIDARH*PAQL
SGRNRHLCHCQQQS*TGFRHRTGGGVAQYS*RAGGCRPGLCRDGLKTYRDFLGRYLRHGG
NSWRRAGVADFGQPGFTDRYGGYHGSSRRHYGGALRR*TDAVGKRDRS*Q*PQLWCALRY
VHHGAQSRHFADDRYRL
>CCCC#76_00877
MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTE
GMSPVLGYGSLLSACWATSGWIVCFLTLIRRIWCKKGSSRFPAR*NALRFY*RSALACTT
FRKESPLSLPAAILNWVSASHWRWRCTIFLKGWRLPARFMPRRAQNVPRFFGPVSPAWRK
FLAACWRG*FWAAWFHRSLWRLSWQQSPALWWRSPSMN*CRWQKRSILTITPAMVCFAVC
PSWGSVSSFCRR*VSV
>DDDD#77_01105
MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTE
GMSPVLGYGMFIIGLLGYFGLDRLLPHAHPQDLVQKRQQPLPGSIKRTAILLTLGISLHN
FPEGIATFVTASSNLELGFGIALAVALHNIPEGLAVAGPVYAATGSKRTAIFWAGISGMA
EILGGVLAWLILGSLVSPIVMAAIMAAVAGIMVALSVDELMPLAKEIDPNNNPSYGVLCG
MSIMGLSLVILQTIGIG*
>EEEE
MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTE
GMSPVLGYGMFIIGLLGYFGLDRLLPHAHPQDLVQKRQQPLPGSIKRTAILLTLGISLHN
FPEGIATFVTASSNLELGFGIALAVALHNIPEGLAVAGPVYAATGSKRTAIFWAGISGMA
EILGVLAWLILGSLVSPIVMAAIMAAVAGIMVALSVDELMPLAKEIDPNNNPSYGVLCGM
SIMGLSLVILQTIGIG*
>FFFF
MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTE
GMSPVLGYGMFIIGLLGYFGLDRLLPHAHPQDLVQKRQQPLPGSIKRTAILLTLGISLHN
FPEGIATFVTASSNLELGFGIALAVALHNIPEGLAVAGPVYAATGSKRTAIFWAGISGMA
EILGGVLAWLILGSLAPIVMAAIMAAVAGIMVALSVDELMPLAKEIDPNNNPSYGVLCGM
SIMGLSLVILQTIGIG*
>GGGG
MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTE
GMSPVLGYGMFIIGLLGYFGLDRLLPHAHPQDLVQKRQQPLPGSIKRTAILLTLGISLHN
FPEGIATFVTASSNLELGFGIALGWRCTIFLKGWRLPARFMPRRAQNVPRFFGPVSPAWR
KFLAACWRG*FWAAWFHRSLWRLSWQQSPALWWRSPMN*CRWQKRSILTITPAMVCFAVC
PSWGSVSSFCRR*VSV
>HHHH
MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTE
GMSPVLGYGMFIIGLLGYFGLDRLLPHASAGSGAKKAAAASRLDKTHCDFIDARH*PAQL
SGRNRHLCHCQQQS*TGFRHRTGGGVAQYS*RAGGCRPGLCRDGLKTYRDFLGRYLRHGG
NSWRRAGVADFGQPGFTDRYGGYHGSSRRHYGGALRR*TDAVGKRDRS*Q*PQLWCALRY
VHHGAQSRHFADDRYRL
File renamed without changes.
File renamed without changes.
File renamed without changes.