diff --git a/dist.ini b/dist.ini index e1942c2..ececa65 100644 --- a/dist.ini +++ b/dist.ini @@ -1,5 +1,5 @@ name = Bio-Roary -version = 3.5.7 +version = 3.5.8 author = Andrew J. Page license = GPL_3 copyright_holder = Wellcome Trust Sanger Institute diff --git a/install_dependencies.sh b/install_dependencies.sh index 1dc2914..e53cf89 100755 --- a/install_dependencies.sh +++ b/install_dependencies.sh @@ -36,7 +36,7 @@ FASTTREE_VERSION="2.1.8" FASTTREE_DOWNLOAD_FILENAME="FastTree-${FASTTREE_VERSION}.c" FASTTREE_URL="http://microbesonline.org/fasttree/FastTree-${FASTTREE_VERSION}.c" -MAFFT_VERSION="7.221" +MAFFT_VERSION="7.271" MAFFT_DOWNLOAD_FILENAME="mafft-${MAFFT_VERSION}-without-extensions-src.tgz" MAFFT_URL="http://mafft.cbrc.jp/alignment/software/${MAFFT_DOWNLOAD_FILENAME}" diff --git a/lib/Bio/Roary/AccessoryBinaryFasta.pm b/lib/Bio/Roary/AccessoryBinaryFasta.pm index b398878..9f7fd16 100644 --- a/lib/Bio/Roary/AccessoryBinaryFasta.pm +++ b/lib/Bio/Roary/AccessoryBinaryFasta.pm @@ -20,16 +20,13 @@ use Bio::Roary::Exceptions; use Bio::SeqIO; use File::Basename; -has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); -has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 ); -has 'analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 ); -has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'accessory_binary_genes.fa' ); -has 'lower_bound_percentage' => ( is => 'ro', isa => 'Int', default => 5 ); -has 'upper_bound_percentage' => ( is => 'ro', isa => 'Int', default => 5 ); -has 'max_accessory_to_include' => ( is => 'ro', isa => 'Int', default => 4000 ); -has 'groups_to_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_files' ); -has '_lower_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__lower_bound_value' ); -has '_upper_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__upper_bound_value' ); +has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); +has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 ); +has 'analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 ); +has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'accessory_binary_genes.fa' ); +has 'groups_to_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_files' ); +has '_lower_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__lower_bound_value' ); +has '_upper_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__upper_bound_value' ); sub _build__groups_to_files { my ($self) = @_; @@ -47,37 +44,22 @@ sub _build__groups_to_files { return \%groups_to_files; } -sub _build__lower_bound_value { - my ($self) = @_; - my $num_files = @{ $self->input_files }; - return ceil( $num_files * ( $self->lower_bound_percentage / 100 ) ); -} - -sub _build__upper_bound_value { - my ($self) = @_; - my $num_files = @{ $self->input_files }; - return $num_files - ceil( $num_files * ( $self->upper_bound_percentage / 100 ) ); -} - sub create_accessory_binary_fasta { my ($self) = @_; my $out_seq_io = Bio::SeqIO->new( -file => ">" . $self->output_filename, -format => 'Fasta' ); for my $full_filename ( @{ $self->input_files } ) { - my($filename, $dirs, $suffix) = fileparse($full_filename); - + my ( $filename, $dirs, $suffix ) = fileparse($full_filename); + my $output_sequence = ''; my $sample_name = $filename; $sample_name =~ s!\.gff\.proteome\.faa!!; - my $gene_count = 0; + my $gene_count = 0; for my $group ( sort keys %{ $self->groups_to_files } ) { - last if($gene_count > $self->max_accessory_to_include); my @files = keys %{ $self->groups_to_files->{$group} }; - next if ( @files <= $self->_lower_bound_value || @files > $self->_upper_bound_value ); - my $group_to_file_genes = $self->groups_to_files->{$group}->{$full_filename}; if ( defined($group_to_file_genes) && @{$group_to_file_genes} > 0 ) { $output_sequence .= 'A'; @@ -85,10 +67,10 @@ sub create_accessory_binary_fasta { else { $output_sequence .= 'C'; } - $gene_count++; - + $gene_count++; + } - next if($output_sequence eq ''); + next if ( $output_sequence eq '' ); $out_seq_io->write_seq( Bio::Seq->new( -display_id => $sample_name, -seq => $output_sequence ) ); } return 1; diff --git a/t/Bio/Roary/AccessoryBinaryFasta.t b/t/Bio/Roary/AccessoryBinaryFasta.t index 435839b..1c0870e 100755 --- a/t/Bio/Roary/AccessoryBinaryFasta.t +++ b/t/Bio/Roary/AccessoryBinaryFasta.t @@ -33,8 +33,6 @@ ok( group_3 => { 't/abc/aaa' => [1], 't/abc/bbb' => [2], 't/abc/ccc' => [3] }, group_4 => { 't/abc/aaa' => [1], 't/abc/bbb' => [2], 't/abc/ccc' => [3], 't/abc/ddd' => [4] }, }, - _lower_bound_value => 0, - _upper_bound_value => 4, annotate_groups_obj => $dummy_annotate_groups, analyse_groups_obj => $dummy_analyse_groups ), @@ -46,27 +44,4 @@ ok( $obj->create_accessory_binary_fasta(), 'create output file' ); compare_ok( 'accessory_binary_genes.fa', 't/data/expected_accessory_binary_genes.fa','binary accessory fasta file created'); -ok( - $obj = Bio::Roary::AccessoryBinaryFasta->new( - input_files => [ 'aaa', 'bbb', 'ccc', 'ddd' ], - groups_to_files => - { - group_1 => { 'aaa' => [1] }, - group_2 => { 'aaa' => [1], 'bbb' => [2] }, - group_3 => { 'aaa' => [1], 'bbb' => [2], 'ccc' => [3] }, - group_4 => { 'aaa' => [1], 'bbb' => [2], 'ccc' => [3], 'ddd' => [4] }, - }, - annotate_groups_obj => $dummy_annotate_groups, - analyse_groups_obj => $dummy_analyse_groups - ), - 'initialise accessory binary fasta file bounded' -); - -is($obj->_lower_bound_value, 1, 'lower bound value'); -is($obj->_upper_bound_value, 3, 'upper bound value'); -ok( $obj->create_accessory_binary_fasta(), 'create output file bounded' ); - -compare_ok( 'accessory_binary_genes.fa', 't/data/expected_accessory_binary_genes_bounded.fa','binary accessory fasta file created bounded'); - - done_testing();