Skip to content

Commit

Permalink
Merge pull request #227 from andrewjpage/use_full_accessory_for_tree
Browse files Browse the repository at this point in the history
Provide full accessory for building binary tree
  • Loading branch information
aslett1 committed Jan 20, 2016
2 parents f7770a6 + 58056e4 commit cdc517b
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 58 deletions.
2 changes: 1 addition & 1 deletion dist.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name = Bio-Roary
version = 3.5.7
version = 3.5.8
author = Andrew J. Page <ap13@sanger.ac.uk>
license = GPL_3
copyright_holder = Wellcome Trust Sanger Institute
Expand Down
2 changes: 1 addition & 1 deletion install_dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ FASTTREE_VERSION="2.1.8"
FASTTREE_DOWNLOAD_FILENAME="FastTree-${FASTTREE_VERSION}.c"
FASTTREE_URL="http://microbesonline.org/fasttree/FastTree-${FASTTREE_VERSION}.c"

MAFFT_VERSION="7.221"
MAFFT_VERSION="7.271"
MAFFT_DOWNLOAD_FILENAME="mafft-${MAFFT_VERSION}-without-extensions-src.tgz"
MAFFT_URL="http://mafft.cbrc.jp/alignment/software/${MAFFT_DOWNLOAD_FILENAME}"

Expand Down
44 changes: 13 additions & 31 deletions lib/Bio/Roary/AccessoryBinaryFasta.pm
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,13 @@ use Bio::Roary::Exceptions;
use Bio::SeqIO;
use File::Basename;

has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
has 'analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'accessory_binary_genes.fa' );
has 'lower_bound_percentage' => ( is => 'ro', isa => 'Int', default => 5 );
has 'upper_bound_percentage' => ( is => 'ro', isa => 'Int', default => 5 );
has 'max_accessory_to_include' => ( is => 'ro', isa => 'Int', default => 4000 );
has 'groups_to_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_files' );
has '_lower_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__lower_bound_value' );
has '_upper_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__upper_bound_value' );
has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
has 'analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'accessory_binary_genes.fa' );
has 'groups_to_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_files' );
has '_lower_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__lower_bound_value' );
has '_upper_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__upper_bound_value' );

sub _build__groups_to_files {
my ($self) = @_;
Expand All @@ -47,48 +44,33 @@ sub _build__groups_to_files {
return \%groups_to_files;
}

# Builder for the lazy '_lower_bound_value' attribute.
#
# Converts 'lower_bound_percentage' into a whole number of input files:
# that percentage of the number of entries in 'input_files', rounded up.
#
# Returns: an integer file count.
sub _build__lower_bound_value {
    my ($self) = @_;
    my $num_files = @{ $self->input_files };

    # Multiply before dividing. Both operands are integers, so the product is
    # exact and ceil() only rounds up genuine fractions. Dividing first
    # (e.g. 7 / 100) produces an inexact float, and 100 * 0.07 evaluates to
    # 7.000000000000001, which ceil() would wrongly round up to 8.
    return ceil( $num_files * $self->lower_bound_percentage / 100 );
}

# Builder for the lazy '_upper_bound_value' attribute.
#
# Takes 'upper_bound_percentage' percent of the number of entries in
# 'input_files', rounds that up to a whole number of files, and subtracts it
# from the total number of files.
#
# Returns: an integer file count.
sub _build__upper_bound_value {
    my ($self) = @_;
    my $num_files = @{ $self->input_files };

    # Multiply before dividing. Both operands are integers, so the product is
    # exact and ceil() only rounds up genuine fractions. Dividing first
    # (e.g. 7 / 100) produces an inexact float, and 100 * 0.07 evaluates to
    # 7.000000000000001, which ceil() would wrongly round up to 8 and make
    # the resulting bound one file too low.
    my $excluded_at_top = ceil( $num_files * $self->upper_bound_percentage / 100 );

    return $num_files - $excluded_at_top;
}

sub create_accessory_binary_fasta {
my ($self) = @_;
my $out_seq_io = Bio::SeqIO->new( -file => ">" . $self->output_filename, -format => 'Fasta' );

for my $full_filename ( @{ $self->input_files } ) {
my($filename, $dirs, $suffix) = fileparse($full_filename);
my ( $filename, $dirs, $suffix ) = fileparse($full_filename);

my $output_sequence = '';
my $sample_name = $filename;
$sample_name =~ s!\.gff\.proteome\.faa!!;

my $gene_count = 0;
my $gene_count = 0;
for my $group ( sort keys %{ $self->groups_to_files } ) {
last if($gene_count > $self->max_accessory_to_include);

my @files = keys %{ $self->groups_to_files->{$group} };

next if ( @files <= $self->_lower_bound_value || @files > $self->_upper_bound_value );

my $group_to_file_genes = $self->groups_to_files->{$group}->{$full_filename};
if ( defined($group_to_file_genes) && @{$group_to_file_genes} > 0 ) {
$output_sequence .= 'A';
}
else {
$output_sequence .= 'C';
}
$gene_count++;
$gene_count++;

}
next if($output_sequence eq '');
next if ( $output_sequence eq '' );
$out_seq_io->write_seq( Bio::Seq->new( -display_id => $sample_name, -seq => $output_sequence ) );
}
return 1;
Expand Down
25 changes: 0 additions & 25 deletions t/Bio/Roary/AccessoryBinaryFasta.t
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ ok(
group_3 => { 't/abc/aaa' => [1], 't/abc/bbb' => [2], 't/abc/ccc' => [3] },
group_4 => { 't/abc/aaa' => [1], 't/abc/bbb' => [2], 't/abc/ccc' => [3], 't/abc/ddd' => [4] },
},
_lower_bound_value => 0,
_upper_bound_value => 4,
annotate_groups_obj => $dummy_annotate_groups,
analyse_groups_obj => $dummy_analyse_groups
),
Expand All @@ -46,27 +44,4 @@ ok( $obj->create_accessory_binary_fasta(), 'create output file' );
compare_ok( 'accessory_binary_genes.fa', 't/data/expected_accessory_binary_genes.fa','binary accessory fasta file created');


ok(
$obj = Bio::Roary::AccessoryBinaryFasta->new(
input_files => [ 'aaa', 'bbb', 'ccc', 'ddd' ],
groups_to_files =>
{
group_1 => { 'aaa' => [1] },
group_2 => { 'aaa' => [1], 'bbb' => [2] },
group_3 => { 'aaa' => [1], 'bbb' => [2], 'ccc' => [3] },
group_4 => { 'aaa' => [1], 'bbb' => [2], 'ccc' => [3], 'ddd' => [4] },
},
annotate_groups_obj => $dummy_annotate_groups,
analyse_groups_obj => $dummy_analyse_groups
),
'initialise accessory binary fasta file bounded'
);

is($obj->_lower_bound_value, 1, 'lower bound value');
is($obj->_upper_bound_value, 3, 'upper bound value');
ok( $obj->create_accessory_binary_fasta(), 'create output file bounded' );

compare_ok( 'accessory_binary_genes.fa', 't/data/expected_accessory_binary_genes_bounded.fa','binary accessory fasta file created bounded');


done_testing();

0 comments on commit cdc517b

Please sign in to comment.