Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't split groups #46

Merged
merged 1 commit into from
Oct 31, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions lib/Bio/PanGenome/AnnotateGroups.pm
Original file line number Diff line number Diff line change
Expand Up @@ -243,11 +243,6 @@ sub _split_groups_with_min_sub_group_size {
# Split groups using progressively smaller minimum sub-group sizes, then
# rebuild the lookups that depend on the group contents.
#
# The threshold starts at the number of files and is halved after every pass.
# Passes stop as soon as the threshold falls below 1/32 of the number of files
# or below 1. The halving is ordinary (non-integer) division, so fractional
# thresholds such as 1.5 can be passed to the splitter.
#
# Afterwards the consensus gene names and the id-to-group mapping are both
# regenerated and stored back on the object.
sub _split_groups {
    my ($self) = @_;

    # Largest sub-groups are split off first; smaller ones on later passes.
    my $min_sub_group_size = $self->_number_of_files;
    while ( $min_sub_group_size >= ( $self->_number_of_files / 32 )
        && $min_sub_group_size >= 1 )
    {
        $self->_split_groups_with_min_sub_group_size($min_sub_group_size);
        $min_sub_group_size /= 2;
    }

    # Refresh the derived lookups now that splitting has finished.
    $self->_groups_to_consensus_gene_names(
        $self->_generate_groups_to_consensus_gene_names
    );
    $self->_ids_to_groups(
        $self->_generate__ids_to_groups
    );
}
Expand Down
4 changes: 2 additions & 2 deletions lib/Bio/PanGenome/External/Cdhit.pm
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ has 'exec' => ( is => 'ro', isa => 'Str', default => '
has '_number_of_threads' => ( is => 'ro', isa => 'Int', default => 1 );
has '_max_available_memory_in_mb' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__max_available_memory_in_mb' );
has '_use_most_similar_clustering' => ( is => 'ro', isa => 'Bool', default => 1 );
has '_length_difference_cutoff' => ( is => 'ro', isa => 'Num', default => 1 );
has '_sequence_identity_threshold' => ( is => 'ro', isa => 'Num', default => 1 );
has '_length_difference_cutoff' => ( is => 'ro', isa => 'Num', default => 0.99 );
has '_sequence_identity_threshold' => ( is => 'ro', isa => 'Num', default => 0.99 );
has '_description_length' => ( is => 'ro', isa => 'Int', default => 256 );
has '_logging' => ( is => 'ro', isa => 'Str', default => '2> /dev/null' );

Expand Down
14 changes: 13 additions & 1 deletion lib/Bio/PanGenome/Output/NumberOfGroups.pm
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use Moose;
use List::Util qw(shuffle);

has 'group_statistics_obj' => ( is => 'ro', isa => 'Bio::PanGenome::GroupStatistics', required => 1 );
has 'number_of_iterations' => ( is => 'ro', isa => 'Int', default => 100 );
has 'number_of_iterations' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build_number_of_iterations' );
has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'number_of_new_genes.png' );
has 'output_raw_filename_conserved_genes' => ( is => 'ro', isa => 'Str', default => 'number_of_conserved_genes.tab' );
has 'output_raw_filename_unique_genes' => ( is => 'ro', isa => 'Str', default => 'number_of_unique_genes.tab' );
Expand All @@ -29,6 +29,18 @@ has '_unique_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
has '_total_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
has '_new_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );

# Lazy builder for the 'number_of_iterations' attribute.
#
# Returns 100, unless group_statistics_obj->_sorted_file_names holds more than
# 100 entries, in which case that entry count is returned instead.
sub _build_number_of_iterations {
    my ($self) = @_;

    my $baseline_iterations = 100;
    my $file_count = scalar @{ $self->group_statistics_obj->_sorted_file_names };

    return $file_count > $baseline_iterations
        ? $file_count
        : $baseline_iterations;
}

sub create_output_files {
my ($self) = @_;

Expand Down
6 changes: 0 additions & 6 deletions t/Bio/PanGenome/CommandLine/CreatePanGenome.t
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,6 @@ my %scripts_and_expected_files = (
mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
cleanup_files();

%scripts_and_expected_files = (
' -j Local --output_multifasta_files t/data/query_1.gff t/data/query_2.gff t/data/query_3.gff ' =>
[ 'pan_genome_sequences/00006-group_1.fa.aln', 't/data/00006-group_1.fa.aln' ],
);
mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
cleanup_files();

%scripts_and_expected_files = (
' -j Local --output_multifasta_files t/data/query_1.gff t/data/query_2.gff t/data/query_3.gff ' =>
Expand Down
2 changes: 1 addition & 1 deletion t/Bio/PanGenome/External/Cdhit.t
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ ok($obj = Bio::PanGenome::External::Cdhit->new(
exec => $cwd.'/t/bin/dummy_cd-hit',
),'initialise object');

is($obj->_command_to_run, $cwd.'/t/bin/dummy_cd-hit -i t/data/some_fasta_file.fa -o output -T 1 -M 900 -g 1 -s 1 -d 256 -c 1 2> /dev/null', 'Command constructed as expected');
is($obj->_command_to_run, $cwd.'/t/bin/dummy_cd-hit -i t/data/some_fasta_file.fa -o output -T 1 -M 900 -g 1 -s 0.99 -d 256 -c 0.99 2> /dev/null', 'Command constructed as expected');
ok($obj->run(), 'run dummy command');
unlink('output');
unlink('output.clstr');
Expand Down
24 changes: 0 additions & 24 deletions t/Bio/PanGenome/Output/GroupsMultifastasNucleotide.t
Original file line number Diff line number Diff line change
Expand Up @@ -43,28 +43,4 @@ is(read_file('pan_genome_sequences/00001-group_6.fa'), read_file('t/data/pan_gen
is(read_file('pan_genome_sequences/00001-yfnB.fa'), read_file('t/data/pan_genome_sequences/00001-yfnB.fa' ), 'Check multifasta content is correct for 1-yfnB.fa ');
remove_tree('pan_genome_sequences');

my $annotate_groups_all_merged = Bio::PanGenome::AnnotateGroups->new(
gff_files => $gff_files,
groups_filename => 't/data/query_groups_all_merged',
);
$annotate_groups_all_merged->reannotate;

ok(
my $obj_all_merged = Bio::PanGenome::Output::GroupsMultifastasNucleotide->new(
group_names => [ 'group_2', 'group_5' ],
gff_files => $gff_files,
annotate_groups => $annotate_groups_all_merged
),
'All groups are merged into one so it needs to be deconvoluted'
);
ok( $obj_all_merged->create_files(), 'Split out the annotation into separate group files' );


is(read_file('pan_genome_sequences/00006-different.fa'), read_file('t/data/split_pan_genome_sequences/00006-different.fa'), 'Check multifasta content correct for 00006-different.fa' );
is(read_file('pan_genome_sequences/00002-speH.fa'), read_file('t/data/split_pan_genome_sequences/00002-speH.fa'), 'Check multifasta content correct for speH.fa ' );
is(read_file('pan_genome_sequences/00002-hly.fa'), read_file('t/data/split_pan_genome_sequences/00002-hly.fa'), 'Check multifasta content correct for hly.fa ' );
is(read_file('pan_genome_sequences/00002-argF.fa'), read_file('t/data/split_pan_genome_sequences/00002-argF.fa'), 'Check multifasta content correct for argF.fa ' );
is(read_file('reannotated_groups_file'), read_file('t/data/split_pan_genome_sequences/reannotated_groups_file'),'Check multifasta content correct for reannotated_groups_file' );

remove_tree('pan_genome_sequences');
done_testing();
3 changes: 1 addition & 2 deletions t/data/clustered_proteins_pan_genome
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
group_1: abc_00004 abc_00004 abc_00004 abc_00006 abc_00006 abc_00006
speH: abc_00004 abc_00004 abc_00004 abc_00006 abc_00006 abc_00006 1_2 2_2 3_2
group_2: abc_00014 abc_00014 abc_00014 1_6 2_7 abc_00015
speH: 1_2 2_2 3_2
group_12: abc_00013 abc_00013 abc_00013
yfnB: abc_00016 abc_00016 3_5
group_5: abc_00003 abc_00003 abc_00003
Expand Down
3 changes: 1 addition & 2 deletions t/data/overall_group_statisics.csv
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","query_1","query_2","query_3"
"speH","","hypothetical protein","3","3","1","1_2","2_2","3_2"
"argF","","Ornithine carbamoyltransferase","3","3","1","1_3","2_3","3_3"
"speH","","superantigen-like protein","3","9","3","1_2","2_2","abc_00004 abc_00004 abc_00004 abc_00006 abc_00006 abc_00006 3_2"
"group_2","","Gonococcal growth inhibitor III","3","6","2","1_6","2_7","abc_00014 abc_00014 abc_00014 abc_00015"
"hly","","Alpha-toxin","3","3","1","1_1","2_1","3_1"
"yfnB","","Putative HAD-hydrolase yfnB","2","3","1.5","","abc_00016 abc_00016","3_5"
"group_12","","","1","3","3","","","abc_00013 abc_00013 abc_00013"
"group_5","","hypothetical protein","1","3","3","","","abc_00003 abc_00003 abc_00003"
"group_1","","superantigen-like protein","1","6","6","","","abc_00004 abc_00004 abc_00004 abc_00006 abc_00006 abc_00006"
"group_9","","hypothetical protein","1","3","3","","","abc_00010 abc_00010 abc_00010"
"group_8","","","1","3","3","","","abc_01705 abc_01705 abc_01705"
"group_10","","C4-dicarboxylate transporter/malic acid transport protein","1","3","3","","","abc_00011 abc_00011 abc_00011"
Expand Down