Skip to content

Commit

Permalink
Merge pull request #126 from andrewjpage/cleanup_protein_files
Browse files Browse the repository at this point in the history
Cleanup files
  • Loading branch information
martinghunt committed May 21, 2015
2 parents c5251f4 + df023e0 commit 92293bb
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 39 deletions.
2 changes: 1 addition & 1 deletion dist.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name = Bio-Roary
version = 2.2.2
version = 2.2.3
author = Andrew J. Page <ap13@sanger.ac.uk>
license = GPL_3
copyright_holder = Wellcome Trust Sanger Institute
Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/Roary/External/Mcl.pm
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ sub _command_to_run {
" ",
(
$self->mcxdeblast_exec, '-m9', '--score='.$self->_score,
'--line-mode=abc', $self->blast_results,
'--line-mode=abc', $self->blast_results, '2> /dev/null',
'|', $self->mcl_exec, '-', '--abc',
'-I', $self->_inflation_value, '-o', $self->output_file,
$self->_logging
Expand Down
5 changes: 5 additions & 0 deletions lib/Bio/Roary/PostAnalysis.pm
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,11 @@ sub _delete_intermediate_files
my ($self) = @_;
return if($self->dont_delete_files == 1);

for my $fasta_file (@{$self->fasta_files})
{
unlink($fasta_file) if(-e $fasta_file);
}

unlink($self->_output_mcl_filename) ;
unlink($self->_output_inflate_clusters_filename) ;
unlink($self->_output_group_labels_filename) ;
Expand Down
29 changes: 8 additions & 21 deletions lib/Bio/Roary/SplitGroups.pm
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ has 'outfile' => ( is => 'ro', isa => 'Str', required => 1 );
has 'iterations' => ( is => 'ro', isa => 'Int', default => 5 );
has 'dont_delete' => ( is => 'ro', isa => 'Bool', default => 0 );

has '_outfile_handle' => ( is => 'ro', lazy_build => 1 );
has '_neighbourhood_size' => ( is => 'ro', isa => 'Int', default => 5 );

has '_group_filelist' => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 );
has '_tmp_dir' => ( is => 'ro', isa => 'Str', default => 'split_groups' );
has '_tmp_dir_object' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
has '_tmp_dir' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__tmp_dir' );

has '_analyse_groups_obj' => ( is => 'ro', lazy_build => 1 );
has '_genes_to_files' => ( is => 'ro', lazy_build => 1 );
Expand All @@ -45,11 +45,9 @@ has '_gene_files_temp_dir_obj' =>

has '_do_sorting' => ( is => 'rw', isa => 'Bool', default => 0 ); # set to 1 for testing only

sub _build__outfile_handle {
my ( $self ) = @_;

open( my $fh, '>', $self->outfile );
return $fh;
# Builder for the lazy '_tmp_dir' attribute: returns the directory name
# reported by the object held in '_tmp_dir_object'.
sub _build__tmp_dir {
    my $self       = shift;
    my $dir_object = $self->_tmp_dir_object;
    return $dir_object->dirname();
}

sub _build__analyse_groups_obj {
Expand Down Expand Up @@ -79,14 +77,6 @@ sub _build__group_filelist {
return \@filelist;
}

# Create the directory named by $self->_tmp_dir, unless something already
# exists at that path. Dies with "Cannot make dir: ..." when make_path
# returns a false value.
sub _make_tmp_dir {
my ( $self ) = @_;
my $dir = $self->_tmp_dir;
# Only attempt creation when the path is absent.
unless ( -e $dir ) {
make_path($dir) or die "Cannot make dir: $dir\n" ;
}
}

sub _build__genes_to_neighbourhood
{
my ( $self ) = @_;
Expand Down Expand Up @@ -119,8 +109,6 @@ sub _build__genes_to_neighbourhood
sub split_groups {
my ( $self ) = @_;

$self->_make_tmp_dir;

# iteratively
for my $x ( 0..($self->iterations - 1) ){
my ( $in_groups, $out_groups ) = $self->_get_files_for_iteration( $x );
Expand Down Expand Up @@ -168,24 +156,23 @@ sub split_groups {
}
close( $outfile_handle );
}

remove_tree( $self->_tmp_dir ) unless ( $self->dont_delete );
}

# Read $groupfile line by line and map every whitespace-separated gene on a
# line to that line's zero-based index. The resulting hash reference is
# stored through $self->_genes_to_groups.
sub _set_genes_to_groups {
my ( $self, $groupfile ) = @_;

my %genes2groups;
my $c = 0; # index of the line (group) currently being read
# NOTE(review): the bareword GFH lines and the lexical $gfh lines below look
# like the old and new versions of the same statements shown together by the
# diff view - confirm against the real file before relying on this text.
open( GFH, '<', $groupfile );
while( my $line = <GFH> ){
open( my $gfh, '<', $groupfile );
while( my $line = <$gfh> ){
chomp $line;
my @genes = split( /\s+/, $line );
for my $g ( @genes ){
$genes2groups{$g} = $c;
}
$c++;
}
close($gfh);
$self->_genes_to_groups( \%genes2groups );
}

Expand Down
56 changes: 41 additions & 15 deletions t/Bio/Roary/CommandLine/Roary.t
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,24 @@ cleanup_files();
);

mock_execute_script_and_check_output_sorted_groups( $script_name, \%scripts_and_expected_files, [0,6,7,8,9] );

# Make sure faa files are cleaned up automatically
ok(!(-e 'query_1.gff.proteome.faa'),'Check protein query_1.gff.proteome.faa is cleaned up');
ok(!(-e 'query_2.gff.proteome.faa'),'Check protein query_2.gff.proteome.faa is cleaned up');
ok(!(-e 'query_5.gff.proteome.faa'),'Check protein query_5.gff.proteome.faa is cleaned up');


%scripts_and_expected_files = (
'-j Local --dont_delete_files t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff ' =>
[ 'empty_file', 't/data/empty_file' ],
);
mock_execute_script_and_check_output_sorted_groups( $script_name, \%scripts_and_expected_files, [0,6,7,8,9] );
ok((-e 'query_1.gff.proteome.faa'),'Check protein query_1.gff.proteome.faa is not cleaned up');
ok((-e 'query_2.gff.proteome.faa'),'Check protein query_2.gff.proteome.faa is not cleaned up');
ok((-e 'query_5.gff.proteome.faa'),'Check protein query_5.gff.proteome.faa is not cleaned up');

cleanup_files();

SKIP:
{

Expand Down Expand Up @@ -89,32 +105,42 @@ done_testing();
# Remove the 'pan_genome_sequences' directory tree and unlink a fixed list
# of files by relative name. Takes no arguments, and the return values of
# remove_tree/unlink are not checked.
# NOTE(review): several filenames are unlinked twice below (for example
# '_clustered' and 'pan_genome.fa'); this looks like old and new diff lines
# shown together - confirm against the real file.
sub cleanup_files
{
remove_tree('pan_genome_sequences');
unlink('_blast_results');
unlink('_clustered');
unlink('_clustered.bak.clstr');
unlink('_clustered.clstr');
unlink('_combined_files');
unlink('_combined_files.groups');
unlink('_fasta_files');
unlink('_gff_files');
unlink('_inflated_mcl_groups');
unlink('_inflated_unsplit_mcl_groups');
unlink('_labeled_mcl_groups');
unlink('_uninflated_mcl_groups');
unlink('accessory.header.embl');
unlink('accessory.header.tab');
unlink('accessory.tab');
unlink('blast_identity_frequency.Rtab');
unlink('clustered_proteins');
unlink('core_accessory.header.embl');
unlink('core_accessory.header.tab');
unlink('core_accessory.tab');
unlink('core_gene_alignment.aln');
unlink('database_masking.asnb');
unlink('example_1.faa.tmp.filtered.fa');
unlink('example_2.faa.tmp.filtered.fa');
unlink('example_3.faa.tmp.filtered.fa');
unlink('gene_presence_absence.csv');
unlink('query_1.gff.proteome.faa');
unlink('query_2.gff.proteome.faa');
unlink('query_3.gff.proteome.faa');
unlink('_clustered');
unlink('_clustered.bak.clstr');
unlink('pan_genome.fa');
unlink('core_accessory.header.tab');
unlink('accessory.header.tab');
unlink('accessory.tab');
unlink('core_accessory.tab');
unlink('number_of_conserved_genes.Rtab');
unlink('number_of_genes_in_pan_genome.Rtab');
unlink('number_of_new_genes.Rtab');
unlink('number_of_unique_genes.Rtab');
unlink('pan_genome.fa');
unlink('query_1.gff.proteome.faa');
unlink('query_2.gff.proteome.faa');
unlink('query_3.gff.proteome.faa');
unlink('query_5.gff.proteome.faa');
unlink('core_gene_alignment.aln');
unlink('blast_identity_frequency.Rtab');
unlink('real_data_1.gff.proteome.faa');
unlink('real_data_2.gff.proteome.faa');
unlink('accessory.header.embl');
unlink('core_accessory.header.embl');

}
2 changes: 1 addition & 1 deletion t/Bio/Roary/External/Mcl.t
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ ok(
is(
$obj->_command_to_run,
$cwd
. '/t/bin/dummy_mcxdeblast -m9 --score=r --line-mode=abc some_blast_results | '
. '/t/bin/dummy_mcxdeblast -m9 --score=r --line-mode=abc some_blast_results 2> /dev/null | '
. $cwd
. '/t/bin/dummy_mcl - --abc -I 1.5 -o output.groups > /dev/null 2>&1',
'Command constructed as expected'
Expand Down

0 comments on commit 92293bb

Please sign in to comment.