Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Core alignment missing file #70

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/Bio/PanGenome/CommandLine/CreatePanGenome.pm
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ sub BUILD {
'h|help' => \$help,
);

$self->help($help) if(defined($help));
if ( @{ $self->args } == 0 ) {
$self->_error_message("Error: You need to provide a GFF file");
}
Expand Down
1 change: 1 addition & 0 deletions lib/Bio/PanGenome/CommandLine/ExtractProteomeFromGff.pm
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ sub BUILD {
'h|help' => \$help,
);

$self->help($help) if(defined($help));
if ( @{ $self->args } == 0 ) {
$self->_error_message("Error: You need to provide a GFF file");
}
Expand Down
1 change: 1 addition & 0 deletions lib/Bio/PanGenome/CommandLine/IterativeCdhit.pm
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ sub BUILD {
'h|help' => \$help,
);

$self->help($help) if(defined($help));
$self->lower_bound_percentage($lower_bound_percentage/100) if ( defined($lower_bound_percentage) );
$self->upper_bound_percentage($upper_bound_percentage/100) if ( defined($upper_bound_percentage) );
$self->step_size_percentage($step_size_percentage/100) if ( defined($step_size_percentage) );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ sub BUILD {
'h|help' => \$help,
);

$self->help($help) if(defined($help));
if ( @{ $self->args } < 2 ) {
$self->_error_message("Error: You need to provide at least 2 FASTA files");
}
Expand Down
2 changes: 2 additions & 0 deletions lib/Bio/PanGenome/CommandLine/PanGenomeCoreAlignment.pm
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ sub BUILD {
'o|output_filename=s' => \$output_filename,
'h|help' => \$help,
);

$self->help($help) if(defined($help));

if ( defined($multifasta_base_directory) && ( -d $multifasta_base_directory ) ) {
$self->multifasta_base_directory( abs_path($multifasta_base_directory));
Expand Down
1 change: 1 addition & 0 deletions lib/Bio/PanGenome/CommandLine/PanGenomePostAnalysis.pm
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ sub BUILD {
'h|help' => \$help,
);

$self->help($help) if(defined($help));
$self->job_runner($job_runner) if (defined($job_runner) );
$self->fasta_files($fasta_files) if (defined($fasta_files));
$self->input_files($input_files) if (defined($input_files));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ sub BUILD {
'h|help' => \$help,
);

$self->help($help) if(defined($help));
$self->output_filename($output_filename) if ( defined($output_filename) );
$self->tree_file($tree_file) if ( defined($tree_file) );
$self->tree_format($tree_format) if ( defined($tree_format) );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ sub BUILD {
$self->_error_message("Error: You need to provide a FASTA file");
}

$self->help($help) if(defined($help));
$self->output_filename($output_filename) if ( defined($output_filename) );
$self->job_runner($job_runner) if ( defined($job_runner) );
$self->makeblastdb_exec($makeblastdb_exec) if ( defined($makeblastdb_exec) );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use Bio::PanGenome::AnnotateGroups;
use Bio::PanGenome::External::Muscle;
use Bio::PanGenome::External::Revtrans;
use Bio::PanGenome::Output::GroupsMultifastaProtein;
use Bio::PanGenome::SortFasta;


has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
Expand All @@ -33,6 +34,7 @@ sub BUILD {
'h|help' => \$help,
);

$self->help($help) if(defined($help));
if ( @{ $self->args } == 0 ) {
$self->_error_message("Error: You need to provide at least 1 FASTA file");
}
Expand All @@ -57,6 +59,12 @@ sub run {

for my $fasta_file (@{$self->nucleotide_fasta_files})
{

my $sort_fasta_before = Bio::PanGenome::SortFasta->new(
input_filename => $fasta_file,
);
$sort_fasta_before->sort_fasta->replace_input_with_output_file;

my $multifasta_protein_obj = Bio::PanGenome::Output::GroupsMultifastaProtein->new(
nucleotide_fasta_file => $fasta_file,
);
Expand All @@ -67,13 +75,24 @@ sub run {
job_runner => 'Local'
);
$seg->run();

my $sort_fasta_after_muscle = Bio::PanGenome::SortFasta->new(
input_filename => $multifasta_protein_obj->output_filename. '.aln',
);
$sort_fasta_after_muscle->sort_fasta->replace_input_with_output_file;

my $revtrans= Bio::PanGenome::External::Revtrans->new(
nucleotide_filename => $fasta_file,
protein_filename => $multifasta_protein_obj->output_filename. '.aln',
output_filename => $fasta_file.'.aln'
);
$revtrans->run();

my $sort_fasta_after_revtrans = Bio::PanGenome::SortFasta->new(
input_filename => $fasta_file.'.aln',
);
$sort_fasta_after_revtrans->sort_fasta->replace_input_with_output_file;

unlink($multifasta_protein_obj->output_filename);
unlink($multifasta_protein_obj->output_filename. '.aln');
}
Expand Down
6 changes: 4 additions & 2 deletions lib/Bio/PanGenome/CommandLine/QueryPanGenome.pm
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,16 @@ sub BUILD {
'h|help' => \$help,
);

$self->help($help) if(defined($help));

$self->output_filename($output_filename) if ( defined($output_filename) );
$self->action($action) if ( defined($action) );
if ( defined($groups_filename) && ( -e $groups_filename ) ) {
$self->groups_filename($groups_filename);
}

if(! (-e $self->groups_filename($groups_filename))) {
$self->_error_message("Error: Cant access the groups file $groups_filename");
if(! (-e $self->groups_filename)) {
$self->_error_message("Error: Cant access the groups file: ".$self->groups_filename);
}

@group_names = split( /,/, join( ',', @group_names ) );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ sub BUILD {
'h|help' => \$help,
);

$self->help($help) if(defined($help));
if ( @{ $self->args } == 0 ) {
$self->_error_message("Error: You need to provide a FASTA file");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ has 'exec' => ( is => 'ro', isa => 'Str', default => 'protein_mus
# Overload Role
has '_memory_required_in_mb' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__memory_required_in_mb' );
has '_queue' => ( is => 'rw', isa => 'Str', default => 'normal' );
has '_files_per_chunk' => ( is => 'ro', isa => 'Int', default => 20 );
has '_files_per_chunk' => ( is => 'ro', isa => 'Int', default => 25 );

sub _build__memory_required_in_mb {
my ($self) = @_;
Expand Down Expand Up @@ -68,7 +68,8 @@ sub run {
dont_wait => $self->dont_wait,
);
$job_runner_obj->run();


$job_runner_obj->submit_dependancy_job('pan_genome_core_alignment');
1;
}

Expand Down
32 changes: 30 additions & 2 deletions lib/Bio/PanGenome/JobRunner/LSF.pm
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ has 'commands_to_run' => ( is => 'ro', isa => 'ArrayRef', required => 1 )
has 'memory_in_mb' => ( is => 'ro', isa => 'Int', default => 500 );
has 'queue' => ( is => 'ro', isa => 'Str', default => 'normal' );
has '_job_manager' => ( is => 'ro', isa => 'LSF::JobManager', lazy => 1, builder => '_build__job_manager' );
has 'dont_wait' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'dont_wait' => ( is => 'rw', isa => 'Bool', default => 0 );
# Ids of the jobs submitted by run(); submit_dependancy_job() turns them into
# the dependency expression of the follow-up job.
# The default must be a code reference: Moose rejects plain references as
# defaults, and a coderef gives every object its own fresh array.
has 'job_ids'         => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );

sub _build__job_manager {
my ($self) = @_;
Expand All @@ -46,11 +47,25 @@ sub _submit_job {
);
}

# Build the job dependency expression for a list of job ids.
#
# Arguments: $ids - array reference of job ids (may be undef or empty).
# Returns:   '' when there is nothing to depend on, otherwise one
#            'done(<id>)' term per id, joined with '&&'.
sub _construct_dependancy_params
{
    my ($self, $ids) = @_;

    # No ids means no dependency expression at all.
    return '' unless defined($ids) && scalar( @{$ids} ) > 0;

    return join( '&&', map { 'done(' . $_ . ')' } @{$ids} );
}


sub run {
my ($self) = @_;
for my $command_to_run ( @{ $self->commands_to_run } ) {
$self->_submit_job($command_to_run);
my $job_id = $self->_submit_job($command_to_run);
push(@{$self->job_ids}, $job_id);
}

if(!(defined($self->dont_wait) && $self->dont_wait == 1 ))
Expand All @@ -60,6 +75,19 @@ sub run {
1;
}

# Submit one more command through the job manager, passing a -w dependency
# expression built from the ids collected in job_ids.
#
# Arguments: $command_to_run - the command line to submit.
# Returns:   whatever the job manager's submit() returns.
sub submit_dependancy_job {
    my ( $self, $command_to_run ) = @_;

    my $manager = $self->_job_manager;

    # Kept as an ordered list so the options reach submit() in the same order.
    my @submit_options = (
        -o => "out.o",
        -e => "out.e",
        -M => $self->memory_in_mb,
        -R => $self->_generate_memory_parameter,
        -w => $self->_construct_dependancy_params( $self->job_ids ),
    );

    return $manager->submit( @submit_options, $command_to_run );
}


no Moose;
__PACKAGE__->meta->make_immutable;

Expand Down
13 changes: 13 additions & 0 deletions lib/Bio/PanGenome/JobRunner/Local.pm
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@ sub run {
}
1;
}


# Local counterpart of the dependency-expression builder: it always returns
# an empty string, whatever it is called with.
sub _construct_dependancy_params
{
    my $self = shift;
    my $no_dependencies = '';
    return $no_dependencies;
}

# Run the follow-up command directly with system().
#
# Arguments: $command_to_run - the command line to execute.
# Returns:   the value returned by system() for that command.
sub submit_dependancy_job {
    my $self           = shift;
    my $command_to_run = shift;

    my $system_result = system($command_to_run);
    return $system_result;
}

no Moose;
__PACKAGE__->meta->make_immutable;

Expand Down
68 changes: 68 additions & 0 deletions lib/Bio/PanGenome/SortFasta.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package Bio::PanGenome::SortFasta;

# ABSTRACT: sort a fasta file by name

=head1 SYNOPSIS

Sort the sequences of a FASTA file by name.
use Bio::PanGenome::SortFasta;

my $obj = Bio::PanGenome::SortFasta->new(
input_filename => 'infasta.fa',
);
$obj->sort_fasta->replace_input_with_output_file;

=cut

use Moose;
use File::Copy;
use Bio::SeqIO;

# Path of the FASTA file to sort (mandatory).
has 'input_filename' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Path the sorted records are written to; built on first access by
# _build_output_filename.
has 'output_filename' => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build_output_filename',
);

# Bio::SeqIO object over input_filename, created on first access.
has '_input_seqio' => (
    is      => 'ro',
    isa     => 'Bio::SeqIO',
    lazy    => 1,
    builder => '_build__input_seqio',
);

# Bio::SeqIO object over output_filename, created on first access.
has '_output_seqio' => (
    is      => 'ro',
    isa     => 'Bio::SeqIO',
    lazy    => 1,
    builder => '_build__output_seqio',
);

# Builder for output_filename: the input filename with ".sorted.fa" appended.
sub _build_output_filename
{
    my $self = shift;
    return join( '', $self->input_filename, ".sorted.fa" );
}

# Builder for _input_seqio: a Bio::SeqIO object over input_filename in
# Fasta format.
sub _build__input_seqio {
    my $self = shift;

    # An ordered list, so the arguments are passed in the original order.
    my @seqio_arguments = (
        -file   => $self->input_filename,
        -format => 'Fasta',
    );
    return Bio::SeqIO->new(@seqio_arguments);
}

# Builder for _output_seqio: a Bio::SeqIO object over output_filename in
# Fasta format. The ">" prefix on the file name selects write mode.
sub _build__output_seqio {
    my $self = shift;

    my $write_target = ">" . $self->output_filename;
    my @seqio_arguments = (
        -file   => $write_target,
        -format => 'Fasta',
    );
    return Bio::SeqIO->new(@seqio_arguments);
}

# Read every record from the input and write them to the output ordered by
# display id (string sort).
#
# Records that share a display id are all kept, in the order they were read;
# keying a plain hash by id would silently drop all but the last of them.
#
# Returns: $self, so calls can be chained.
sub sort_fasta {
    my ($self) = @_;

    # display id => array reference of the records carrying that id
    my %sequences_for;
    while ( my $input_seq = $self->_input_seqio->next_seq() ) {
        push @{ $sequences_for{ $input_seq->display_id } }, $input_seq;
    }

    for my $sequence_name ( sort keys %sequences_for ) {
        for my $sequence ( @{ $sequences_for{$sequence_name} } ) {
            $self->_output_seqio->write_seq($sequence);
        }
    }
    return $self;
}

# Move the sorted output file over the original input file.
#
# If no output file exists (sort_fasta only creates it when it writes a
# record), there is nothing to move and the input is left untouched.
#
# Returns: $self, so calls can be chained.
# Dies:    when the output file exists but cannot be moved; ignoring that
#          would let callers carry on with the unsorted input.
sub replace_input_with_output_file
{
    my ($self) = @_;

    my $sorted_file = $self->output_filename;
    return $self unless -e $sorted_file;

    move( $sorted_file, $self->input_filename )
        or die "Error: Couldnt move $sorted_file to " . $self->input_filename . ": $!\n";
    return $self;
}

no Moose;
__PACKAGE__->meta->make_immutable;

1;
24 changes: 22 additions & 2 deletions t/Bio/PanGenome/CommandLine/CreatePanGenome.t
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,42 @@ with 'TestHelper';
BEGIN {
use Test::Most;
use_ok('Bio::PanGenome::CommandLine::CreatePanGenome');
use Bio::PanGenome::SequenceLengths;
}
my $script_name = 'Bio::PanGenome::CommandLine::CreatePanGenome';
my $cwd = getcwd();

local $ENV{PATH} = "$ENV{PATH}:./bin";
my %scripts_and_expected_files;
system('touch empty_file');

%scripts_and_expected_files = (
' -j Local --dont_create_rplots t/data/query_1.gff t/data/query_2.gff t/data/query_6.gff ' =>
[ 'clustered_proteins', 't/data/clustered_proteins_pan_genome' ],
' -j Local --dont_create_rplots t/data/query_1.gff t/data/query_2.gff t/data/query_6.gff ' =>
[ 'group_statisics.csv', 't/data/overall_group_statisics.csv' ],
'-h' =>
[ 'empty_file', 't/data/empty_file' ],
);
mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files, [6,7,8,9] );
cleanup_files();


%scripts_and_expected_files = (
' -j Local --dont_create_rplots --output_multifasta_files t/data/query_1.gff t/data/query_2.gff t/data/query_6.gff ' =>
[ 'pan_genome_sequences/speH.fa.aln', 't/data/speH.fa.aln' ],
' -j Local --dont_create_rplots --output_multifasta_files t/data/real_data_1.gff t/data/real_data_2.gff' =>
[ 'pan_genome_sequences/sopB.fa.aln', 't/data/sopB.fa.aln' ],
);
mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );


ok(my $seq_len = Bio::PanGenome::SequenceLengths->new(
fasta_file => 'core_gene_alignment.aln',
), 'Check size of the core_gene_alignment.aln init');

is($seq_len->sequence_lengths->{'11111_1#11'}, 58389, 'length of first sequence');



ok(-e 'accessory.tab');
ok(-e 'core_accessory.tab');
ok(-e 'number_of_conserved_genes.Rtab');
Expand Down Expand Up @@ -69,5 +83,11 @@ sub cleanup_files
unlink('number_of_new_genes.Rtab');
unlink('number_of_unique_genes.Rtab');
unlink('query_6.gff.proteome.faa');
unlink('core_gene_alignment.aln');
unlink('blast_identity_frequency.Rtab');
unlink('real_data_1.gff.proteome.faa');
unlink('real_data_2.gff.proteome.faa');
unlink('accessory.header.embl');
unlink('core_accessory.header.embl');

}
4 changes: 3 additions & 1 deletion t/Bio/PanGenome/CommandLine/ExtractProteomeFromGff.t
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ BEGIN {
}
my $script_name = 'Bio::PanGenome::CommandLine::ExtractProteomeFromGff';
my $cwd = getcwd();

system('touch empty_file');
my %scripts_and_expected_files = (
't/data/example_annotation.gff' =>
['example_annotation.gff.proteome.faa','t/data/example_annotation.gff.proteome.faa.expected' ],
'-h' =>
[ 'empty_file', 't/data/empty_file' ],
);

mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
Expand Down
4 changes: 3 additions & 1 deletion t/Bio/PanGenome/CommandLine/MergeMultipleFastaAlignments.t
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ BEGIN {
use_ok('Bio::PanGenome::CommandLine::MergeMultipleFastaAlignments');
}
my $script_name = 'Bio::PanGenome::CommandLine::MergeMultipleFastaAlignments';

system('touch empty_file');
my %scripts_and_expected_files = (
't/data/multfasta1.aln t/data/multfasta2.aln t/data/multfasta3.aln' =>
[ 'merged_alignments.aln', 't/data/expected_output_merged.aln' ],
'-o different_output_file.aln t/data/multfasta1.aln t/data/multfasta2.aln t/data/multfasta3.aln' =>
[ 'different_output_file.aln', 't/data/expected_output_merged.aln' ],
'-h' =>
[ 'empty_file', 't/data/empty_file' ],
);

mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
Expand Down
Loading