Skip to content

Commit

Permalink
Merge pull request #138 from andrewjpage/extract_id
Browse files Browse the repository at this point in the history
Extract IDs from GFF file using Bio::Perl
  • Loading branch information
andrewjpage committed Jun 1, 2015
2 parents 91c9269 + 0515aa5 commit d532f49
Show file tree
Hide file tree
Showing 12 changed files with 155 additions and 267 deletions.
2 changes: 1 addition & 1 deletion dist.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name = Bio-Roary
version = 2.2.6
version = 2.3.0
author = Andrew J. Page <ap13@sanger.ac.uk>
license = GPL_3
copyright_holder = Wellcome Trust Sanger Institute
Expand Down
54 changes: 54 additions & 0 deletions lib/Bio/Roary/BedFromGFFRole.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package Bio::Roary::BedFromGFFRole;

# ABSTRACT: A role to create a bed file from a gff

=head1 SYNOPSIS

A role to create a bed file from a gff

   with 'Bio::Roary::BedFromGFFRole';

=cut

use Moose::Role;
use Bio::Tools::GFF;

# Regex source (an alternation group) matched against each feature's primary
# tag in _create_bed_file_from_gff; features whose tag does not match are skipped.
has '_tags_to_filter' => ( is => 'ro', isa => 'Str', default => '(CDS|ncRNA|tRNA|tmRNA|rRNA)' );
# Features for which (end - start) is below this value are left out of the
# bed file by _create_bed_file_from_gff.
has 'min_gene_size_in_nucleotides' => ( is => 'ro', isa => 'Int', default => 120 );

# Build the name of the intermediate bed file.
#
# The consuming class must provide an output_filename method; the result is
# that value and the literal 'intermediate.bed' joined with a '.'.
sub _bed_output_filename {
    my ($self) = @_;

    my @name_parts = ( $self->output_filename, 'intermediate.bed' );
    return join( '.', @name_parts );
}

# Write a bed file describing the usable features of a GFF3 file.
#
# Reads the file named by $self->gff_file (to be provided by the consuming
# class) with Bio::Tools::GFF and writes one tab separated line per accepted
# feature to the file named by $self->_bed_output_filename.
#
# A feature is accepted when:
#   * its primary tag matches the _tags_to_filter regex,
#   * it has an ID tag whose first value is non-empty once quotes are removed,
#   * (end - start) is at least min_gene_size_in_nucleotides.
#
# Output columns: seq_id, start - 1, end, gene ID, the constant 1, strand.
#
# Dies if the bed file cannot be opened for writing or cannot be closed
# cleanly. Returns a true value on success.
sub _create_bed_file_from_gff {
    my ($self) = @_;

    my $bed_filename = $self->_bed_output_filename;
    open( my $bed_fh, '>', $bed_filename )
      or die "Could not open '$bed_filename' for writing: $!";

    # These do not change between features, so look them up (and compile the
    # regex) once rather than on every iteration.
    my $tags_source   = $self->_tags_to_filter;
    my $tags_regex    = qr/$tags_source/;
    my $min_gene_size = $self->min_gene_size_in_nucleotides;

    my $gffio = Bio::Tools::GFF->new( -file => $self->gff_file, -gff_version => 3 );
    while ( my $feature = $gffio->next_feature() ) {

        # Only interested in a few tags
        next if $feature->primary_tag !~ $tags_regex;

        # Must have an ID tag
        next unless $feature->has_tag('ID');

        # Filter out small genes.
        # NOTE(review): (end - start) is one less than the inclusive feature
        # length; left unchanged so the same features are filtered as before.
        next if ( $feature->end - $feature->start ) < $min_gene_size;

        # Only the first ID value is used; any further values are ignored.
        my ($gene_id) = $feature->get_tag_values('ID');
        next unless defined $gene_id;
        $gene_id =~ s!["']!!g;
        next if $gene_id eq "";

        # Any strand that is not greater than zero is written as '-'.
        my $strand = ( $feature->strand > 0 ) ? '+' : '-';

        # The start column is written as start - 1, the end column unchanged.
        my @columns = ( $feature->seq_id, $feature->start - 1, $feature->end, $gene_id, 1, $strand );
        print {$bed_fh} join( "\t", @columns ) . "\n";
    }
    $gffio->close();

    # Buffered write errors only surface at close, so check it.
    close($bed_fh)
      or die "Could not close '$bed_filename' after writing: $!";

    return 1;
}



1;
2 changes: 1 addition & 1 deletion lib/Bio/Roary/External/Blastp.pm
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ has '_num_threads' => ( is => 'ro', isa => 'Int', default => 1 );
has '_max_target_seqs' => ( is => 'ro', isa => 'Int', default => 2000 );
has '_logging' => ( is => 'ro', isa => 'Str', default => '2> /dev/null' );
has 'output_file' => ( is => 'ro', isa => 'Str', default => 'results.out' );
has 'perc_identity' => ( is => 'ro', isa => 'Num', default => 98 );
has 'perc_identity' => ( is => 'ro', isa => 'Num', default => 98 );

sub _command_to_run {
my ($self) = @_;
Expand Down
2 changes: 0 additions & 2 deletions lib/Bio/Roary/External/Makeblastdb.pm
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ use Cwd;
with 'Bio::Roary::JobRunner::Role';

has 'fasta_file' => ( is => 'ro', isa => 'Str', required => 1 );
has 'mask_data' => ( is => 'ro', isa => 'Str', required => 1 );
has 'exec' => ( is => 'ro', isa => 'Str', default => 'makeblastdb' );
has '_working_directory' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
has '_dbtype' => ( is => 'ro', isa => 'Str', default => 'prot' );
Expand All @@ -51,7 +50,6 @@ sub _command_to_run {
'-in', $self->fasta_file,
'-dbtype', $self->_dbtype,
'-parse_seqids',
'-mask_data', $self->mask_data,
'-out', $self->output_database,
'-logfile', $self->_logfile
)
Expand Down
81 changes: 0 additions & 81 deletions lib/Bio/Roary/External/Segmasker.pm

This file was deleted.

Loading

0 comments on commit d532f49

Please sign in to comment.