-
Notifications
You must be signed in to change notification settings - Fork 7
/
genbank2gff.pl
executable file
·78 lines (70 loc) · 2.48 KB
/
genbank2gff.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/perl -w
# genbank2gff.pl -- print the features of GenBank records as tab-separated,
# GFF-style lines.
#
# Each sequence feature becomes one nine-column line:
#   display_id, source, primary_tag, start, end, score, strand, frame, tags
# Undefined columns are printed as ".".  See $usage below for the switches.
#
# NOTE(review): top_SeqFeatures, all_tags and each_tag_value are older BioPerl
# method names -- confirm they exist in the installed BioPerl version.
use strict;
use warnings;
use Bio::SeqIO;

# With no command-line arguments at all, fall back to the single filename "-"
# (presumably read as standard input by Bio::SeqIO -- TODO confirm).
@ARGV = qw(-) unless @ARGV;

# user-specifiable params
my $use_source = 0;    # -usesource: use the feature's own source tag
my $pr_comment = 0;    # -comment:   also print the feature table as comments
my $pr_trans   = 0;    # -trans:     keep translation tags
my $no_white   = 0;    # -nowhite:   drop tag=value pairs containing whitespace
my $no_tags    = 0;    # -notags:    leave the tag column empty

# fixed params
my $default_source = "GenBank";

my $usage = "";
$usage .= "Usage: $0\n";
$usage .= " [-usesource] (use bioperl 'source' tag, rather than '$default_source')\n";
$usage .= " [-comment] (also print feature table as indented comments)\n";
$usage .= " [-trans] (print translation tags)\n";
$usage .= " [-nowhite] (suppress text-with-whitespace tags)\n";
$usage .= " [-notags] (suppress ALL tags)\n";
$usage .= " <GenBank files...>\n";

# Each recognised switch simply turns on one of the flags above.
my %flag_for = (
    '-usesource' => \$use_source,
    '-comment'   => \$pr_comment,
    '-trans'     => \$pr_trans,
    '-nowhite'   => \$no_white,
    '-notags'    => \$no_tags,
);

# Separate switches from filenames.  Anything that is not "-" followed by at
# least one more character (so a bare "-" too) is collected as a filename.
my @argv;
while (@ARGV) {
    my $opt = shift @ARGV;
    unless ($opt =~ /^-./) { push @argv, $opt; next }
    my $flag = $flag_for{$opt}
        or die $usage . "Unknown option: $opt\n";
    $$flag = 1;
}

# The usage text promises "<GenBank files...>" and the loop below handles any
# number of files, so only insist on at least one.
die $usage unless @argv;

# BioPerl reports strand as 1 / -1 / 0; GFF wants "+" / "-" / ".".
my %strand_symbol = (1 => '+', -1 => '-', 0 => '.');

foreach my $filename (@argv) {
    my $stream = Bio::SeqIO->new(-file => $filename, -format => 'GenBank');
    while (my $seq = $stream->next_seq) {
        if ($pr_comment) {
            # A record may lack a description; print an empty one rather than
            # warning about an undefined value.
            my $desc = $seq->desc;
            $desc = "" unless defined $desc;
            print "\# Features for ", $seq->display_id, "\n";
            print "\# ", $desc, "\n";
            comment_sf("", $seq->top_SeqFeatures);
        }

        foreach my $sf ($seq->all_SeqFeatures) {
            my @tags = $sf->all_tags;
            @tags = grep { !/translation/i } @tags unless $pr_trans;

            # Build the "tag=value" list for the last column.
            my @tagval;
            unless ($no_tags) {
                foreach my $tag (@tags) {
                    push @tagval, map { "$tag=$_" } $sf->each_tag_value($tag);
                }
                if ($no_white) {
                    @tagval = grep { !/\s/ } @tagval;
                }
                else {
                    # quote values that contain whitespace
                    s/=(.*\s.*)/='$1'/ foreach @tagval;
                }
            }

            my $source = $use_source ? $sf->source_tag : $default_source;

            # Fetch every column in scalar context so that an accessor which
            # returns an empty list cannot shift the columns after it.
            my $score  = $sf->score;
            my $strand = $sf->strand;
            my $frame  = $sf->frame;

            # iron out a few bioperl wrinkles
            # strand: default to "+" when unset, otherwise translate the
            # numeric code; any unrecognised value is passed through as-is.
            if (!defined $strand) {
                $strand = '+';
            }
            elsif (exists $strand_symbol{$strand}) {
                $strand = $strand_symbol{$strand};
            }

            my @gff = (
                scalar($seq->display_id),
                $source,
                scalar($sf->primary_tag),
                scalar($sf->start),
                scalar($sf->end),
                $score,
                $strand,
                $frame,
                join(" ", @tagval),
            );
            # everything else that is still undefined becomes "."
            @gff = map { defined($_) ? $_ : "." } @gff;

            # display
            print join("\t", @gff), "\n";
        }
    }
}
# comment_sf($indent, @features)
#
# Prints one "# ..." comment line per feature, giving its primary tag, start
# and end, prefixed by $indent.  Each feature's sub-features (as returned by
# sub_SeqFeature) are printed directly after it with one extra space of
# indentation, and so on down the hierarchy.
sub comment_sf {
    my ($prefix, @features) = @_;

    # Work stack of [indent, feature] pairs.  Entries are pushed in reverse so
    # that pop() hands them back in their original order, parent before
    # children and children before the next sibling.
    my @pending = map { [ $prefix, $_ ] } reverse @features;

    while (my $entry = pop @pending) {
        my ($lead, $feat) = @$entry;
        print "\# ", $lead, "\"", $feat->primary_tag, "\" start=", $feat->start, " end=", $feat->end, "\n";

        my @children = $feat->sub_SeqFeature;
        push @pending, map { [ $lead . " ", $_ ] } reverse @children;
    }
    return;
}