-
Notifications
You must be signed in to change notification settings - Fork 7
/
gene.model
114 lines (95 loc) · 4.41 KB
/
gene.model
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
comment { Recognised GFFs: (start_exon end_exon start_end_exon exon intron ORF) }
author { Ian Holmes ihh@sanger.ac.uk }
eval { my $f; @exon_starts_at_linkend = map(($f = $_, scalar(grep($grepgfffeature =~ /exon/ && $linkend == $grepgffstart && $f == $grepgffframe, @gffcache)))[1], 0 .. 2) }
link { comment { Start a new gene }
    from { start } to { exon_start }
    maxlen { 0 }
    endfilter { $gfffeature =~ /exon/ && $gffstart == $linkend && $gffframe == 0 }
    push { "$gffframe $gffgroup" }
    display { print "#\n# New gene\n#\n" }
}
link { comment { Exon }
    from { exon_start } to { exon_end }
    maxlen { 1000 }
    endfilter { $gfffeature =~ /exon/ && $gffend + 1 == $linkend }
    startfilter { $gffstart == $linkstart }
    popfilter { my ($stack_frame, $stack_group) = split; $stack_frame == $gffframe && $stack_group eq $gffgroup }
    push { $exit_frame = ($linklen + $gffframe) % 3; "$exit_frame $gffgroup" }
    display { }
}
link { comment { Intron }
    from { exon_end } to { exon_start }
    maxlen { 1000 }
    endfilter { $gfffeature =~ /intron/ && $gffend + 1 == $linkend }
    startfilter { $gffstart == $linkstart }
    popfilter { (split(' ', $_))[1] eq $gffgroup }
    push { $_ }
    display { }
}
link { comment { End of a gene }
    from { exon_end } to { end }
    popfilter { 1 }
}
link { comment { Loop back, ready for another gene }
    from { end } to { start }
}
link { comment { Partial end exon }
    from { exon_start } to { partial_end_exon }
    maxlen { 1000 }
    endfilter { $gfffeature =~ /end/ && $linkend > $gffstart && ($exit_frame = ($gffframe + $linkend - $gffstart) % 3, grep($_ ne $gff && $grepgfffeature =~ /exon/ && $grepgffstart <= $linkend && $grepgffend > $linkend && ($grepgffframe + $linkend - $grepgffstart) % 3 == $exit_frame, @gffcache))[1] }
    startfilter { $linkstart == $gffstart }
    popfilter { my ($f, $g) = split; $f == $gffframe && $g eq $gffgroup }
    push { $exit_frame }
    display { print "$gfftext PARTIAL HIT from $linkstart to $linkend\n" }
}
link { comment { Leap into the middle of an internal exon following a partial end exon }
    from { partial_end_exon } to { exon_end }
    maxlen { 1000 }
    endfilter { $gfffeature =~ /exon/ && $gffend + 1 == $linkend }
    startfilter { $gffstart <= $linkstart }
    popfilter { $gffframe == $_ }
    push { $exit_frame = ($linklen + $gffframe) % 3; "$exit_frame $gffgroup" }
    display { print "$gfftext PARTIAL HIT from $linkstart to $linkend\n" }
}
link { comment { Partial internal exon }
    from { exon_start } to { partial_internal_exon }
    maxlen { 1000 }
    endfilter { $gfffeature =~ /exon/ && $linkend > $gffstart && ($exit_frame = ($gffframe + $linkend - $gffstart) % 3, grep($_ ne $gff && $grepgfffeature =~ /start/ && $grepgffstart <= $linkend && $grepgffend > $linkend && $exit_frame == ($grepgffframe + $linkend - $grepgffstart) % 3, @gffcache))[1] }
    startfilter { $gffstart == $linkstart }
    popfilter { my ($stack_frame, $stack_group) = split; $stack_frame == $gffframe && $stack_group eq $gffgroup }
    push { $exit_frame }
    display { print "$gfftext PARTIAL HIT from $linkstart to $linkend\n" }
}
link { comment { Leap into the middle of a start exon following a partial internal exon }
    from { partial_internal_exon } to { exon_end }
    maxlen { 1000 }
    endfilter { $gfffeature =~ /start/ && $gffend + 1 == $linkend }
    startfilter { $gffstart <= $linkstart }
    popfilter { $gffframe == $_ }
    push { $exit_frame = ($linklen + $gffframe) % 3; "$exit_frame $gffgroup" }
    display { print "$gfftext PARTIAL HIT from $linkstart to $linkend\n" }
}
link { comment { Bridge exons using a convenient ORF }
    from { exon_end } to { ORF }
    maxlen { 1000 }
    endfilter { $gfffeature =~ /ORF/ && $gffstart <= $linkend && $linkend <= $gffend + 1 }
    startfilter { $gffstart <= $linkstart && $linkend <= $gffend + 1 }
    popfilter { my ($stack_frame) = split; $exit_frame = ($linkend - $gffstart) % 3; $stack_frame == ($linkstart - $gffstart) % 3 && $exon_starts_at_linkend[$exit_frame] }
    push { $exit_frame }
    display { print "$gfftext ORF from $linkstart to $linkend\n" }
}
link { comment { Extend the front of an exon using an ORF }
    from { start } to { ORF }
    maxlen { 1000 }
    endfilter { $gfffeature =~ /ORF/ && $gffstart <= $linkend && $linkend <= $gffend + 1 && $exon_starts_at_linkend[$exit_frame = ($linkend - $gffstart) % 3] }
    startfilter { $gffstart == $linkstart }
    push { $exit_frame }
    display { print "$gfftext ORF from $linkstart to $linkend\n" }
}
link { comment { Get the stack ready for the next exon after an ORF segment }
    from { ORF } to { exon_start }
    maxlen { 0 }
    endfilter { $gfffeature =~ /exon/ && $gffstart == $linkend }
    popfilter { $gffframe == $_ }
    push { "$gffframe $gffgroup" }
}