-
Notifications
You must be signed in to change notification settings - Fork 55
/
gerp_dist.pl
136 lines (123 loc) · 4.6 KB
/
gerp_dist.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
use strict;
sub get_gerp_weighted_dist {
my ($tr, $pos, $gerp_db, $cons_db) = @_[0..3];
# collect some variables
my $chr = $tr->seq_region_name();
my @exons = @{ $tr->get_all_Exons };
my $strand = $tr->strand();
my $transcript_id = $tr->{stable_id};
my $number_of_exons = scalar @exons;
# determine boundaries of CDS sequence
my ($stop_codon_pos, $start_codon_pos);
if ($strand == 1) {
$stop_codon_pos = $tr->{coding_region_end};
$start_codon_pos = $tr->{coding_region_start};
} elsif ($strand == -1) {
$stop_codon_pos = $tr->{coding_region_start};
$start_codon_pos = $tr->{coding_region_end};
}
# get distance to from variant to stop codon, weighted by GERP
my $weighted_dist = 0;
my $dist = 0;
for (my $i=0; $i <= $number_of_exons - 1; $i++) {
my $current_exon = $exons[$i];
# skip exons upstream of variant
if ($strand == -1) {
next if $pos < $current_exon->start;
} else {
next if $pos > $current_exon->end;
}
# determine if last exon by checking if exon spans stop codon position
my $last_exon = 0;
if ($strand == 1) {
$last_exon = ($current_exon->start < $stop_codon_pos) && ($current_exon->end >= $stop_codon_pos);
} else {
$last_exon = ($current_exon->end > $stop_codon_pos) && ($current_exon->start <= $stop_codon_pos);
}
# get contribution of current exon to total weighted distance
my ($start, $end, $wd);
my $in_affected_exon = ($pos >= $current_exon->start) && ($pos <= $current_exon->end);
if ($last_exon) {
if ($in_affected_exon) {
$start = $pos;
} else {
$start = ($strand == 1) ? $current_exon->start : $current_exon->end;
}
$end = $stop_codon_pos;
$wd = get_interval_gerp($chr, $start, $end, $gerp_db);
} elsif ($in_affected_exon) {
$start = $pos;
$end = ($strand == 1) ? $current_exon->{end} : $current_exon->{start};
$wd = get_interval_gerp($chr, $start, $end, $gerp_db);
} else {
my $exon_num = $i + 1;
$wd = get_exon_gerp($transcript_id, $exon_num, $cons_db);
$start = $current_exon->start;
$end = $current_exon->end;
}
$weighted_dist = $weighted_dist + $wd;
$dist = $dist + (abs ($end - $start));
}
return ($weighted_dist, $dist);
}
sub get_interval_gerp {
my ($chrom, $a, $b, $gerp_db) = @_[0..3];
if ($gerp_db =~ 'tabix') {
return (get_interval_gerp_tabix($chrom, $a, $b, $gerp_db));
} else {
return (get_interval_gerp_db($chrom, $a, $b, $gerp_db));
}
}
sub get_interval_gerp_db {
my ($chrom, $a, $b, $gerp_db) = @_[0..3];
if ($a > $b) { my $tmp = $a; $a = $b; $b = $tmp; }
my $sql_query = $gerp_db->prepare("SELECT sum(gerp) as gerp FROM gerp_bases where chrom = ? AND pos >= ? AND pos <= ?;");
$sql_query->execute($chrom, $a, $b) or die("MySQL ERROR: $!");
my $results = $sql_query->fetchrow_hashref;
$sql_query->finish();
return ($results->{gerp});
}
sub get_interval_gerp_tabix {
my ($chrom, $a, $b, $gerp_db) = @_[0..3];
if ($a > $b) { my $tmp = $a; $a = $b; $b = $tmp; }
my @results = split /\n/, `$gerp_db $chrom:$a-$b 2>&1`;
my $gerp = 0;
for my $row (@results) {
my @res = split /\t/, $row;
$gerp += $res[2];
}
return ($gerp);
}
sub get_bp_gerp {
my ($chrom, $pos, $gerp_db) = @_[0..3];
if ($gerp_db =~ 'tabix') {
return (get_bp_gerp_tabix($chrom, $pos, $gerp_db));
} else {
return (get_bp_gerp_db($chrom, $pos, $gerp_db));
}
}
sub get_bp_gerp_db {
my ($chrom, $pos, $gerp_db) = @_[0..2];
my $sql_query = $gerp_db->prepare("SELECT * FROM gerp_bases where chrom = ? AND pos = ?;");
$sql_query->execute($chrom, $pos) or die("MySQL ERROR: $!");
my $results = $sql_query->fetchrow_hashref;
$sql_query->finish();
return ($results->{gerp});
}
sub get_bp_gerp_tabix {
my ($chrom, $pos, $gerp_db) = @_[0..2];
my $res = `$gerp_db $chrom:$pos-$pos 2>&1`;
my @results = split /\t/, $res;
return ($results[2]);
}
sub get_exon_gerp {
my $transcript_id = shift;
my $exon_number = shift;
my $gerp_db = shift;
my $sql_query = $gerp_db->prepare("SELECT * FROM gerp_exons where transcript_id = ? AND exon_num = ?; ");
$sql_query->execute($transcript_id, $exon_number) or die("MySQL ERROR: $!");
my $results = $sql_query->fetchrow_hashref;
$sql_query->finish();
return ($results->{gerp});
}
1;