-
Notifications
You must be signed in to change notification settings - Fork 6
/
vep_annotator.pl
79 lines (60 loc) · 4.57 KB
/
vep_annotator.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env perl
#----------------------------------
# @name GenomeVIP VEP annotator script
# @author R. Jay Mashl <rmashl@genome.wustl.edu>
# @version 0.2: allow VEP options to be passed
# @version 0.1: original version
#--------------------------------------
## song changed the parameter for vep
### 12/13/17 ###
use strict;
use warnings;
use Cwd;
use Carp;
use FileHandle;
use IO::File;
use Getopt::Long;
use POSIX qw( WIFEXITED );
use File::Temp qw/ tempfile /;
# get paras from config file
my (%paras);
map {chomp; if(!/^[#;]/ && /=/) { @_ = split /=/; $_[1] =~ s/^\s+//; $_[1] =~ s/\s+$//; my $v = $_[1]; print $v."\n"; $_[0] =~ s/ //g; $paras{ (split /\./, $_[0])[-1] } = $v } } (<>);
map { print; print "\t"; print $paras{$_}; print "\n" } keys %paras;
# check if options are present
my $opts="";
if( exists($paras{'vep_opts'}) ) { $opts = $paras{'vep_opts'} };
my $cmd="";
# split off original header
my (undef, $tmp_orig_calls) = tempfile();
$cmd="/bin/grep -v ^# $paras{'vcf'} > $tmp_orig_calls";
system($cmd);
# run vep
my (undef, $tmp_vep_out) = tempfile();
### wxs less mutation ##
#$cmd = "perl $paras{'vep_cmd'} --offline --cache --dir $paras{'cachedir'} --polyphen b --gmaf --maf_1kg --maf_esp --assembly $paras{'assembly'} --fork 4 --no_progress --no_stats --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --canonical --protein --biotype --uniprot --tsl --pubmed --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --minimal --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --format vcf --vcf -i $tmp_orig_calls -o $tmp_vep_out --force_overwrite --fasta $paras{'reffasta'}";
#$cmd = "perl $paras{'vep_cmd'} --offline --cache --dir $paras{'cachedir'} --polyphen b --gmaf --maf_1kg --maf_esp --assembly $paras{'assembly'} --fork 4 --no_progress --no_stats --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --canonical --protein --biotype --uniprot --tsl --pubmed --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --minimal --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --format vcf --input_file $paras{'vcf'} --output_file $paras{'output'} --force_overwrite --fasta $paras{'reffasta'}";
$cmd = "/usr/bin/perl $paras{'vep_cmd'} $opts --buffer_size 300 --offline --cache --dir $paras{'cachedir'} --assembly $paras{'assembly'} --fork 8 --no_progress --no_stats --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --canonical --protein --biotype --uniprot --tsl --no_check_variants_order --pubmed --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --minimal --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --format vcf --vcf -i $tmp_orig_calls -o $tmp_vep_out --force_overwrite --fasta $paras{'reffasta'}";
## for wgs simple ##
#$cmd = "perl $paras{'vep_cmd'} $opts --buffer_size 10000 --offline --cache --dir $paras{'cachedir'} --assembly $paras{'assembly'} --fork 4 --format vcf --vcf -i $tmp_orig_calls -o $tmp_vep_out --force_overwrite --fasta $paras{'reffasta'}";
#$cmd = "perl $paras{'vep_cmd'} $opts --buffer_size 1000 --offline --cache --dir $paras{'cachedir'} --assembly $paras{'assembly'} --fork 4 --format vcf --vcf -i $tmp_orig_calls -o $tmp_vep_out --force_overwrite --everything --fasta $paras{'reffasta'}";
#variant_effect_predictor.pl --species $species --assembly $ncbi_build --offline --no_progress --no_stats --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --canonical --protein --biotype --uniprot --tsl --pubmed --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --vcf --minimal --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --dir $vep_data --fasta $ref_fasta --format vcf --input_file $input_vcf --output_file $output_vcf
system($cmd);
# re-merge headers and move
my (undef, $tmp_merge) = tempfile();
$cmd = "grep ^##fileformat $tmp_vep_out > $tmp_merge";
system($cmd);
$cmd = "grep ^# $paras{'vcf'} | grep -v ^##fileformat | grep -v ^#CHROM >> $tmp_merge";
system($cmd);
$cmd = "grep -v ^##fileformat $tmp_vep_out >> $tmp_merge";
system($cmd);
$cmd = "cat $tmp_merge > $paras{'output'}";
system($cmd);
#Save other output
my @suffix=("_summary.html", "_warnings.txt");
foreach (@suffix) {
my $file = $tmp_vep_out.$_; if ( -e $file ) { $cmd = "cat $file > $paras{'output'}".$_; system($cmd); }
}
# clean up
$cmd = "rm -f $tmp_orig_calls $tmp_vep_out"."*"." ".$tmp_merge;
system($cmd);
1;