Skip to content

Commit

Permalink
Merge pull request #20 from sanger-pathogens/BT_add_vcf_sequence_length
Browse files Browse the repository at this point in the history
Add reference sequence length to VCF output
  • Loading branch information
aslett1 committed Jul 17, 2015
2 parents 3451d0a + b4497b1 commit bd2428e
Show file tree
Hide file tree
Showing 6 changed files with 10 additions and 7 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.0.0
2.0.1
2 changes: 1 addition & 1 deletion src/snp-sites.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ int generate_snp_sites(char filename[],int output_multi_fasta_file, int output_v
concat_strings_created_with_malloc(vcf_output_filename,extension);
}

create_vcf_file(vcf_output_filename, snp_locations, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
create_vcf_file(vcf_output_filename, snp_locations, number_of_snps, bases_for_snps, sequence_names, number_of_samples, length_of_genome);
free(vcf_output_filename);
}

Expand Down
7 changes: 4 additions & 3 deletions src/vcf.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@
#include "snp-sites.h"
#include <assert.h>

void create_vcf_file(char filename[], int snp_locations[],int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples)
void create_vcf_file(char filename[], int snp_locations[],int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples, size_t length_of_genome)
{
FILE *vcf_file_pointer;
char * base_filename;
base_filename = (char *) malloc(MAX_FILENAME_SIZE*sizeof(char));
strcpy(base_filename, filename);

vcf_file_pointer=fopen(base_filename, "w");
output_vcf_header(vcf_file_pointer,sequence_names, number_of_samples);
output_vcf_header(vcf_file_pointer,sequence_names, number_of_samples, length_of_genome);
output_vcf_snps(vcf_file_pointer, bases_for_snps, snp_locations, number_of_snps, number_of_samples);
fclose(vcf_file_pointer);
free(base_filename);
Expand All @@ -51,10 +51,11 @@ void output_vcf_snps(FILE * vcf_file_pointer, char ** bases_for_snps, int * snp_
}
}

void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int number_of_samples)
void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int number_of_samples, size_t length_of_genome)
{
int i;
fprintf( vcf_file_pointer, "##fileformat=VCFv4.1\n" );
/* Emit the reference contig header line. length_of_genome is a size_t, so it
   must be formatted with %zu; %i expects an int and a mismatched conversion
   is undefined behaviour (and can print a wrong length on LP64 platforms). */
fprintf( vcf_file_pointer, "##contig=<ID=1,length=%zu>\n", length_of_genome );
fprintf( vcf_file_pointer, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n" );
fprintf( vcf_file_pointer, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" );

Expand Down
4 changes: 2 additions & 2 deletions src/vcf.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
#ifndef _VCF_H_
#define _VCF_H_

void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int number_of_samples);
void create_vcf_file(char filename[], int snp_locations[], int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples);
void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int number_of_samples, size_t length_of_genome);
void create_vcf_file(char filename[], int snp_locations[], int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples, size_t length_of_genome);
void output_vcf_snps(FILE * vcf_file_pointer, char ** bases_for_snps, int * snp_locations, int number_of_snps, int number_of_samples);
void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_location, int number_of_samples);
void output_vcf_row_samples_bases(FILE * vcf_file_pointer, char reference_base, char * alt_bases, char * bases_for_snp, int number_of_samples);
Expand Down
1 change: 1 addition & 0 deletions tests/data/alignment_file_one_line_per_sequence.aln.vcf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
##fileformat=VCFv4.1
##contig=<ID=1,length=2000>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 2956_6_1 2956_6_2 2956_6_3 2956_6_4 2956_6_5 2956_6_6 3002_8_1 3002_8_2 3002_8_3 3002_8_4 3002_8_5 3002_8_6 3002_8_7 4056_2_10 4056_2_11 4056_2_1 4056_2_12 4056_2_2 4056_2_3 4056_2_4 4056_2_5 4056_2_6 4056_2_7 4056_2_9 4056_6_10 4056_6_11 4056_6_12 4056_6_2 4056_6_3 4056_6_4 4056_6_5 4056_6_6 4056_6_7 4056_6_9 4056_7_10 4056_7_11 4056_7_1 4056_7_12 4056_7_7 4056_7_8 4056_7_9 4056_8_10 4056_8_1 4056_8_12 4056_8_2 4056_8_3 4056_8_4 4056_8_6 4056_8_8 4056_8_9 4075_3_11 4075_3_12 4075_3_2 4075_3_3 4075_3_5 4075_3_6 4075_3_7 4075_3_8 4075_3_9 4370_2_11 4370_2_12 4370_2_2 4370_2_3 4370_2_4 4370_2_7 4370_2_8 4370_2_9 4370_3_11 4370_3_1 4370_3_6 4370_3_7 4370_3_8 5174_5_1 5174_5_2 5174_5_3 5174_5_4 5174_5_5 5174_5_6 5174_5_7 5174_5_9 5174_6_10 5174_6_1 5174_6_2 5174_6_3 5174_6_4 5174_6_5 5174_6_6 5174_6_7 5174_6_8 5174_6_9 5174_7_10 5174_7_1 5174_7_2 5174_7_3 5174_7_4 5174_7_5 5174_7_6 5174_7_7 5174_7_8 5174_7_9 5174_8_1 5174_8_2 5174_8_3 5174_8_5 5174_8_6 5174_8_8 5174_8_9 Vibrio_parahaemolyticus Vibrio_vulnificus
1 825 . A G . . . GT 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
Expand Down
1 change: 1 addition & 0 deletions tests/data/alignment_file_with_n.aln.vcf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
##fileformat=VCFv4.1
##contig=<ID=1,length=2000>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 2956_6_1 2956_6_2 2956_6_3 2956_6_4 2956_6_5 2956_6_6 3002_8_1 3002_8_2 3002_8_3 3002_8_4 3002_8_5 3002_8_6 3002_8_7 4056_2_10 4056_2_11 4056_2_1 4056_2_12 4056_2_2 4056_2_3 4056_2_4 4056_2_5 4056_2_6 4056_2_7 4056_2_9 4056_6_10 4056_6_11 4056_6_12 4056_6_2 4056_6_3 4056_6_4 4056_6_5 4056_6_6 4056_6_7 4056_6_9 4056_7_10 4056_7_11 4056_7_1 4056_7_12 4056_7_7 4056_7_8 4056_7_9 4056_8_10 4056_8_1 4056_8_12 4056_8_2 4056_8_3 4056_8_4 4056_8_6 4056_8_8 4056_8_9 4075_3_11 4075_3_12 4075_3_2 4075_3_3 4075_3_5 4075_3_6 4075_3_7 4075_3_8 4075_3_9 4370_2_11 4370_2_12 4370_2_2 4370_2_3 4370_2_4 4370_2_7 4370_2_8 4370_2_9 4370_3_11 4370_3_1 4370_3_6 4370_3_7 4370_3_8 5174_5_1 5174_5_2 5174_5_3 5174_5_4 5174_5_5 5174_5_6 5174_5_7 5174_5_9 5174_6_10 5174_6_1 5174_6_2 5174_6_3 5174_6_4 5174_6_5 5174_6_6 5174_6_7 5174_6_8 5174_6_9 5174_7_10 5174_7_1 5174_7_2 5174_7_3 5174_7_4 5174_7_5 5174_7_6 5174_7_7 5174_7_8 5174_7_9 5174_8_1 5174_8_2 5174_8_3 5174_8_5 5174_8_6 5174_8_8 5174_8_9 Vibrio_parahaemolyticus Vibrio_vulnificus
1 825 . A G . . . GT 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
Expand Down

0 comments on commit bd2428e

Please sign in to comment.