Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Choose output file formats #2

Merged
merged 1 commit into from
Jan 25, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 39 additions & 16 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,36 +20,59 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include "snp_sites.h"

#define MAX_FILENAME_SIZE 250

/*
 * Write the command-line help text to standard output: a short description
 * of the tool, two example invocations, the list of output formats, and the
 * option summary. Takes no arguments and returns nothing.
 */
static void print_usage()
{
	/* Help text, one entry per output call, emitted in this order. */
	static const char *const help_lines[] = {
		"Find SNP sites from a multi fasta alignment file (which can be gzipped)\n",
		"./snp_sites file.aln\n",
		"./snp_sites file.aln.gz\n\n",
		"SNP sites are outputted in the following formats: Multi fasta alignment, phylip, VCF \n\n",
		"Usage: snp_sites [-mvph] <file>\n",
		"This program finds snp sites from a multi fasta alignment file.\n",
		" -m output a multi fasta alignment file (default)\n",
		" -v output a VCF file\n",
		" -p output a phylip file\n",
		" -h this help message\n",
		" <file> input alignment file which can optionally be gzipped\n"
	};
	size_t line_count = sizeof help_lines / sizeof help_lines[0];
	size_t i;

	/* None of the lines contain conversion specifiers, so they are written verbatim. */
	for (i = 0; i < line_count; i++)
	{
		fputs(help_lines[i], stdout);
	}
}

int main (int argc, const char * argv[]) {
int main (int argc, char **argv) {
char multi_fasta_filename[MAX_FILENAME_SIZE];

if(argc <=1)
{
print_usage();
}
else if(strcmp(argv[1], "--help") == 0)
int c;
int index;
int output_multi_fasta_file = 0;
int output_vcf_file = 0;
int output_phylip_file = 0;

while ((c = getopt (argc, argv, "mvp:")) != -1)
switch (c)
{
case 'm':
output_multi_fasta_file = 1;
break;
case 'v':
output_vcf_file = 1;
break;
case 'p':
output_phylip_file = 1;
break;
case 'h':
print_usage();
return 0;
default:
output_multi_fasta_file = 1;
}

if(optind < argc)
{
print_usage();
strcpy(multi_fasta_filename,argv[optind]);
generate_snp_sites(multi_fasta_filename, output_multi_fasta_file, output_vcf_file, output_phylip_file);
}
else
{
strcpy(multi_fasta_filename,argv[1]);
generate_snp_sites(multi_fasta_filename);
print_usage();
}


return 0;
}
Expand Down
3 changes: 3 additions & 0 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ vcf.o: vcf.c
clean:
-rm *.o

test:
cd tests && make

Binary file modified snp_sites
Binary file not shown.
18 changes: 15 additions & 3 deletions snp_sites.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ void build_snp_locations(int snp_locations[], char reference_sequence[])
}


int generate_snp_sites(char filename[])
int generate_snp_sites(char filename[],int output_multi_fasta_file, int output_vcf_file, int output_phylip_file)
{
int length_of_genome;
char * reference_sequence;
Expand Down Expand Up @@ -87,10 +87,22 @@ int generate_snp_sites(char filename[])

char filename_without_directory[MAX_FILENAME_SIZE];
strip_directory_from_filename(filename, filename_without_directory);


create_vcf_file(filename_without_directory, snp_locations, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
if(output_vcf_file)
{
create_vcf_file(filename_without_directory, snp_locations, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
}

if(output_phylip_file)
{
create_phylib_of_snp_sites(filename_without_directory, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
create_fasta_of_snp_sites(filename_without_directory, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
}

if((output_multi_fasta_file) || (output_vcf_file ==0 && output_phylip_file == 0 && output_multi_fasta_file == 0))
{
create_fasta_of_snp_sites(filename_without_directory, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
}

free(snp_locations);
return 1;
Expand Down
2 changes: 1 addition & 1 deletion snp_sites.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#define _SNP_SITES_H_

void build_snp_locations(int snp_locations[], char reference_sequence[]);
int generate_snp_sites(char filename[]);
int generate_snp_sites(char filename[],int output_multi_fasta_file, int output_vcf_file, int output_phylip_file);
int refilter_existing_snps(char * reference_bases, int number_of_snps, char ** column_names, int number_of_columns,int * snp_locations, int * filtered_snp_locations);
void remove_filtered_snp_locations(int * filtered_snp_locations, int * snp_locations, int number_of_snps);
void strip_directory_from_filename(char * input_filename, char * output_filename);
Expand Down
Binary file modified tests/check_snp_sites
Binary file not shown.
6 changes: 3 additions & 3 deletions tests/check_snp_sites.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

START_TEST (valid_alignment_with_one_line_per_sequence)
{
generate_snp_sites("data/alignment_file_one_line_per_sequence.aln");
generate_snp_sites("data/alignment_file_one_line_per_sequence.aln",1,1,1);
fail_unless( compare_files("data/alignment_file_one_line_per_sequence.aln.vcf", "alignment_file_one_line_per_sequence.aln.vcf" ) == 1, "Invalid VCF file for 1 line per seq" );
fail_unless( compare_files("data/alignment_file_one_line_per_sequence.aln.phylip", "alignment_file_one_line_per_sequence.aln.phylip" ) == 1, "Invalid Phylip file for 1 line per seq" );
fail_unless( compare_files("data/alignment_file_one_line_per_sequence.aln.snp_sites.aln","alignment_file_one_line_per_sequence.aln.snp_sites.aln" ) == 1 , "Invalid ALN file for 1 line per seq");
Expand All @@ -22,7 +22,7 @@ END_TEST

START_TEST (valid_alignment_with_one_line_per_sequence_gzipped)
{
generate_snp_sites("data/alignment_file_one_line_per_sequence.aln.gz");
generate_snp_sites("data/alignment_file_one_line_per_sequence.aln.gz",1,1,1);
fail_unless( compare_files("data/alignment_file_one_line_per_sequence.aln.vcf", "alignment_file_one_line_per_sequence.aln.gz.vcf" ) == 1, "Invalid VCF file for 1 line per seq" );
fail_unless( compare_files("data/alignment_file_one_line_per_sequence.aln.phylip", "alignment_file_one_line_per_sequence.aln.gz.phylip" ) == 1, "Invalid Phylip file for 1 line per seq" );
fail_unless( compare_files("data/alignment_file_one_line_per_sequence.aln.snp_sites.aln","alignment_file_one_line_per_sequence.aln.gz.snp_sites.aln" ) == 1 , "Invalid ALN file for 1 line per seq");
Expand All @@ -34,7 +34,7 @@ END_TEST

START_TEST (valid_alignment_with_multiple_lines_per_sequence)
{
generate_snp_sites("data/alignment_file_multiple_lines_per_sequence.aln");
generate_snp_sites("data/alignment_file_multiple_lines_per_sequence.aln",1,1,1);
fail_unless( compare_files("data/alignment_file_one_line_per_sequence.aln.vcf", "alignment_file_multiple_lines_per_sequence.aln.vcf" ) == 1, "Invalid VCF file for multiple lines per seq" );
fail_unless( compare_files("data/alignment_file_one_line_per_sequence.aln.phylip", "alignment_file_multiple_lines_per_sequence.aln.phylip" ) == 1, "Invalid Phylip file for multiple lines per seq" );
fail_unless( compare_files("data/alignment_file_one_line_per_sequence.aln.snp_sites.aln","alignment_file_multiple_lines_per_sequence.aln.snp_sites.aln" ) == 1 ,"Invalid ALN file for multiple lines per seq");
Expand Down