diff --git a/README.md b/README.md index 106fe36..fb197c2 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ This program finds snp sites from a multi fasta alignment file. -p output a phylip file -o specify an output filename -h this help message + -V print version and exit input alignment file which can optionally be gzipped ``` diff --git a/snp-sites.txt b/snp-sites.txt index b1dd7f7..5e39abe 100644 --- a/snp-sites.txt +++ b/snp-sites.txt @@ -34,6 +34,9 @@ OPTIONS *-h*:: This document +*-V*:: + Show version and exit + EXAMPLES -------- snp-sites my-alignment.aln diff --git a/src/alignment-file.c b/src/alignment-file.c index 16004e1..a3e948e 100644 --- a/src/alignment-file.c +++ b/src/alignment-file.c @@ -226,13 +226,13 @@ char * read_line(char sequence[], FILE * pFilePtr) while((pcRes = fgets(current_line_buffer, sizeof(current_line_buffer), pFilePtr)) != NULL){ - if(size_of_string(sequence) > 0) + if(strlen(sequence) > 0) { - sequence = realloc(sequence, sizeof(char)*(size_of_string(sequence) + size_of_string(current_line_buffer) + 2) ); + sequence = realloc(sequence, sizeof(char)*(strlen(sequence) + strlen(current_line_buffer) + 2) ); } - concat_strings_created_with_malloc(sequence,current_line_buffer); + strcat(sequence,current_line_buffer); current_line_buffer[0] = '\0'; - lineLength = size_of_string(sequence); + lineLength = strlen(sequence); //if end of line character is found then exit from loop if((sequence)[lineLength] == '\n' || (sequence)[lineLength] == '\0'){ @@ -257,7 +257,7 @@ void get_sample_names_for_header(char filename[], char ** sequence_names, int nu seq = kseq_init(fp); while ((l = kseq_read(seq)) >= 0) { - memcpy(sequence_names[i], seq->name.s, size_of_string(seq->name.s)+1); + strcpy(sequence_names[i], seq->name.s); i++; } kseq_destroy(seq); diff --git a/src/fasta-of-snp-sites.c b/src/fasta-of-snp-sites.c index 69a0b2f..6970af3 100644 --- a/src/fasta-of-snp-sites.c +++ b/src/fasta-of-snp-sites.c @@ -29,21 +29,17 @@ void create_fasta_of_snp_sites(char filename[], int number_of_snps, char ** base FILE *fasta_file_pointer; int sample_counter; int snp_counter; - char * base_filename; - - base_filename = (char *) calloc(FILENAME_MAX,sizeof(char)); - memcpy(base_filename, filename, FILENAME_MAX*sizeof(char)); - fasta_file_pointer = fopen(base_filename, "w"); + + fasta_file_pointer = fopen(filename, "w"); for(sample_counter=0; sample_counter< number_of_samples; sample_counter++) { fprintf( fasta_file_pointer, ">%s\n", sequence_names[sample_counter]); for(snp_counter=0; snp_counter< number_of_snps; snp_counter++) { - fprintf( fasta_file_pointer, "%c", bases_for_snps[snp_counter][sample_counter]); + fputc( bases_for_snps[snp_counter][sample_counter], fasta_file_pointer ); } fprintf( fasta_file_pointer, "\n"); } fclose(fasta_file_pointer); - free(base_filename); } diff --git a/src/main.c b/src/main.c index 07434ab..add2991 100644 --- a/src/main.c +++ b/src/main.c @@ -73,7 +73,7 @@ int main (int argc, char **argv) { output_phylip_file = 1; break; case 'o': - memcpy(output_filename, optarg, size_of_string(optarg) +1); + strncpy(output_filename, optarg, FILENAME_MAX); break; case 'h': print_usage(); @@ -84,8 +84,8 @@ int main (int argc, char **argv) { if(optind < argc) { - memcpy(multi_fasta_filename, argv[optind], size_of_string(argv[optind]) +1); - generate_snp_sites(multi_fasta_filename, output_multi_fasta_file, output_vcf_file, output_phylip_file, output_filename); + strncpy(multi_fasta_filename, argv[optind], FILENAME_MAX); + generate_snp_sites(multi_fasta_filename, output_multi_fasta_file, output_vcf_file, output_phylip_file, output_filename); } else { diff --git a/src/phylib-of-snp-sites.c b/src/phylib-of-snp-sites.c index daf9b45..9b26fd3 100644 --- a/src/phylib-of-snp-sites.c +++ b/src/phylib-of-snp-sites.c @@ -27,29 +27,25 @@ void create_phylib_of_snp_sites(char filename[], int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples) { - FILE *fasta_file_pointer; + FILE *phylip_file_pointer; int sample_counter; int snp_counter; - char * base_filename; - - base_filename = (char *) calloc(FILENAME_MAX,sizeof(char)); - memcpy(base_filename, filename, FILENAME_MAX*sizeof(char)); - fasta_file_pointer = fopen(base_filename, "w"); + + phylip_file_pointer = fopen(filename, "w"); - fprintf( fasta_file_pointer, "%d %d\n", number_of_samples, number_of_snps); + fprintf( phylip_file_pointer, "%d %d\n", number_of_samples, number_of_snps); for(sample_counter=0; sample_counter< number_of_samples; sample_counter++) { // sequence_name can be more than 10 (relaxed phylib format) and contain [\w\s] //TODO check for illegal characters [^\w\s] - fprintf( fasta_file_pointer, "%s\t", sequence_names[sample_counter]); + fprintf( phylip_file_pointer, "%s\t", sequence_names[sample_counter]); for(snp_counter=0; snp_counter< number_of_snps; snp_counter++) { - fprintf( fasta_file_pointer, "%c", bases_for_snps[snp_counter][sample_counter]); + fputc( bases_for_snps[snp_counter][sample_counter], phylip_file_pointer); } - fprintf( fasta_file_pointer, "\n"); + fprintf( phylip_file_pointer, "\n"); } - fclose(fasta_file_pointer); - free(base_filename); + fclose(phylip_file_pointer); } diff --git a/src/snp-sites.c b/src/snp-sites.c index 0477d6c..063003e 100644 --- a/src/snp-sites.c +++ b/src/snp-sites.c @@ -29,7 +29,7 @@ #include "phylib-of-snp-sites.h" #include "parse-phylip.h" #include "string-cat.h" - +#include "fasta-of-snp-sites.h" void build_snp_locations(int snp_locations[], char reference_sequence[]) { @@ -90,54 +90,45 @@ int generate_snp_sites(char filename[],int output_multi_fasta_file, int output_v char output_filename_base[FILENAME_MAX]; char filename_without_directory[FILENAME_MAX]; strip_directory_from_filename(filename, filename_without_directory); - memcpy(output_filename_base,filename_without_directory, size_of_string(filename_without_directory)+1 ); + strncpy(output_filename_base, filename_without_directory, FILENAME_MAX); if(output_filename != NULL && *output_filename != '\0') { - memcpy(output_filename_base,output_filename, size_of_string(output_filename)+1 ); + strncpy(output_filename_base, output_filename, FILENAME_MAX); } if(output_vcf_file) { - char * vcf_output_filename; - vcf_output_filename = calloc(FILENAME_MAX,sizeof(char)); - memcpy(vcf_output_filename, output_filename_base, (FILENAME_MAX)*sizeof(char)); + char vcf_output_filename[FILENAME_MAX]; + strncpy(vcf_output_filename, output_filename_base, FILENAME_MAX); if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') ) { - char extension[5] = {".vcf"}; - concat_strings_created_with_malloc(vcf_output_filename,extension); + strcat(vcf_output_filename, ".vcf"); } create_vcf_file(vcf_output_filename, snp_locations, number_of_snps, bases_for_snps, sequence_names, number_of_samples, length_of_genome); - free(vcf_output_filename); } if(output_phylip_file) { - char *phylip_output_filename; - phylip_output_filename = calloc(FILENAME_MAX,sizeof(char)); - memcpy(phylip_output_filename, output_filename_base, (FILENAME_MAX)*sizeof(char)); + char phylip_output_filename[FILENAME_MAX]; + strncpy(phylip_output_filename, output_filename_base, FILENAME_MAX); if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') ) { - char extension[10] = {".phylip"}; - concat_strings_created_with_malloc(phylip_output_filename,extension); + strcat(phylip_output_filename, ".phylip"); } create_phylib_of_snp_sites(phylip_output_filename, number_of_snps, bases_for_snps, sequence_names, number_of_samples); - free(phylip_output_filename); } if((output_multi_fasta_file) || (output_vcf_file ==0 && output_phylip_file == 0 && output_multi_fasta_file == 0)) { - char *multi_fasta_output_filename; - multi_fasta_output_filename = calloc(FILENAME_MAX,sizeof(char)); - memcpy(multi_fasta_output_filename, output_filename_base, (FILENAME_MAX)*sizeof(char)); + char multi_fasta_output_filename[FILENAME_MAX]; + strncpy(multi_fasta_output_filename, output_filename_base, FILENAME_MAX); if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') ) { - char extension[20] = {".snp_sites.aln"}; - concat_strings_created_with_malloc(multi_fasta_output_filename,extension); + strcat(multi_fasta_output_filename, ".snp_sites.aln"); } create_fasta_of_snp_sites(multi_fasta_output_filename, number_of_snps, bases_for_snps, sequence_names, number_of_samples); - free(multi_fasta_output_filename); } // free memory diff --git a/src/string-cat.c b/src/string-cat.c index 6385e3b..72d9a62 100644 --- a/src/string-cat.c +++ b/src/string-cat.c @@ -22,21 +22,3 @@ #include #include - -int size_of_string(char *input_string) -{ - int i = 0; - - while( input_string[i] != '\0') - { - i++; - } - return i; -} - -void concat_strings_created_with_malloc(char *input_string, char *string_to_concat) -{ - int input_str_size = size_of_string(input_string); - int to_concat_str_size = size_of_string(string_to_concat); - memcpy(input_string + input_str_size, string_to_concat, (to_concat_str_size+1)*sizeof(char)); -} diff --git a/src/string-cat.h b/src/string-cat.h index c521a6a..f362b95 100644 --- a/src/string-cat.h +++ b/src/string-cat.h @@ -22,7 +22,6 @@ #define _STRING_CAT_H_ void concat_strings_created_with_malloc(char *input_string, char *string_to_concat); -int size_of_string(char *input_string); #endif