Skip to content

Commit

Permalink
Major cleanup to use standard C string functions
Browse files Browse the repository at this point in the history
  • Loading branch information
tseemann committed Oct 29, 2015
1 parent 841da70 commit 0a17fa5
Show file tree
Hide file tree
Showing 9 changed files with 35 additions and 67 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ This program finds snp sites from a multi fasta alignment file.
-p output a phylip file
-o specify an output filename
-h this help message
-V print version and exit
<file> input alignment file which can optionally be gzipped
```

Expand Down
3 changes: 3 additions & 0 deletions snp-sites.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ OPTIONS
*-h*::
This document

*-V*::
Show version and exit

EXAMPLES
--------
snp-sites my-alignment.aln
Expand Down
10 changes: 5 additions & 5 deletions src/alignment-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -226,13 +226,13 @@ char * read_line(char sequence[], FILE * pFilePtr)


while((pcRes = fgets(current_line_buffer, sizeof(current_line_buffer), pFilePtr)) != NULL){
if(size_of_string(sequence) > 0)
if(strlen(sequence) > 0)
{
sequence = realloc(sequence, sizeof(char)*(size_of_string(sequence) + size_of_string(current_line_buffer) + 2) );
sequence = realloc(sequence, sizeof(char)*(strlen(sequence) + strlen(current_line_buffer) + 2) );
}
concat_strings_created_with_malloc(sequence,current_line_buffer);
strcat(sequence,current_line_buffer);
current_line_buffer[0] = '\0';
lineLength = size_of_string(sequence);
lineLength = strlen(sequence);
//if end of line character is found then exit from loop

if((sequence)[lineLength] == '\n' || (sequence)[lineLength] == '\0'){
Expand All @@ -257,7 +257,7 @@ void get_sample_names_for_header(char filename[], char ** sequence_names, int nu
seq = kseq_init(fp);

while ((l = kseq_read(seq)) >= 0) {
memcpy(sequence_names[i], seq->name.s, size_of_string(seq->name.s)+1);
strcpy(sequence_names[i], seq->name.s);
i++;
}
kseq_destroy(seq);
Expand Down
10 changes: 3 additions & 7 deletions src/fasta-of-snp-sites.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,17 @@ void create_fasta_of_snp_sites(char filename[], int number_of_snps, char ** base
FILE *fasta_file_pointer;
int sample_counter;
int snp_counter;
char * base_filename;

base_filename = (char *) calloc(FILENAME_MAX,sizeof(char));
memcpy(base_filename, filename, FILENAME_MAX*sizeof(char));
fasta_file_pointer = fopen(base_filename, "w");

fasta_file_pointer = fopen(filename, "w");

for(sample_counter=0; sample_counter< number_of_samples; sample_counter++)
{
fprintf( fasta_file_pointer, ">%s\n", sequence_names[sample_counter]);
for(snp_counter=0; snp_counter< number_of_snps; snp_counter++)
{
fprintf( fasta_file_pointer, "%c", bases_for_snps[snp_counter][sample_counter]);
fputc( bases_for_snps[snp_counter][sample_counter], fasta_file_pointer );
}
fprintf( fasta_file_pointer, "\n");
}
fclose(fasta_file_pointer);
free(base_filename);
}
6 changes: 3 additions & 3 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ int main (int argc, char **argv) {
output_phylip_file = 1;
break;
case 'o':
memcpy(output_filename, optarg, size_of_string(optarg) +1);
strncpy(output_filename, optarg, FILENAME_MAX);
break;
case 'h':
print_usage();
Expand All @@ -84,8 +84,8 @@ int main (int argc, char **argv) {

if(optind < argc)
{
memcpy(multi_fasta_filename, argv[optind], size_of_string(argv[optind]) +1);
generate_snp_sites(multi_fasta_filename, output_multi_fasta_file, output_vcf_file, output_phylip_file, output_filename);
strncpy(multi_fasta_filename, argv[optind], FILENAME_MAX);
generate_snp_sites(multi_fasta_filename, output_multi_fasta_file, output_vcf_file, output_phylip_file, output_filename);
}
else
{
Expand Down
20 changes: 8 additions & 12 deletions src/phylib-of-snp-sites.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,25 @@

void create_phylib_of_snp_sites(char filename[], int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples)
{
FILE *fasta_file_pointer;
FILE *phylip_file_pointer;
int sample_counter;
int snp_counter;
char * base_filename;

base_filename = (char *) calloc(FILENAME_MAX,sizeof(char));
memcpy(base_filename, filename, FILENAME_MAX*sizeof(char));
fasta_file_pointer = fopen(base_filename, "w");

phylip_file_pointer = fopen(filename, "w");

fprintf( fasta_file_pointer, "%d %d\n", number_of_samples, number_of_snps);
fprintf( phylip_file_pointer, "%d %d\n", number_of_samples, number_of_snps);

for(sample_counter=0; sample_counter< number_of_samples; sample_counter++)
{
// sequence_name can be longer than 10 characters (relaxed phylip format) and contain [\w\s]
//TODO check for illegal characters [^\w\s]
fprintf( fasta_file_pointer, "%s\t", sequence_names[sample_counter]);
fprintf( phylip_file_pointer, "%s\t", sequence_names[sample_counter]);

for(snp_counter=0; snp_counter< number_of_snps; snp_counter++)
{
fprintf( fasta_file_pointer, "%c", bases_for_snps[snp_counter][sample_counter]);
fputc( bases_for_snps[snp_counter][sample_counter], phylip_file_pointer);
}
fprintf( fasta_file_pointer, "\n");
fprintf( phylip_file_pointer, "\n");
}
fclose(fasta_file_pointer);
free(base_filename);
fclose(phylip_file_pointer);
}
33 changes: 12 additions & 21 deletions src/snp-sites.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#include "phylib-of-snp-sites.h"
#include "parse-phylip.h"
#include "string-cat.h"

#include "fasta-of-snp-sites.h"

void build_snp_locations(int snp_locations[], char reference_sequence[])
{
Expand Down Expand Up @@ -90,54 +90,45 @@ int generate_snp_sites(char filename[],int output_multi_fasta_file, int output_v
char output_filename_base[FILENAME_MAX];
char filename_without_directory[FILENAME_MAX];
strip_directory_from_filename(filename, filename_without_directory);
memcpy(output_filename_base,filename_without_directory, size_of_string(filename_without_directory)+1 );
strncpy(output_filename_base, filename_without_directory, FILENAME_MAX);

if(output_filename != NULL && *output_filename != '\0')
{
memcpy(output_filename_base,output_filename, size_of_string(output_filename)+1 );
strncpy(output_filename_base, output_filename, FILENAME_MAX);
}

if(output_vcf_file)
{
char * vcf_output_filename;
vcf_output_filename = calloc(FILENAME_MAX,sizeof(char));
memcpy(vcf_output_filename, output_filename_base, (FILENAME_MAX)*sizeof(char));
char vcf_output_filename[FILENAME_MAX];
strncpy(vcf_output_filename, output_filename_base, FILENAME_MAX);
if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') )
{
char extension[5] = {".vcf"};
concat_strings_created_with_malloc(vcf_output_filename,extension);
strcat(vcf_output_filename, ".vcf");
}

create_vcf_file(vcf_output_filename, snp_locations, number_of_snps, bases_for_snps, sequence_names, number_of_samples, length_of_genome);
free(vcf_output_filename);
}

if(output_phylip_file)
{
char *phylip_output_filename;
phylip_output_filename = calloc(FILENAME_MAX,sizeof(char));
memcpy(phylip_output_filename, output_filename_base, (FILENAME_MAX)*sizeof(char));
char phylip_output_filename[FILENAME_MAX];
strncpy(phylip_output_filename, output_filename_base, FILENAME_MAX);
if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') )
{
char extension[10] = {".phylip"};
concat_strings_created_with_malloc(phylip_output_filename,extension);
strcat(phylip_output_filename, ".phylip");
}
create_phylib_of_snp_sites(phylip_output_filename, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
free(phylip_output_filename);
}

if((output_multi_fasta_file) || (output_vcf_file ==0 && output_phylip_file == 0 && output_multi_fasta_file == 0))
{
char *multi_fasta_output_filename;
multi_fasta_output_filename = calloc(FILENAME_MAX,sizeof(char));
memcpy(multi_fasta_output_filename, output_filename_base, (FILENAME_MAX)*sizeof(char));
char multi_fasta_output_filename[FILENAME_MAX];
strncpy(multi_fasta_output_filename, output_filename_base, FILENAME_MAX);
if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') )
{
char extension[20] = {".snp_sites.aln"};
concat_strings_created_with_malloc(multi_fasta_output_filename,extension);
strcat(multi_fasta_output_filename, ".snp_sites.aln");
}
create_fasta_of_snp_sites(multi_fasta_output_filename, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
free(multi_fasta_output_filename);
}

// free memory
Expand Down
18 changes: 0 additions & 18 deletions src/string-cat.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,3 @@
#include <stdlib.h>
#include <string.h>


/*
 * Count the characters in input_string that precede its terminating '\0'.
 *
 * input_string: NUL-terminated string to measure; it is dereferenced
 *               unconditionally, so it must not be NULL.
 * Returns the number of characters before the terminator, as an int.
 */
int size_of_string(char *input_string)
{
    const char *cursor = input_string;

    /* Walk forward until the cursor rests on the terminator. */
    while (*cursor != '\0') {
        cursor++;
    }

    /* Distance walked == number of characters before the '\0'. */
    return (int)(cursor - input_string);
}

/*
 * Append string_to_concat onto the end of input_string, in place.
 *
 * The bytes of string_to_concat, including its terminating '\0', are copied
 * over the position of input_string's current terminator. Nothing is
 * allocated or resized here, so the buffer behind input_string must already
 * be large enough to hold both strings plus the terminator.
 *
 * input_string:     NUL-terminated destination string, modified in place.
 * string_to_concat: NUL-terminated string whose contents are appended.
 */
void concat_strings_created_with_malloc(char *input_string, char *string_to_concat)
{
    /* Destination is the current '\0' of input_string. */
    char *append_at = input_string + size_of_string(input_string);
    /* +1 so the terminator of string_to_concat is copied as well. */
    int bytes_to_copy = size_of_string(string_to_concat) + 1;

    memcpy(append_at, string_to_concat, bytes_to_copy * sizeof(char));
}
1 change: 0 additions & 1 deletion src/string-cat.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#define _STRING_CAT_H_

void concat_strings_created_with_malloc(char *input_string, char *string_to_concat);
int size_of_string(char *input_string);

#endif

0 comments on commit 0a17fa5

Please sign in to comment.