utility_functions.c

/* utility_functions.c - List manipulation functions, element
 * constructors, and macro definitions for leg markdown parser. */

#include "utility_functions.h"
#include "markdown_peg.h"

#include <string.h>
#include <assert.h>


/**********************************************************************

  List manipulation functions

 ***********************************************************************/

/* cons - cons an element onto a list, returning pointer to new head */
element * cons(element *new, element *list) {
    assert(new != NULL);
    new->next = list;
    return new;
}

/* reverse - reverse a list, returning pointer to new list */
element *reverse(element *list) {
    element *new = NULL;
    element *next = NULL;
    while (list != NULL) {
        next = list->next;
        new = cons(list, new);
        list = next;
    }
    return new;
}

/* append_list - add element to end of list */
void append_list(element *new, element *list) {
    assert(new != NULL);
    element *step = list;
    
    while (step->next != NULL) {
        step = step->next;
    }
    
    new->next = NULL;
    step->next = new;
}

/* concat_string_list - concatenates string contents of list of STR elements.
 * Frees STR elements as they are added to the concatenation. */
GString *concat_string_list(element *list) {
    GString *result;
    element *next;
    result = g_string_new("");
    while (list != NULL) {
        assert(list->key == STR);
        assert(list->contents.str != NULL);
        g_string_append(result, list->contents.str);
        next = list->next;
        free_element(list);
        list = next;
    }
    return result;
}

/**********************************************************************

  Global variables used in parsing

 ***********************************************************************/


char *charbuf = "";     /* Buffer of characters to be parsed. */
element *references = NULL;    /* List of link references found. */
element *notes = NULL;         /* List of footnotes found. */
element *parse_result;  /* Results of parse. */
int syntax_extensions;  /* Syntax extensions selected. */

element *labels = NULL;      /* List of labels found in document. */
clock_t start_time = 0;                 /* Used for ensuring we're not stuck in a loop */
bool parse_aborted = 0;      /* flag indicating we ran out of time */

/**********************************************************************

  Auxiliary functions for parsing actions.
  These make it easier to build up data structures (including lists)
  in the parsing actions.

 ***********************************************************************/

/* mk_element - generic constructor for element */
element * mk_element(int key) {
    element *result = malloc(sizeof(element));
    result->key = key;
    result->children = NULL;
    result->next = NULL;
    result->contents.str = NULL;
    return result;
}

/* mk_str - constructor for STR element */
element * mk_str(char *string) {
    element *result;
    assert(string != NULL);
    result = mk_element(STR);
    result->contents.str = strdup(string);
    return result;
}

/* mk_str_from_list - makes STR element by concatenating a
 * reversed list of strings, adding optional extra newline */
element * mk_str_from_list(element *list, bool extra_newline) {
    element *result;
    GString *c = concat_string_list(reverse(list));
    if (extra_newline)
        g_string_append(c, "\n");
    result = mk_element(STR);
    result->contents.str = c->str;
    g_string_free(c, false);
    return result;
}

/* mk_list - makes new list with key 'key' and children the reverse of 'lst'.
 * This is designed to be used with cons to build lists in a parser action.
 * The reversing is necessary because cons adds to the head of a list. */
element * mk_list(int key, element *lst) {
    element *result;
    result = mk_element(key);
    result->children = reverse(lst);
    return result;
}

/* mk_link - constructor for LINK element */
element * mk_link(element *label, char *url, char *title, element *attr, char *id) {
    element *result;
    result = mk_element(LINK);
    result->contents.link = malloc(sizeof(link));
    result->contents.link->label = label;
    result->contents.link->url = strdup(url);
    result->contents.link->title = strdup(title);
    result->contents.link->attr = attr;
    result->contents.link->identifier = strdup(id);
    return result;
}

/* extension = returns true if extension is selected */
bool extension(int ext) {
    return (syntax_extensions & ext);
}

/* match_inlines - returns true if inline lists match (case-insensitive...) */
bool match_inlines(element *l1, element *l2) {
    while (l1 != NULL && l2 != NULL) {
        if (l1->key != l2->key)
            return false;
        switch (l1->key) {
        case SPACE:
        case LINEBREAK:
        case ELLIPSIS:
        case EMDASH:
        case ENDASH:
        case APOSTROPHE:
            break;
        case CODE:
        case STR:
        case HTML:
            if (strcasecmp(l1->contents.str, l2->contents.str) == 0)
                break;
            else
                return false;
        case EMPH:
        case STRONG:
        case LIST:
        case SINGLEQUOTED:
        case DOUBLEQUOTED:
            if (match_inlines(l1->children, l2->children))
                break;
            else
                return false;
        case LINK:
        case IMAGE:
            return false;  /* No links or images within links */
        default:
            fprintf(stderr, "match_inlines encountered unknown key = %d\n", l1->key);
            exit(EXIT_FAILURE);
            break;
        }
        l1 = l1->next;
        l2 = l2->next;
    }
    return (l1 == NULL && l2 == NULL);  /* return true if both lists exhausted */
}

/* find_reference - return true if link found in references matching label.
 * 'link' is modified with the matching url and title. */
bool find_reference(link *result, element *label) {
    element *cur = references;  /* pointer to walk up list of references */
    link *curitem;
    while (cur != NULL) {
        curitem = cur->contents.link;
        if (match_inlines(label, curitem->label)) {
            *result = *curitem;
            return true;
        }
        else
            cur = cur->next;
    }
    return false;
}

/* find_note - return true if note found in notes matching label.
if found, 'result' is set to point to matched note. */

bool find_note(element **result, char *label) {
   element *cur = notes;  /* pointer to walk up list of notes */
   while (cur != NULL) {
       if (strcmp(label, cur->contents.str) == 0) {
           *result = cur;
           return true;
       }
       else
           cur = cur->next;
   }
   return false;
}


/* peg-multimarkdown additions */

/* print_raw_element - print an element as original text */
void print_raw_element(GString *out, element *elt) {
    if (elt->key == LINK) {
        print_raw_element_list(out,elt->contents.link->label);
    } else {
        if (elt->contents.str != NULL) {
            g_string_append_printf(out, "%s", elt->contents.str);
        } else {
            print_raw_element_list(out, elt->children);
        }
    }
}

/* print_raw_element_list - print a list of elements as original text */
void print_raw_element_list(GString *out, element *list) {
    while (list != NULL) {
        print_raw_element(out, list);
        list = list->next;
    }
}

/* label_from_element_list */
/* Returns a null-terminated string, which must be freed after use. */

char *label_from_element_list(element *list, bool obfuscate) {
    char *label;
    char *label2;
    GString *raw = g_string_new("");
    print_raw_element_list(raw, list);
    label =  label_from_string(raw->str,obfuscate);
    label2 = strdup(label);
    free(label);
    g_string_free(raw,true);
    return label2;
}

/* label_from_string - strip spaces and illegal characters to generate valid 
    HTML id */
/* Returns a null-terminated string, which must be freed after use. */

char *label_from_string(char *str, bool obfuscate) {
    bool valid = FALSE;
    GString *out = g_string_new("");
    char *label;

    while (*str != '\0') {
        if (valid) {
        /* can relax on following characters */
            if ((*str >= '0' && *str <= '9') || (*str >= 'A' && *str <= 'Z')
                || (*str >= 'a' && *str <= 'z') || (*str == '.') || (*str== '_')
                || (*str== '-') || (*str== ':'))
            {
                g_string_append_c(out, tolower(*str));
            }           
        } else {
        /* need alpha as first character */
            if ((*str >= 'A' && *str <= 'Z') || (*str >= 'a' && *str <= 'z'))
            {
                g_string_append_c(out, tolower(*str));
                valid = TRUE;
            }
        }
    str++;
    }
    label = out->str;
    g_string_free(out, false);
    return label;
}

/* find_label - return true if header, table, etc is found matching label.
 * 'link' is modified with the matching url and title. */
bool find_label(link *result, element *label) {
    char *lab;
    element *cur = labels;  /* pointer to walk up list of references */
    GString *text = g_string_new("");
    print_raw_element_list(text, label);
    lab = label_from_string(text->str,0);
    GString *query = g_string_new(lab);
    free(lab);
    g_string_free(text, true);

    while (cur != NULL) {
        if (strcmp(query->str,cur->contents.str) == 0) {
            g_string_free(query, true);
            return true;
        }
        else
           cur = cur->next;
    }
    g_string_free(query, true);
    return false;
}


/* localize_typography - return the proper string, based on language chosen */
/* Default action is English */

void localize_typography(GString *out, int character, int lang, int output) {

    switch (output) {
        case HTMLOUT:
            switch (character) {
                case LSQUOTE:
                    switch (lang) {
                        case SWEDISH:
                            g_string_append_printf(out, "&#8217;");
                            break;
                        case FRENCH:
                            g_string_append_printf(out,"&#39;");
                            break;
                        case GERMAN:
                            g_string_append_printf(out,"&#8218;");
                            break;
                        case GERMANGUILL:
                            g_string_append_printf(out,"&#8250;");
                            break;
                        default:
                            g_string_append_printf(out,"&#8216;");
                        }
                    break;
                case RSQUOTE:
                    switch (lang) {
                        case GERMAN:
                            g_string_append_printf(out,"&#8216;");
                            break;
                        case GERMANGUILL:
                            g_string_append_printf(out,"&#8249;");
                            break;
                        default:
                            g_string_append_printf(out,"&#8217;");
                        }
                    break;
                case APOS:
                    g_string_append_printf(out,"&#8217;");
                    break;
                case LDQUOTE:
                    switch (lang) {
                        case DUTCH:
                        case GERMAN:
                            g_string_append_printf(out,"&#8222;");
                            break;
                        case GERMANGUILL:
                            g_string_append_printf(out,"&#187;");
                            break;
                        case FRENCH:
                            g_string_append_printf(out,"&#171;");
                            break;
                        case SWEDISH:
                            g_string_append_printf(out, "&#8221;");
                            break;
                        default:
                            g_string_append_printf(out,"&#8220;");
                        }
                    break;
                case RDQUOTE:
                    switch (lang) {
                        case SWEDISH:
                        case DUTCH:
                            g_string_append_printf(out,"&#8221;");
                            break;
                        case GERMAN:
                            g_string_append_printf(out,"&#8220;");
                            break;
                        case GERMANGUILL:
                            g_string_append_printf(out,"&#171;");
                            break;
                        case FRENCH:
                            g_string_append_printf(out,"&#187;");
                            break;
                        default:
                            g_string_append_printf(out,"&#8221;");
                        }
                    break;
                case NDASH:
                    g_string_append_printf(out,"&#8211;");
                    break;
                case MDASH:
                    g_string_append_printf(out,"&#8212;");
                    break;
                case ELLIP:
                    g_string_append_printf(out,"&#8230;");
                    break;
                    default:;
            }
            break;
        case LATEXOUT:
            switch (character) {
                case LSQUOTE:
                    switch (lang) {
                        case SWEDISH:
                            g_string_append_printf(out,"'");
                            break;
                        case FRENCH:
                            g_string_append_printf(out,"'");
                            break;
                        case GERMAN:
                            g_string_append_printf(out,"‚");
                            break;
                        case GERMANGUILL:
                            g_string_append_printf(out,"›");
                            break;
                        default:
                            g_string_append_printf(out,"`");
                    }
                    break;
                case RSQUOTE:
                    switch (lang) {
                        case GERMAN:
                            g_string_append_printf(out,"`");
                            break;
                        case GERMANGUILL:
                            g_string_append_printf(out,"‹");
                            break;
                        default:
                            g_string_append_printf(out,"'");
                    }
                    break;
                case APOS:
                    g_string_append_printf(out,"'");
                    break;
                case LDQUOTE:
                    switch (lang) {
                        case DUTCH:
                        case GERMAN:
                            g_string_append_printf(out,"„");
                            break;
                        case GERMANGUILL:
                            g_string_append_printf(out,"»");
                            break;
                        case FRENCH:
                            g_string_append_printf(out,"«");
                            break;
                        case SWEDISH:
                            g_string_append_printf(out,"''");
                            break;
                        default:
                            g_string_append_printf(out,"``");
                        }
                    break;
                case RDQUOTE:
                    switch (lang) {
                        case SWEDISH:
                        case DUTCH:
                            g_string_append_printf(out,"''");
                            break;
                        case GERMAN:
                            g_string_append_printf(out,"``");
                            break;
                        case GERMANGUILL:
                            g_string_append_printf(out,"«");
                            break;
                        case FRENCH:
                            g_string_append_printf(out,"»");
                            break;
                        default:
                            g_string_append_printf(out,"''");
                        }
                    break;
                case NDASH:
                    g_string_append_printf(out,"--");
                    break;
                case MDASH:
                    g_string_append_printf(out,"---");
                    break;
                case ELLIP:
                    g_string_append_printf(out,"{\\ldots}");
                    break;
                    default:;
            }
            break;
        default:;
    }
}

/* Trim spaces at end of string */
void trim_trailing_whitespace(char *str) {    
    while ( ( str[strlen(str)-1] == ' ' ) ||
        ( str[strlen(str)-1] == '\n' ) || 
        ( str[strlen(str)-1] == '\r' ) || 
        ( str[strlen(str)-1] == '\t' ) ) {
        str[strlen(str)-1] = '\0';
    }
}

/* Don't let us get caught in "infinite" loop */
bool check_timeout() {
    /* Once we abort, keep aborting */
    if (parse_aborted)
        return 0;
    
    /* We're not timing this run */
    if (start_time == 0)
        return 1;

    clock_t end = clock();
    double elapsed = ((double) (end - start_time)) / CLOCKS_PER_SEC;
    
	/* fprintf(stderr,"%2.2f elapsed; (%4.2f CLOCKS_PER_SEC)\n",elapsed,CLOCKS_PER_SEC); */
	/* fprintf(stderr,"%2.2f elapsed\n",elapsed); */
	
	
    /* If > 3 clock seconds, then abort */
    float max = 3;
    if (elapsed > max) {
        parse_aborted = 1;
        return 0;
    }
    return 1;
}