-
Notifications
You must be signed in to change notification settings - Fork 1
/
tokenizer.c
157 lines (140 loc) · 3.89 KB
/
tokenizer.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
/**
* tokenizer.c - A simple token recognizer.
*
* NOTE: The terms 'token' and 'lexeme' are used interchangeably in this
* program.
*
* @author Gajjan Jasani
* @version 03/08/2016
* @modified 04/10/2016
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "tokenizer.h"
// Index into `expression` of the next character the tokenizer will read.
// Every token handler in this file advances it as characters are consumed.
int expression_pointer = 0;
// Input line being tokenized. Only declared here (extern); its definition
// is not visible in this file.
extern char expression[100];
/**
 * Function: get_token
 * Purpose : Read the character at expression[expression_pointer] and
 *           dispatch to the matching handler, which consumes the token
 *           and advances expression_pointer past it.
 * Return  : Heap-allocated, NUL-terminated token string. For a digit or a
 *           defined lexeme this is the handler's result. For any other
 *           character handle_error() is called; if it returns (it may
 *           terminate the program instead), an empty zero-filled token is
 *           returned, or NULL if that allocation fails.
 */
char* get_token(){
    char c = expression[expression_pointer]; // char at the current position

    // <ctype.h> functions require a value representable as unsigned char.
    if (isdigit((unsigned char)c)) {
        return handle_digit_token(c);
    }
    if (check_lexeme(c)) {
        return handle_special_char_token(c);
    }

    // Neither a digit nor a defined lexeme.
    handle_error(c);

    // Allocate the empty token only on this path, so no buffer is leaked
    // when one of the handlers above supplies the token.
    char *token = calloc(20, sizeof *token);
    return token;
}
//==================== HELPER FUNCTIONS =========================
/**
 * Function: check_lexeme
 * Purpose : Check whether a char read from the input line is one of the
 *           characters in the lexemes array.
 * c       : The char that needs to be checked
 * Return  : 1 if c is part of our lexeme definition, 0 if not
 */
int check_lexeme(char c){
    // strchr() treats the terminating NUL as part of the string, but the
    // terminator is not a lexeme, so reject it explicitly.
    if (c == '\0') {
        return 0;
    }
    // Single library scan instead of calling strlen() on every iteration.
    return strchr(lexemes, c) != NULL;
}
/**
 * Function: handle_digit_token
 * Purpose : The char at expression_pointer is a digit; collect it and all
 *           digits that immediately follow it into one integer token, and
 *           advance expression_pointer to the first char after the run.
 * c       : the first char that is a digit
 * Return  : heap-allocated, NUL-terminated number token (at least 20 bytes
 *           of storage). Terminates the program if memory cannot be
 *           allocated.
 */
char* handle_digit_token(char c){
    enum { MIN_TOKEN_CAPACITY = 20 };   // buffer size the original always used

    const size_t len   = strlen(expression);   // computed once, not per char
    const size_t start = (size_t)expression_pointer;
    size_t end = start + 1;             // c itself is already accepted

    // Measure the digit run first so the buffer can be sized to fit it.
    // <ctype.h> functions require a value representable as unsigned char.
    while (end < len && isdigit((unsigned char)expression[end])) {
        end++;
    }

    const size_t digits = end - start;  // number of chars in the token
    size_t capacity = digits + 1;       // +1 for the terminating NUL
    if (capacity < MIN_TOKEN_CAPACITY) {
        capacity = MIN_TOKEN_CAPACITY;
    }

    // calloc zero-fills, so the token is NUL-terminated after the copy.
    char *token = calloc(capacity, sizeof *token);
    if (token == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(EXIT_FAILURE);
    }

    token[0] = c;
    for (size_t i = 1; i < digits; i++) {
        token[i] = expression[start + i];
    }

    expression_pointer = (int)end;      // resume after the last digit
    return token;
}
/**
 * Function: handle_special_char_token
 * Purpose : The char at expression_pointer is a lexeme from the lexemes
 *           array; build its token and advance expression_pointer past it.
 *           '<', '>', '!' and '=' absorb an immediately following '=' to
 *           form a two-char token. A '!' that is not followed by '=' is
 *           reported on stdout as a lexical error and the program exits.
 * c       : the first char that is a legit lexeme
 * Return  : heap-allocated, NUL-terminated special char (operation) token.
 *           Terminates the program if memory cannot be allocated.
 */
char* handle_special_char_token(char c){
    // calloc zero-fills, so the token stays NUL-terminated.
    char *token = calloc(20, sizeof *token);
    if (token == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(EXIT_FAILURE);
    }
    token[FIRST_ELE] = c;
    expression_pointer++;

    // Only these four chars can start a two-char token.
    if (c != '<' && c != '>' && c != '!' && c != '=') {
        return token;
    }

    // Special case: check if the next char is '=' (when there is one).
    if ((size_t)expression_pointer < strlen(expression) &&
        expression[expression_pointer] == '=') {
        token[FIRST_ELE+1] = '=';
        expression_pointer++;
        return token;
    }

    // A lone '!' is never a lexeme. This check sits outside the bounds
    // test so that a '!' ending the input is rejected like any other.
    if (c == '!') {
        printf("===> '%s'\n", token);
        printf("Lexical Error: not a lexeme\n");
        exit(0);
    }
    return token;
}
/**
 * Function: handle_error
 * Purpose : The char at expression_pointer is neither a digit nor a lexeme
 *           from the lexemes array. A 'q' at the very start of the line
 *           prints "Goodbye!" and exits. Otherwise the run of invalid
 *           chars is collected: if a digit or lexeme follows the run, the
 *           run is printed as a lexical error and the program exits; if
 *           the run extends to the end of the line, the function returns
 *           with expression_pointer advanced past it.
 * c       : the first char that is not a legit lexeme
 */
void handle_error(char c){
    if (expression_pointer == 0 && c == 'q') {
        printf("Goodbye!");
        exit(0);
    }

    const size_t len = strlen(expression);  // computed once, not per char

    // The run can never be longer than the whole line, so len + 2 bytes
    // (first char + rest of line + NUL) always suffices. calloc zero-fills,
    // which keeps the token NUL-terminated while it grows.
    char *token = calloc(len + 2, sizeof *token);
    if (token == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(EXIT_FAILURE);
    }

    size_t j = 0;                           // next free slot in token
    token[j++] = c;                         // put the first invalid char on token
    expression_pointer++;

    while ((size_t)expression_pointer < len) {
        c = expression[expression_pointer];
        // <ctype.h> functions require a value representable as unsigned char.
        if (isdigit((unsigned char)c) || check_lexeme(c)) {
            printf("===> '%s'\n", token);
            printf("Lexical Error: not a lexeme\n");
            exit(0);
        }
        token[j++] = c;
        expression_pointer++;
    }

    // Reached the end of the line without an error: release the scratch
    // buffer instead of leaking it.
    free(token);
}