-
Notifications
You must be signed in to change notification settings - Fork 3
/
regex_lib.h
107 lines (94 loc) · 2.41 KB
/
regex_lib.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#ifndef clox_regex_h
#define clox_regex_h
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
// Kinds of regex node. The trailing comment on each entry shows the pattern
// syntax it stands for. Numbering starts at 1, so 0 names no node kind.
typedef enum RNodeType {
    NODE_ATOM = 1,                // a, b, c, etc.
    NODE_GROUP = 2,               // ( )
    NODE_OR = 3,                  // |
    NODE_REPEAT = 4,              // +
    NODE_REPEAT_NONGREEDY = 5,    // +?
    NODE_REPEAT_Z = 6,            // *
    NODE_REPEAT_Z_NONGREEDY = 7,  // *?
    NODE_MAYBE = 8,               // ?
    NODE_REPEAT_N = 9,            // {n[,m]}
    NODE_CCLASS = 10,             // character class, e.g. [aeiou]
    NODE_ECLASS = 11,             // escape class: \d, \w, \s
    NODE_ANCHOR = 12,             // ^ and $
    NODE_DOT = 13,                // .
    NODE_PROGRAM = 14             // top-most node
} RNodeType;
// Escape class kinds. ECLASS_NONE (0) marks "not an escape class"; every
// other entry lists the escape sequence it corresponds to.
typedef enum REClassType {
    ECLASS_NONE = 0,              // not an escape class
    ECLASS_DIGIT = 1,             // \d
    ECLASS_NON_DIGIT = 2,         // \D
    ECLASS_SPACE = 3,             // \s
    ECLASS_NON_SPACE = 4,         // \S
    ECLASS_WORD = 5,              // \w
    ECLASS_NON_WORD = 6,          // \W
    ECLASS_WORD_BOUNDARY = 7,     // \b
    ECLASS_NON_WORD_BOUNDARY = 8  // \B
} REClassType;
// Anchor kinds. ANCHOR_NONE (0) marks "not an anchor"; every other entry
// lists the pattern syntax it corresponds to.
typedef enum RAnchorType {
    ANCHOR_NONE = 0,  // not an anchor
    ANCHOR_BOS = 1,   // \A
    ANCHOR_EOS = 2,   // \Z
    ANCHOR_BOL = 3,   // ^
    ANCHOR_EOL = 4    // $
} RAnchorType;
// A regex node. Nodes are linked to one another through next/prev and
// parent/children.
typedef struct RNode RNode;
struct RNode {
    const char *tok;             // token text; not owned by the node
    int toklen;                  // length that goes with `tok`
    int nodelen;                 // includes child tokens, if any
    long repeat_min;             // e.g. for {3,4} this is 3
    long repeat_max;             // NOTE(review): presumably 4 for {3,4} - confirm
    REClassType eclass_type;     // ECLASS_NONE when not an escape class
    RNodeType type;
    RAnchorType anchor_type;     // ANCHOR_NONE when not an anchor
    char *capture_beg;
    char *capture_end;
    RNode *next;
    RNode *prev;
    RNode *parent;
    RNode *children;
};
// Option flags; an instance is stored in Regex.opts.
typedef struct RegexOptions RegexOptions;
struct RegexOptions {
    bool case_insensitive;
    bool multiline;  // when set: don't end on \n
};
// Singly linked list cell that points at one RNode.
typedef struct GroupNode GroupNode;
struct GroupNode {
    RNode *group;     // the node this cell refers to
    GroupNode *next;  // following cell in the list
};
// A regex object: its node, source text, group list and options.
typedef struct Regex Regex;
struct Regex {
    RNode *node;        // NOTE(review): presumably the top-most (NODE_PROGRAM) node - confirm
    const char *src;
    bool ownsSrc;       // when set, `src` can be freed in regex_free
    GroupNode *groups;  // GroupNode list, linked through GroupNode.next
    RegexOptions opts;  // held by value
};
// Result codes for regex_compile().
//
// REGEX_UNITIALIZED_ERR is a misspelling of "uninitialized". It is kept so
// existing callers keep compiling; REGEX_UNINITIALIZED_ERR is the correctly
// spelled alias with the same value. All numeric values match the original
// implicit numbering (0..3).
typedef enum RegexCompileResult {
    REGEX_UNITIALIZED_ERR = 0,                        // legacy (misspelled) name
    REGEX_UNINITIALIZED_ERR = REGEX_UNITIALIZED_ERR,  // preferred spelling, same value
    REGEX_PARSE_ERR = 1,
    REGEX_COMPILE_ERR = 2,
    REGEX_COMPILE_SUCCESS = 3
} RegexCompileResult;
// Value returned by regex_match().
typedef struct MatchData MatchData;
struct MatchData {
    bool matched;
    int match_start;  // NOTE(review): presumably an offset into the searched string - confirm
    int match_len;
};
// Public API. The implementations are not visible from this header, so the
// notes below state only what the signatures and the type comments establish.

// Set up `regex` from pattern source `src` and options `opts`.
// NOTE(review): whether `opts` may be NULL and whether `src` is copied are
// not visible here - confirm against the implementation.
void regex_init(Regex *regex, const char *src, RegexOptions *opts);
// Same parameters as regex_init.
// NOTE(review): how this differs from regex_init cannot be told from the
// header - possibly in who owns `src` (see Regex.ownsSrc); confirm.
void regex_init_from(Regex *regex, const char *src, RegexOptions *opts);
// Release `regex`. Per the Regex.ownsSrc comment, `src` can be freed here
// when `ownsSrc` is set.
void regex_free(Regex *regex);
// Compile `regex`; the outcome is reported as a RegexCompileResult.
RegexCompileResult regex_compile(Regex *regex);
// Match `regex` against `string`; the result is returned by value as a
// MatchData (matched flag, match_start, match_len).
MatchData regex_match(Regex *regex, const char *string);
// NOTE(review): presumably dumps the node tree of `regex` for debugging;
// the output destination is not visible here - confirm.
void regex_output_ast(Regex *regex);
// Return a string for `nodeType`.
// NOTE(review): presumably the enumerator's name; behaviour for values
// outside RNodeType is not visible here - confirm.
const char *rnodeTypeName(RNodeType nodeType);
#ifdef __cplusplus
}
#endif
#endif