-
Notifications
You must be signed in to change notification settings - Fork 1
/
libutf8proc.scm
196 lines (173 loc) · 11.5 KB
/
libutf8proc.scm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
;;; utf8proc.scm
;;;
;;; tie the utf8proc library into the *libutf8proc* environment
(require cload.scm)
(provide 'libutf8proc.scm)
;; if loading from a different directory, pass that info to C
(let ((directory (let ((current-file (port-filename)))
(and (memv (current-file 0) '(#\/ #\~))
(substring current-file 0 (- (length current-file) 9))))))
(when (and directory (not (member directory *load-path*)))
(set! *load-path* (cons directory *load-path*)))
(with-let (rootlet)
(require cload.scm))
(when (and directory (not (string-position directory *cload-cflags*)))
(set! *cload-cflags* (string-append "-I" directory " " *cload-cflags*))))
(if (not (defined? '*libutf8proc*))
(define *libutf8proc*
(with-let (unlet)
(set! *libraries* (cons (cons "libutf8proc.scm" (curlet)) *libraries*))
(c-define
'((C-macro (int (UTF8PROC_VERSION_MAJOR UTF8PROC_VERSION_MINOR UTF8PROC_VERSION_PATCH)))
(int (UTF8PROC_NULLTERM UTF8PROC_STABLE UTF8PROC_COMPAT UTF8PROC_COMPOSE UTF8PROC_DECOMPOSE
UTF8PROC_IGNORE UTF8PROC_REJECTNA UTF8PROC_NLF2LS UTF8PROC_NLF2PS UTF8PROC_NLF2LF
UTF8PROC_STRIPCC UTF8PROC_CASEFOLD UTF8PROC_CHARBOUND UTF8PROC_LUMP UTF8PROC_STRIPMARK))
(C-macro (int (UTF8PROC_ERROR_NOMEM UTF8PROC_ERROR_OVERFLOW UTF8PROC_ERROR_INVALIDUTF8 UTF8PROC_ERROR_NOTASSIGNED UTF8PROC_ERROR_INVALIDOPTS)))
(int (UTF8PROC_CATEGORY_CN UTF8PROC_CATEGORY_LU UTF8PROC_CATEGORY_LL UTF8PROC_CATEGORY_LT UTF8PROC_CATEGORY_LM
UTF8PROC_CATEGORY_LO UTF8PROC_CATEGORY_MN UTF8PROC_CATEGORY_MC UTF8PROC_CATEGORY_ME UTF8PROC_CATEGORY_ND
UTF8PROC_CATEGORY_NL UTF8PROC_CATEGORY_NO UTF8PROC_CATEGORY_PC UTF8PROC_CATEGORY_PD UTF8PROC_CATEGORY_PS
UTF8PROC_CATEGORY_PE UTF8PROC_CATEGORY_PI UTF8PROC_CATEGORY_PF UTF8PROC_CATEGORY_PO UTF8PROC_CATEGORY_SM
UTF8PROC_CATEGORY_SC UTF8PROC_CATEGORY_SK UTF8PROC_CATEGORY_SO UTF8PROC_CATEGORY_ZS UTF8PROC_CATEGORY_ZL
UTF8PROC_CATEGORY_ZP UTF8PROC_CATEGORY_CC UTF8PROC_CATEGORY_CF UTF8PROC_CATEGORY_CS UTF8PROC_CATEGORY_CO))
(int (UTF8PROC_BIDI_CLASS_L UTF8PROC_BIDI_CLASS_LRE UTF8PROC_BIDI_CLASS_LRO UTF8PROC_BIDI_CLASS_R UTF8PROC_BIDI_CLASS_AL
UTF8PROC_BIDI_CLASS_RLE UTF8PROC_BIDI_CLASS_RLO UTF8PROC_BIDI_CLASS_PDF UTF8PROC_BIDI_CLASS_EN UTF8PROC_BIDI_CLASS_ES
UTF8PROC_BIDI_CLASS_ET UTF8PROC_BIDI_CLASS_AN UTF8PROC_BIDI_CLASS_CS UTF8PROC_BIDI_CLASS_NSM UTF8PROC_BIDI_CLASS_BN
UTF8PROC_BIDI_CLASS_B UTF8PROC_BIDI_CLASS_S UTF8PROC_BIDI_CLASS_WS UTF8PROC_BIDI_CLASS_ON UTF8PROC_BIDI_CLASS_LRI
UTF8PROC_BIDI_CLASS_RLI UTF8PROC_BIDI_CLASS_FSI UTF8PROC_BIDI_CLASS_PDI))
(int (UTF8PROC_DECOMP_TYPE_FONT UTF8PROC_DECOMP_TYPE_NOBREAK UTF8PROC_DECOMP_TYPE_INITIAL UTF8PROC_DECOMP_TYPE_MEDIAL
UTF8PROC_DECOMP_TYPE_FINAL UTF8PROC_DECOMP_TYPE_ISOLATED UTF8PROC_DECOMP_TYPE_CIRCLE UTF8PROC_DECOMP_TYPE_SUPER
UTF8PROC_DECOMP_TYPE_SUB UTF8PROC_DECOMP_TYPE_VERTICAL UTF8PROC_DECOMP_TYPE_WIDE UTF8PROC_DECOMP_TYPE_NARROW
UTF8PROC_DECOMP_TYPE_SMALL UTF8PROC_DECOMP_TYPE_SQUARE UTF8PROC_DECOMP_TYPE_FRACTION UTF8PROC_DECOMP_TYPE_COMPAT))
(int (UTF8PROC_BOUNDCLASS_START UTF8PROC_BOUNDCLASS_OTHER UTF8PROC_BOUNDCLASS_CR UTF8PROC_BOUNDCLASS_LF
UTF8PROC_BOUNDCLASS_CONTROL UTF8PROC_BOUNDCLASS_EXTEND UTF8PROC_BOUNDCLASS_L UTF8PROC_BOUNDCLASS_V
UTF8PROC_BOUNDCLASS_T UTF8PROC_BOUNDCLASS_LV UTF8PROC_BOUNDCLASS_LVT UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR
UTF8PROC_BOUNDCLASS_SPACINGMARK))
;; in version 2:
;; UTF8PROC_BOUNDCLASS_PREPEND UTF8PROC_BOUNDCLASS_ZWJ UTF8PROC_BOUNDCLASS_E_BASE
;; UTF8PROC_BOUNDCLASS_E_MODIFIER UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ UTF8PROC_BOUNDCLASS_E_BASE_GAZ
;; utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
;; utf8proc_bool utf8proc_grapheme_break_stateful(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state)
(char* utf8proc_version (void))
(char* utf8proc_errmsg (int))
(int utf8proc_tolower ((utf8proc_int32_t int)))
(int utf8proc_toupper ((utf8proc_int32_t int)))
(int utf8proc_charwidth ((utf8proc_int32_t int)))
(int utf8proc_category ((utf8proc_int32_t int)))
(char* utf8proc_category_string ((utf8proc_int32_t int)))
(bool utf8proc_codepoint_valid ((utf8proc_int32_t int)))
(bool utf8proc_grapheme_break ((utf8proc_int32_t int) (utf8proc_int32_t int)))
(char* utf8proc_NFD (char*)) ; these return newly allocated memory -- should probably free it here
(char* utf8proc_NFC (char*))
(char* utf8proc_NFKD (char*))
(char* utf8proc_NFKC (char*))
(in-C "static s7_pointer g_utf8proc_iterate(s7_scheme *sc, s7_pointer args)
{
utf8proc_int32_t code_ref = 0;
int len, res;
char *str;
str = (char *)s7_string(s7_car(args));
len = s7_string_length(s7_car(args));
res = utf8proc_iterate(str, len, &code_ref);
return(s7_list(sc, 2, s7_make_integer(sc, code_ref), s7_make_integer(sc, res)));
}")
(C-function ("utf8proc_iterate" g_utf8proc_iterate "" 1))
(in-C "static s7_pointer g_utf8proc_encode_char(s7_scheme *sc, s7_pointer args)
{
ssize_t res;
utf8proc_uint8_t buf[8];
res = utf8proc_encode_char((utf8proc_int32_t)s7_integer(s7_car(args)), buf);
return(s7_list(sc, 2, s7_make_string_with_length(sc, buf, res), s7_make_integer(sc, res)));
}")
(C-function ("utf8proc_encode_char" g_utf8proc_encode_char "" 1))
(in-C "static s7_pointer g_utf8proc_reencode(s7_scheme *sc, s7_pointer args)
{
s7_pointer buffer, codepoints, options;
ssize_t res;
buffer = s7_car(args);
codepoints = s7_cadr(args);
options = s7_caddr(args);
res = utf8proc_reencode((utf8proc_int32_t *)s7_string(buffer),
(utf8proc_ssize_t)s7_integer(codepoints),
(utf8proc_option_t)s7_integer(options));
return(s7_make_integer(sc, res));
}")
(C-function ("utf8proc_reencode" g_utf8proc_reencode "" 1))
(in-C "static s7_pointer g_utf8proc_get_property(s7_scheme *sc, s7_pointer args)
{
const utf8proc_property_t *info;
info = utf8proc_get_property((utf8proc_int32_t)s7_integer(s7_car(args)));
return(s7_inlet(sc, s7_list(sc, 30,
s7_make_symbol(sc, \"category\"), s7_make_integer(sc, info->category),
s7_make_symbol(sc, \"combining_class\"), s7_make_integer(sc, info->combining_class),
s7_make_symbol(sc, \"bidi_class\"), s7_make_integer(sc, info->bidi_class),
s7_make_symbol(sc, \"decomp_type\"), s7_make_integer(sc, info->decomp_type),
#if (UTF8PROC_VERSION_MAJOR >= 2)
s7_make_symbol(sc, \"uppercase_seqindex\"), s7_make_integer(sc, info->uppercase_seqindex),
s7_make_symbol(sc, \"lowercase_seqindex\"), s7_make_integer(sc, info->lowercase_seqindex),
s7_make_symbol(sc, \"titlecase_seqindex\"), s7_make_integer(sc, info->titlecase_seqindex),
s7_make_symbol(sc, \"casefold_seqindex\"), s7_make_integer(sc, info->casefold_seqindex),
s7_make_symbol(sc, \"comb_index\"), s7_make_integer(sc, info->comb_index),
#else
s7_make_symbol(sc, \"uppercase_mapping\"), s7_make_integer(sc, info->uppercase_mapping),
s7_make_symbol(sc, \"lowercase_mapping\"), s7_make_integer(sc, info->lowercase_mapping),
s7_make_symbol(sc, \"titlecase_mapping\"), s7_make_integer(sc, info->titlecase_mapping),
s7_make_symbol(sc, \"comb1st_index\"), s7_make_integer(sc, info->comb1st_index),
s7_make_symbol(sc, \"comb2nd_index\"), s7_make_integer(sc, info->comb2nd_index),
#endif
s7_make_symbol(sc, \"bidi_mirrored\"), s7_make_integer(sc, info->bidi_mirrored),
s7_make_symbol(sc, \"comp_exclusion\"), s7_make_integer(sc, info->comp_exclusion),
s7_make_symbol(sc, \"ignorable\"), s7_make_integer(sc, info->ignorable),
s7_make_symbol(sc, \"control_boundary\"), s7_make_integer(sc, info->control_boundary),
s7_make_symbol(sc, \"boundclass\"), s7_make_integer(sc, info->boundclass),
s7_make_symbol(sc, \"charwidth\"), s7_make_integer(sc, info->charwidth))));
}")
(C-function ("utf8proc_get_property" g_utf8proc_get_property "" 1))
(in-C "static s7_pointer g_utf8proc_decompose_char(s7_scheme *sc, s7_pointer args)
{
s7_pointer code, opt, str;
int last_boundclass;
utf8proc_ssize_t size;
utf8proc_int32_t *dst;
ssize_t res;
code = s7_car(args);
str = s7_cadr(args);
opt = s7_caddr(args);
dst = (utf8proc_int32_t *)s7_string(str);
size = (utf8proc_ssize_t)s7_string_length(str);
res = utf8proc_decompose_char((utf8proc_int32_t)s7_integer(code), dst, size, (utf8proc_option_t)s7_integer(opt), &last_boundclass);
return(s7_make_integer(sc, res));
}")
(C-function ("utf8proc_decompose_char" g_utf8proc_decompose_char "" 3))
(in-C "static s7_pointer g_utf8proc_map(s7_scheme *sc, s7_pointer args)
{
s7_pointer opt, str, p;
ssize_t res;
utf8proc_uint8_t *dst;
str = s7_car(args);
opt = s7_cadr(args);
res = utf8proc_map((utf8proc_uint8_t *)s7_string(str), s7_string_length(str), &dst, (utf8proc_option_t)s7_integer(opt));
if (res < 0) return(s7_make_integer(sc, res));
p = s7_make_string_with_length(sc, dst, res);
free(dst);
return(p);
}")
(C-function ("utf8proc_map" g_utf8proc_map "" 2))
(in-C "static s7_pointer g_utf8proc_decompose(s7_scheme *sc, s7_pointer args)
{
s7_pointer opt, str;
int len;
ssize_t res;
utf8proc_int32_t *dst;
str = s7_car(args);
opt = s7_cadr(args);
len = s7_string_length(str);
dst = (utf8proc_int32_t *)malloc(len * 4);
res = utf8proc_decompose((const utf8proc_uint8_t *)s7_string(str), len, dst, len, (utf8proc_option_t)s7_integer(opt));
if (res < 0) return(s7_make_integer(sc, res));
return(s7_make_string_with_length(sc, (char *)dst, res));
}")
(C-function ("utf8proc_decompose" g_utf8proc_decompose "" 2))
)
"" "utf8proc.h" "" "-lutf8proc" "utf8proc_s7")
(curlet))))
*libutf8proc*