This repository has been archived by the owner on Mar 23, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.c
112 lines (95 loc) · 2.95 KB
/
main.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#include "utf8.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined(_MSC_VER)
/*
 * MSVC-only replacement for fopen() built on fopen_s().
 *
 * fname   Path of the file to open.
 * mode    Mode string, passed to fopen_s() unchanged.
 * returns The opened stream, or NULL when fopen_s() reports an error.
 *
 * The #define below routes every later fopen() call in this file here.
 */
FILE* fopen_wrapper(const char* fname, const char* mode)
{
    FILE* stream = NULL;

    /* fopen_s() leaves the out-pointer NULL on failure, so this yields
       the same value as returning the out-pointer unconditionally. */
    return (fopen_s(&stream, fname, mode) == 0) ? stream : NULL;
}
#define fopen fopen_wrapper
#endif
/*
 * Lightweight reporting assert: evaluates `test` once and prints
 * "[passed] <expression>" or "[FAILED] <expression>" with puts().
 * Unlike the <assert.h> macro it never aborts the program.
 *
 * Any earlier definition is dropped first so this one does not clash with
 * <assert.h>. The do { } while (0) wrapper makes the expansion a single
 * statement, so `if (c) assert(x); else ...` parses as written.
 */
#undef assert
#define assert(test) \
    do { \
        if (test) puts("[passed] " #test); \
        else puts("[FAILED] " #test); \
    } while (0)
/*
 * Reads the whole file `fname` into memory and checks it with utf8_valid().
 *
 * fname   Path of the file to check; it is opened in binary mode.
 * returns The result of utf8_valid() on the file contents, or false when the
 *         file cannot be opened, measured, buffered or read completely
 *         (a message is printed to stdout in each of those cases).
 *
 * NOTE(review): the contents are passed to utf8_valid() as a NUL-terminated
 * string without a length, so presumably a file containing an embedded NUL
 * byte is only checked up to that byte - confirm against utf8.h.
 */
bool_t validate_file(const char* fname)
{
    FILE* file = NULL;
    long fileLength = 0;
    size_t fileSize = 0;
    size_t bytesRead = 0;
    char* fileContents = NULL;
    bool_t result = false;

    file = fopen(fname, "rb");
    if (!file)
    {
        printf("Failed to open '%s'\n", fname);
        return false;
    }

    /* Measure the file by seeking to its end. ftell() returns -1L on failure,
       which must be rejected before it is converted to an unsigned size. */
    if (fseek(file, 0, SEEK_END) != 0)
    {
        fileLength = -1;
    }
    else
    {
        fileLength = ftell(file);
    }
    if (fileLength < 0 || fseek(file, 0, SEEK_SET) != 0)
    {
        printf("Failed to determine the size of '%s'\n", fname);
        fclose(file);
        return false;
    }
    fileSize = (size_t)fileLength;

    /* One extra byte for the terminating NUL expected by utf8_valid(). */
    fileContents = (char*)malloc(fileSize + 1);
    if (!fileContents)
    {
        /* size_t has no portable pre-C99 printf specifier; widen explicitly. */
        printf("Failed to allocate %lu bytes\n", (unsigned long)fileSize);
        fclose(file);
        return false;
    }

    bytesRead = fread(fileContents, 1, fileSize, file);
    fclose(file);
    file = NULL;

    /* A short read would leave uninitialized bytes in the buffer. */
    if (bytesRead != fileSize)
    {
        printf("Failed to read '%s'\n", fname);
        free(fileContents);
        return false;
    }

    fileContents[fileSize] = 0;
    result = utf8_valid(fileContents);
    free(fileContents);
    return result;
}
/*
 * Test driver for the utf8 library.
 *
 * Runs a fixed list of checks through this file's assert macro, which prints
 * one "[passed]" / "[FAILED]" line per check, then validates every file named
 * on the command line with validate_file(). Always returns 0.
 *
 * The locals used inside assert(...) keep their names because the macro
 * prints the expression text verbatim.
 */
int main(int argc, char** argv)
{
    /* U+65E5 (3 bytes) followed by U+0448 (2 bytes): two code points. */
    const char* two_chars = "\xE6\x97\xA5\xD1\x88";
    const char* message = "Hello, world!";
    size_t cp_strlen = 0;
    int arg = 0;

    /* One sample for each encoded length from one to four bytes. */
    utf8_codepoint_t u0024 = utf8_encode("\x24"); // U+0024 == $
    utf8_codepoint_t u00A2 = utf8_encode("\xC2\xA2"); // U+00A2 == ¢
    utf8_codepoint_t u20AC = utf8_encode("\xE2\x82\xAC"); // U+20AC == €
    utf8_codepoint_t u10348 = utf8_encode("\xF0\x90\x8D\x88"); // U+10348 == 𐍈

    /* "$" followed by the euro sign, converted as a whole string. */
    utf8_codepoint_t* cp_str = utf8_encode_string("\x24\xE2\x82\xAC", &cp_strlen);

    /* String validation and length. */
    assert(utf8_valid(two_chars));
    assert(utf8_valid(message));
    assert(utf8_strlen(two_chars) == 2);
    assert(utf8_strlen(message) == strlen(message));

    /* Code point validity. */
    assert(!utf8_valid_codepoint(UTF8_INVALID_CODEPOINT));
    assert(utf8_valid_codepoint(u0024));
    assert(utf8_valid_codepoint(u00A2));
    assert(utf8_valid_codepoint(u20AC));
    assert(utf8_valid_codepoint(u10348));

    /* Values produced by utf8_encode(). */
    assert(u0024 == 0x24);
    assert(u00A2 == 0xA2);
    assert(u20AC == 0x20AC);
    assert(u10348 == 0x10348);

    /* Result of utf8_encode_string(). */
    assert(cp_strlen == 2);
    assert(cp_str[0] == u0024);
    assert(cp_str[1] == u20AC);

    /* Library-provided constants. */
    assert(utf8_encode(UTF8_BOM) == 0xFEFF);
    assert(utf8_encode(UTF8_REPLACEMENT) == 0xFFFD);

    /* Byte sequences that must be rejected. */
    assert(!utf8_valid("\xFF\xEE"));
    assert(!utf8_valid("\xBE\xEF"));
    assert(!utf8_valid("\xFE"));

    assert(utf8_valid_codepoint(utf8_encode("?")));
    assert(utf8_valid_codepoint(utf8_encode("\xE2\x82\xAC")));

    UTF8_FREE(cp_str);

    /* argv[0] is skipped; with no extra arguments the loop does not run. */
    for (arg = 1; arg < argc; ++arg)
    {
        const char* path = argv[arg];

        if (!validate_file(path))
        {
            printf("[FAILED] '%s' does NOT contain valid UTF-8 text\n", path);
        }
        else
        {
            printf("[passed] '%s' contains valid UTF-8 text\n", path);
        }
    }

    return 0;
}