-
Notifications
You must be signed in to change notification settings - Fork 29
/
stringx.lua
302 lines (268 loc) · 6.35 KB
/
stringx.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
--[[
extra string routines
]]
--derive the require prefix for sibling modules by removing "stringx" from
--the chunk's first argument (the module name, when loaded through require)
local path = (...):gsub("stringx", "")
--note: this local shadows the global assert for the rest of the file
local assert = require(path .. "assert")
local pretty = require(path .. "pretty")
--the module table; any key not defined here is looked up in the standard
--string library through __index
local stringx = setmetatable({}, {
__index = string
})
--split a string on a delimiter into an ordered table
--	self:  the string to split
--	delim: the delimiter, matched literally byte-for-byte (defaults to "")
--	       an empty delimiter splits into single-byte strings; limit is not
--	       applied in that case
--	limit: maximum number of splits to perform (defaults to unlimited)
--	       whatever follows the last split is returned whole as the final entry
--	returns a new sequence table holding the pieces in order
--	raises if an argument has the wrong type or limit is negative
function stringx.split(self, delim, limit)
	delim = delim or ""
	limit = limit or math.huge

	assert:type(self, "string", "stringx.split - self", 1)
	assert:type(delim, "string", "stringx.split - delim", 1)
	assert:type(limit, "number", "stringx.split - limit", 1)
	assert(limit >= 0, "max_split must be positive!")

	--we try to create as little garbage as possible!
	--only one table to contain the result, plus the split strings.
	--so we do two passes, and work with the bytes underlying the string
	--partly because string.find is not compiled on older luajit :)
	local res = {}
	local length = self:len()
	local delim_length = delim:len()

	--empty delim? split to individual characters
	if delim_length == 0 then
		for i = 1, length do
			table.insert(res, self:sub(i, i))
		end
		return res
	end

	local delim_start = delim:byte(1)
	--a whole delimiter cannot start any later than this
	local last_start = length - delim_length + 1

	--pass 1
	--collect split sites (stored temporarily in res)
	local i = 1
	while i <= last_start do
		local is_delim = self:byte(i) == delim_start
		if is_delim then
			--first byte matched; check the remainder of the delimiter
			for j = 2, delim_length do
				if self:byte(i + j - 1) ~= delim:byte(j) then
					is_delim = false
					break
				end
			end
		end
		if is_delim then
			if #res >= limit then
				break
			end
			table.insert(res, i)
			--iterate forward the whole delimiter
			i = i + delim_length
		else
			--advance a single byte only: jumping past a partial match
			--could skip a delimiter that begins inside it
			--(eg "ab" inside "aab")
			i = i + 1
		end
	end

	--pass 2
	--replace each split site with the substring that precedes it
	i = 1
	for si, j in ipairs(res) do
		res[si] = self:sub(i, j - 1)
		i = j + delim_length
	end
	--add the final section
	table.insert(res, self:sub(i, -1))

	--return the collection
	return res
end
--alias the pretty module's string entry as stringx.pretty
stringx.pretty = pretty.string
--(set of byte values treated as whitespace by the trim functions:
-- space, tab, newline and carriage return)
local _whitespace_bytes = {
	[(" "):byte()] = true,
	[("\t"):byte()] = true,
	[("\n"):byte()] = true,
	[("\r"):byte()] = true,
}
--trim all whitespace off the head and tail of a string
--	specifically trims space, tab, newline, and carriage return characters
--	ignores form feeds, vertical tabs, and backspaces
--
--	only generates one string of garbage in the case there's actually space to trim
--
--	s: the string to trim
--	returns s itself when there is nothing to trim, "" when s is empty or
--	consists only of whitespace, otherwise the content between the first and
--	last non-whitespace bytes
function stringx.trim(s)
	--cache
	local len = s:len()

	--find the first non-whitespace byte; 0 means there is none
	local head = 0
	for i = 1, len do
		if not _whitespace_bytes[s:byte(i)] then
			head = i
			break
		end
	end

	--empty or all-whitespace input means no content
	if head == 0 then
		return ""
	end

	--find the last non-whitespace byte
	--(the scan cannot run past head, which is known to be content)
	local tail = head
	for i = len, head, -1 do
		if not _whitespace_bytes[s:byte(i)] then
			tail = i
			break
		end
	end

	--limit ranges means no trim
	if head == 1 and tail == len then
		return s
	end

	--pull out the content
	return s:sub(head, tail)
end
--trim the start of a string
--	removes leading space, tab, newline, and carriage return characters
--
--	s: the string to trim
--	returns s itself when it has no leading whitespace, otherwise the
--	remainder after the leading whitespace ("" if s is all whitespace)
function stringx.ltrim(s)
	--default to one past the end, so a string with no content trims to ""
	local head = #s + 1
	for i = 1, #s do
		if not _whitespace_bytes[s:byte(i)] then
			head = i
			break
		end
	end
	--nothing to trim (also covers the empty string)
	if head == 1 then
		return s
	end
	return s:sub(head)
end
--trim the end of a string
--	removes trailing space, tab, newline, and carriage return characters
--
--	s: the string to trim
--	returns s itself when it has no trailing whitespace, otherwise the
--	part before the trailing whitespace ("" if s is all whitespace)
function stringx.rtrim(s)
	--default to zero, so a string with no content trims to ""
	local tail = 0
	for i = #s, 1, -1 do
		if not _whitespace_bytes[s:byte(i)] then
			tail = i
			break
		end
	end
	--nothing to trim (also covers the empty string)
	if tail == #s then
		return s
	end
	return s:sub(1, tail)
end
--remove the first line's indentation from every line of a string
--	leading empty lines are dropped, and the run of spaces/tabs that starts the
--	first remaining line is taken as the indent to strip
--	lines that do not begin with that indent are left alone, except lines
--	shorter than the indent that are themselves a prefix of it, which become empty
--
--	s: the text to de-indent
--	keep_trailing_empty: if truthy, empty lines at the end are kept
--	returns the lines re-joined with the newline style detected in s
--	("\r\n" if it appears anywhere in s, "\n" otherwise)
function stringx.deindent(s, keep_trailing_empty)
	--windows newlines anywhere in the input select "\r\n" for the whole text
	local newline = "\n"
	if s:find("\r\n", nil, true) then
		newline = "\r\n"
	end

	local lines = stringx.split(s, newline)

	--discard empty lines at the top
	while lines[1] == "" do
		table.remove(lines, 1)
	end
	if #lines == 0 then
		return ""
	end

	--the indent is whatever spaces and tabs open the first line
	local indent = lines[1]:match("^[ \t]*")
	local indent_len = #indent
	if indent_len == 0 then
		--first line isn't indented, so nothing gets stripped
		return table.concat(lines, newline)
	end

	local res = {}
	for idx = 1, #lines do
		local line = lines[idx]
		if line ~= "" then
			local prefix = line:sub(1, indent_len)
			local prefix_len = #prefix
			--either the full indent is present, or the whole (short) line
			--is the beginning of the indent
			local whole_indent = prefix == indent
			local partial_indent = prefix_len < indent_len
				and prefix == indent:sub(1, prefix_len)
			if whole_indent or partial_indent then
				line = line:sub(prefix_len + 1)
			end
		end
		res[idx] = line
	end

	--drop empty lines from the bottom unless asked to keep them
	if not keep_trailing_empty then
		while res[#res] == "" do
			table.remove(res)
		end
	end

	return table.concat(res, newline)
end

--alias
stringx.dedent = stringx.deindent
--apply a template to a string
--	every $name placeholder (letters, digits and underscores after the $) is
--	handed to string.gsub with sub as the replacement
--	sub may be a table keyed by placeholder name, or a function called with the name
--	ie stringx.apply_template("hello $name", {name = "tom"}) == "hello tom"
--	returns only the resulting string (the substitution count is dropped)
function stringx.apply_template(s, sub)
	--parentheses truncate gsub's two results to just the string
	return (s:gsub("%$([%w_]+)", sub))
end
--check whether needle occurs anywhere inside haystack
--(compares bytes directly, so no garbage strings are created)
--	returns true or false; an empty needle is always found
function stringx.contains(haystack, needle)
	local needle_len = #needle
	--last position at which the needle could still fit
	local last_start = #haystack - needle_len + 1
	local start = 1
	while start <= last_start do
		--count how many needle bytes match from this position
		local matched = 0
		while matched < needle_len
			and haystack:byte(start + matched) == needle:byte(matched + 1)
		do
			matched = matched + 1
		end
		if matched == needle_len then
			return true
		end
		start = start + 1
	end
	return false
end
--check whether s begins with prefix
--(compares bytes directly, so no garbage strings are created)
--Using loops is actually faster than string.find!
--	returns true or false; an empty prefix always matches
function stringx.starts_with(s, prefix)
	local idx = 1
	local prefix_len = #prefix
	while idx <= prefix_len do
		--running off the end of s yields nil, which never equals a byte
		if s:byte(idx) ~= prefix:byte(idx) then
			return false
		end
		idx = idx + 1
	end
	return true
end
--check whether s finishes with suffix
--(compares bytes directly, so no garbage strings are created)
--	returns true or false; an empty suffix always matches
function stringx.ends_with(s, suffix)
	--walk both strings backwards from their last bytes
	local s_pos = #s
	local suffix_pos = #suffix
	while suffix_pos >= 1 do
		if s:byte(s_pos) ~= suffix:byte(suffix_pos) then
			return false
		end
		s_pos = s_pos - 1
		suffix_pos = suffix_pos - 1
	end
	return true
end
--split on a delimiter, trim each piece, and drop the pieces that end up empty
--useful for hand-entered "permissive" data
--	"a,b, c, " -> {"a", "b", "c"}
--	s: the string to split
--	delim: the delimiter, passed straight to stringx.split
--	returns a sequence table of the non-empty trimmed pieces, in order
function stringx.split_and_trim(s, delim)
	local parts = stringx.split(s, delim)
	--compact the surviving pieces towards the front of the same table
	local kept = 0
	for idx = 1, #parts do
		local piece = stringx.trim(parts[idx])
		parts[idx] = nil
		if piece ~= "" then
			kept = kept + 1
			parts[kept] = piece
		end
	end
	return parts
end
--title-case a string
--"quick brown fox" becomes "Quick Brown Fox"
--	upper-cases each lowercase letter that directly follows a whitespace
--	character, and the first character if it is a lowercase letter
--	all other characters are left as they are
--	returns only the resulting string
function stringx.title_case(s)
	local upper = string.upper
	--letters after whitespace first (upper-casing the whitespace is a no-op)
	local after_spaces = s:gsub("%s%l", upper)
	--then the very first character
	local titled = after_spaces:gsub("^%l", upper)
	return titled
end
--export the module table
return stringx