-
Notifications
You must be signed in to change notification settings - Fork 160
/
pystring.h
443 lines (369 loc) · 23.1 KB
/
pystring.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
// Copyright Contributors to the Pystring project.
// SPDX-License-Identifier: BSD-3-Clause
// https://github.com/imageworks/pystring/blob/master/LICENSE
#ifndef INCLUDED_PYSTRING_H
#define INCLUDED_PYSTRING_H
#include <string>
#include <vector>
namespace pystring
{
//////////////////////////////////////////////////////////////////////////////////////////////
/// @mainpage pystring
///
/// This is a set of functions matching the interface and behaviors of python string methods
/// (as of python 2.3) using std::string.
///
/// Overlapping functionality ( such as index and slice/substr ) of std::string is included
/// to match python interfaces.
///
//////////////////////////////////////////////////////////////////////////////////////////////
/// @defgroup functions pystring
/// @{
// Largest value of a signed 32-bit integer (2^31 - 1). Used below as the default
// value of the 'end' argument of the slice-style functions. Note that this is a
// preprocessor macro, so it is not scoped to the pystring namespace.
#define MAX_32BIT_INT 2147483647
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a copy of the string with only its first character capitalized.
///
std::string capitalize( const std::string & str );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return the string centered in a string of length width. Padding is done using spaces.
///
std::string center( const std::string & str, int width );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return the number of occurrences of substring sub in string S[start:end]. Optional
/// arguments start and end are interpreted as in slice notation.
///
int count( const std::string & str, const std::string & substr, int start = 0, int end = MAX_32BIT_INT);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return True if the string ends with the specified suffix, otherwise return False. With
/// optional start, test beginning at that position. With optional end, stop comparing at that position.
///
bool endswith( const std::string & str, const std::string & suffix, int start = 0, int end = MAX_32BIT_INT );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a copy of the string where all tab characters are expanded using spaces. If tabsize
/// is not given, a tab size of 8 characters is assumed.
///
std::string expandtabs( const std::string & str, int tabsize = 8);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return the lowest index in the string where substring sub is found, such that sub is
/// contained in the range [start, end). Optional arguments start and end are interpreted as
/// in slice notation. Return -1 if sub is not found.
///
int find( const std::string & str, const std::string & sub, int start = 0, int end = MAX_32BIT_INT );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Currently a synonym of find. The python version raises exceptions. This one currently does not.
///
int index( const std::string & str, const std::string & sub, int start = 0, int end = MAX_32BIT_INT );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return true if all characters in the string are alphanumeric and there is at least one
/// character, false otherwise.
///
bool isalnum( const std::string & str );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return true if all characters in the string are alphabetic and there is at least one
/// character, false otherwise
///
bool isalpha( const std::string & str );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return true if all characters in the string are digits and there is at least one
/// character, false otherwise.
///
bool isdigit( const std::string & str );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return true if all cased characters in the string are lowercase and there is at least one
/// cased character, false otherwise.
///
bool islower( const std::string & str );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return true if there are only whitespace characters in the string and there is at least
/// one character, false otherwise.
///
bool isspace( const std::string & str );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return true if the string is a titlecased string and there is at least one character,
/// i.e. uppercase characters may only follow uncased characters and lowercase characters only
/// cased ones. Return false otherwise.
///
bool istitle( const std::string & str );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return true if all cased characters in the string are uppercase and there is at least one
/// cased character, false otherwise.
///
bool isupper( const std::string & str );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a string which is the concatenation of the strings in the sequence seq.
/// The separator between elements is the str argument
///
std::string join( const std::string & str, const std::vector< std::string > & seq );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return the string left justified in a string of length width. Padding is done using
/// spaces. The original string is returned if width is less than str.size().
///
std::string ljust( const std::string & str, int width );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a copy of the string converted to lowercase.
///
std::string lower( const std::string & str );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a copy of the string with leading characters removed. If chars is omitted or None,
/// whitespace characters are removed. If given and not "", chars must be a string; the
/// characters in the string will be stripped from the beginning of the string this method
/// is called on (argument "str" ).
///
std::string lstrip( const std::string & str, const std::string & chars = "" );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a copy of the string, concatenated N times, together.
/// Corresponds to the __mul__ operator.
///
std::string mul( const std::string & str, int n);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Split the string around the first occurrence of sep.
/// Three strings will always be placed into result. If sep is found, the strings will
/// be the text before sep, sep itself, and the remaining text. If sep is
/// not found, the original string will be returned with two empty strings.
///
void partition( const std::string & str, const std::string & sep, std::vector< std::string > & result );
/// Value-returning overload of partition(): creates a local vector, lets the
/// out-parameter version above fill it, and returns it by value.
inline std::vector< std::string > partition( const std::string & str, const std::string & sep )
{
    std::vector< std::string > parts;
    partition( str, sep, parts );
    return parts;
}
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief If str starts with prefix return a copy of the string with prefix at the start
/// removed otherwise return an unmodified copy of the string.
///
std::string removeprefix( const std::string & str, const std::string & prefix );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief If str ends with suffix return a copy of the string with suffix at the end removed
/// otherwise return an unmodified copy of the string.
///
std::string removesuffix( const std::string & str, const std::string & suffix );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a copy of the string with all occurrences of substring old replaced by new. If
/// the optional argument count is given, only the first count occurrences are replaced.
///
std::string replace( const std::string & str, const std::string & oldstr, const std::string & newstr, int count = -1);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return the highest index in the string where substring sub is found, such that sub is
/// contained within str[start:end]. Optional arguments start and end are interpreted as in
/// slice notation. Return -1 on failure.
///
int rfind( const std::string & str, const std::string & sub, int start = 0, int end = MAX_32BIT_INT );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Currently a synonym of rfind. The python version raises exceptions. This one currently
/// does not
///
int rindex( const std::string & str, const std::string & sub, int start = 0, int end = MAX_32BIT_INT );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return the string right justified in a string of length width. Padding is done using
/// spaces. The original string is returned if width is less than str.size().
///
std::string rjust( const std::string & str, int width);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Split the string around the last occurrence of sep.
/// Three strings will always be placed into result. If sep is found, the strings will
/// be the text before sep, sep itself, and the remaining text. If sep is
/// not found, the original string will be returned with two empty strings.
///
void rpartition( const std::string & str, const std::string & sep, std::vector< std::string > & result );
/// Value-returning overload of rpartition(): creates a local vector, lets the
/// out-parameter version above fill it, and returns it by value.
inline std::vector< std::string > rpartition ( const std::string & str, const std::string & sep )
{
    std::vector< std::string > parts;
    rpartition( str, sep, parts );
    return parts;
}
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a copy of the string with trailing characters removed. If chars is "", whitespace
/// characters are removed. If not "", the characters in the string will be stripped from the
/// end of the string this method is called on.
///
std::string rstrip( const std::string & str, const std::string & chars = "" );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Fills the "result" list with the words in the string, using sep as the delimiter string.
/// If maxsplit is > -1, at most maxsplit splits are done. If sep is "",
/// any whitespace string is a separator.
///
void split( const std::string & str, std::vector< std::string > & result, const std::string & sep = "", int maxsplit = -1);
/// Value-returning overload of split(): creates a local vector, forwards str,
/// sep and maxsplit to the out-parameter version above, and returns the
/// filled vector by value.
inline std::vector< std::string > split( const std::string & str, const std::string & sep = "", int maxsplit = -1)
{
    std::vector< std::string > pieces;
    split( str, pieces, sep, maxsplit );
    return pieces;
}
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Fills the "result" list with the words in the string, using sep as the delimiter string.
/// Does a number of splits starting at the end of the string, the result still has the
/// split strings in their original order.
/// If maxsplit is > -1, at most maxsplit splits are done. If sep is "",
/// any whitespace string is a separator.
///
void rsplit( const std::string & str, std::vector< std::string > & result, const std::string & sep = "", int maxsplit = -1);
/// Value-returning overload of rsplit(): creates a local vector, forwards str,
/// sep and maxsplit to the out-parameter version above, and returns the
/// filled vector by value.
inline std::vector< std::string > rsplit( const std::string & str, const std::string & sep = "", int maxsplit = -1)
{
    std::vector< std::string > pieces;
    rsplit( str, pieces, sep, maxsplit );
    return pieces;
}
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a list of the lines in the string, breaking at line boundaries. Line breaks
/// are not included in the resulting list unless keepends is given and true.
///
void splitlines( const std::string & str, std::vector< std::string > & result, bool keepends = false );
/// Value-returning overload of splitlines(): creates a local vector, forwards
/// str and keepends to the out-parameter version above, and returns the
/// filled vector by value.
inline std::vector< std::string > splitlines( const std::string & str, bool keepends = false )
{
    std::vector< std::string > lines;
    splitlines( str, lines, keepends );
    return lines;
}
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return True if string starts with the prefix, otherwise return False. With optional start,
/// test string beginning at that position. With optional end, stop comparing string at that
/// position
///
bool startswith( const std::string & str, const std::string & prefix, int start = 0, int end = MAX_32BIT_INT );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a copy of the string with leading and trailing characters removed. If chars is "",
/// whitespace characters are removed. If given and not "", the characters in the string will be
/// stripped from both ends of the string this method is called on.
///
std::string strip( const std::string & str, const std::string & chars = "" );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a copy of the string with uppercase characters converted to lowercase and vice versa.
///
std::string swapcase( const std::string & str );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a titlecased version of the string: words start with uppercase characters,
/// all remaining cased characters are lowercase.
///
std::string title( const std::string & str );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a copy of the string where all characters occurring in the optional argument
/// deletechars are removed, and the remaining characters have been mapped through the given
/// translation table, which must be a string of length 256.
///
std::string translate( const std::string & str, const std::string & table, const std::string & deletechars = "");
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a copy of the string converted to uppercase.
///
std::string upper( const std::string & str );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return the numeric string left filled with zeros in a string of length width. The original
/// string is returned if width is less than str.size().
///
std::string zfill( const std::string & str, int width );
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief function matching python's slice functionality.
///
std::string slice( const std::string & str, int start = 0, int end = MAX_32BIT_INT);
///
/// @}
///
namespace os
{
namespace path
{
// All of the functions below have three versions.
// Example:
// join(...)
// join_nt(...)
// join_posix(...)
//
// The regular function dispatches to the other versions - based on the OS
// at compile time - to match the result you'd get from the python
// interpreter on the same operating system
//
// Should you want to 'lock off' to a particular version of the string
// manipulation across *all* operating systems, use the version with the
// _OS you are interested in. I.e., you can use posix style path joining,
// even on Windows, with join_posix.
//
// The naming, (nt, posix) matches the cpython source implementation.
//////////////////////////////////////////////////////////////////////////////////////////////
/// @defgroup functions pystring::os::path
/// @{
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return the base name of pathname path. This is the second half of the pair returned
/// by split(path). Note that the result of this function is different from the Unix basename
/// program; where basename for '/foo/bar/' returns 'bar', the basename() function returns an
/// empty string ('').
std::string basename(const std::string & path);
std::string basename_nt(const std::string & path);
std::string basename_posix(const std::string & path);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return the directory name of pathname path. This is the first half of the pair
/// returned by split(path).
std::string dirname(const std::string & path);
std::string dirname_nt(const std::string & path);
std::string dirname_posix(const std::string & path);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return True if path is an absolute pathname. On Unix, that means it begins with a
/// slash, on Windows that it begins with a (back)slash after chopping off a potential drive
/// letter.
/// As described at the top of this namespace, isabs() dispatches on the compilation OS,
/// while isabs_nt() and isabs_posix() lock off to the nt / posix behavior on every platform.
bool isabs(const std::string & path);
bool isabs_nt(const std::string & path);
bool isabs_posix(const std::string & path);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Return a normalized absolutized version of the pathname path.
///
/// NOTE: This differs from the interface of the python equivalent in that it requires you
/// to pass in the current working directory as an argument.
std::string abspath(const std::string & path, const std::string & cwd);
std::string abspath_nt(const std::string & path, const std::string & cwd);
std::string abspath_posix(const std::string & path, const std::string & cwd);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Join one or more path components intelligently. If any component is an absolute
/// path, all previous components (on Windows, including the previous drive letter, if there
/// was one) are thrown away, and joining continues. The return value is the concatenation of
/// path1, and optionally path2, etc., with exactly one directory separator (os.sep) inserted
/// between components, unless path2 is empty. Note that on Windows, since there is a current
/// directory for each drive, os.path.join("c:", "foo") represents a path relative to the
/// current directory on drive C: (c:foo), not c:\foo.
/// This dispatches based on the compilation OS
std::string join(const std::string & path1, const std::string & path2);
std::string join_nt(const std::string & path1, const std::string & path2);
std::string join_posix(const std::string & path1, const std::string & path2);
std::string join(const std::vector< std::string > & paths);
std::string join_nt(const std::vector< std::string > & paths);
std::string join_posix(const std::vector< std::string > & paths);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Normalize a pathname. This collapses redundant separators and up-level references
/// so that A//B, A/B/, A/./B and A/foo/../B all become A/B. It does not normalize the case
/// (use normcase() for that). On Windows, it converts forward slashes to backward slashes.
/// It should be understood that this may change the meaning of the path if it contains
/// symbolic links!
std::string normpath(const std::string & path);
std::string normpath_nt(const std::string & path);
std::string normpath_posix(const std::string & path);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Split the pathname path into a pair, (head, tail) where tail is the last pathname
/// component and head is everything leading up to that. The tail part will never contain a
/// slash; if path ends in a slash, tail will be empty. If there is no slash in path, head
/// will be empty. If path is empty, both head and tail are empty. Trailing slashes are
/// stripped from head unless it is the root (one or more slashes only). In all cases,
/// join(head, tail) returns a path to the same location as path (but the strings may
/// differ).
void split(std::string & head, std::string & tail, const std::string & path);
void split_nt(std::string & head, std::string & tail, const std::string & path);
void split_posix(std::string & head, std::string & tail, const std::string & path);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Split the pathname path into a pair (drive, tail) where drive is either a drive
/// specification or the empty string. On systems which do not use drive specifications,
/// drive will always be the empty string. In all cases, drive + tail will be the same as
/// path.
/// The drive is written to drivespec and the tail to pathspec. As described at the top of
/// this namespace, splitdrive() dispatches on the compilation OS, while splitdrive_nt()
/// and splitdrive_posix() lock off to the nt / posix behavior on every platform.
void splitdrive(std::string & drivespec, std::string & pathspec, const std::string & path);
void splitdrive_nt(std::string & drivespec, std::string & pathspec, const std::string & path);
void splitdrive_posix(std::string & drivespec, std::string & pathspec, const std::string & path);
//////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Split the pathname path into a pair (root, ext) such that root + ext == path, and
/// ext is empty or begins with a period and contains at most one period. Leading periods on
/// the basename are ignored; splitext('.cshrc') returns ('.cshrc', '').
void splitext(std::string & root, std::string & ext, const std::string & path);
void splitext_nt(std::string & root, std::string & ext, const std::string & path);
void splitext_posix(std::string & root, std::string & ext, const std::string & path);
///
/// @}
///
} // namespace path
} // namespace os
} // namespace pystring
#endif