-
Notifications
You must be signed in to change notification settings - Fork 80
/
stringzilla.h
4741 lines (4125 loc) · 221 KB
/
stringzilla.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**
* @brief StringZilla is a collection of simple string algorithms, designed to be used in Big Data applications.
* It may be slower than LibC, but has a broader & cleaner interface, and a very short implementation
* targeting modern x86 CPUs with AVX-512 and Arm NEON and older CPUs with SWAR and auto-vectorization.
*
* Consider overriding the following macros to customize the library:
*
* - `SZ_DEBUG=0` - whether to enable debug assertions and logging.
* - `SZ_DYNAMIC_DISPATCH=0` - whether to use runtime dispatching of the most advanced SIMD backend.
* - `SZ_USE_MISALIGNED_LOADS=0` - whether to use misaligned loads on platforms that support them.
* - `SZ_SWAR_THRESHOLD=24` - threshold for switching to SWAR backend over serial byte-level for-loops.
* - `SZ_USE_X86_AVX512=?` - whether to use AVX-512 instructions on x86_64.
* - `SZ_USE_X86_AVX2=?` - whether to use AVX2 instructions on x86_64.
* - `SZ_USE_ARM_NEON=?` - whether to use NEON instructions on ARM.
* - `SZ_USE_ARM_SVE=?` - whether to use SVE instructions on ARM.
*
* @see StringZilla: https://github.com/ashvardanian/StringZilla/blob/main/README.md
* @see LibC String: https://pubs.opengroup.org/onlinepubs/009695399/basedefs/string.h.html
*
* @file stringzilla.h
* @author Ash Vardanian
*/
#ifndef STRINGZILLA_H_
#define STRINGZILLA_H_
#define STRINGZILLA_VERSION_MAJOR 2
#define STRINGZILLA_VERSION_MINOR 0
#define STRINGZILLA_VERSION_PATCH 4
/**
* @brief When set to 1, the library will include the following LibC headers: <stddef.h> and <stdint.h>.
* In debug builds (SZ_DEBUG=1), the library will also include <stdio.h> and <stdlib.h>.
*
* You may want to disable this compiling for use in the kernel, or in embedded systems.
* You may also avoid them, if you are very sensitive to compilation time and avoid pre-compiled headers.
* https://artificial-mind.net/projects/compile-health/
*/
#ifndef SZ_AVOID_LIBC
#define SZ_AVOID_LIBC (0) // true or false
#endif
/**
* @brief A misaligned load can be - trying to fetch eight consecutive bytes from an address
* that is not divisible by eight.
*
* Most platforms support it, but there is no industry standard way to check for those.
* This value will mostly affect the performance of the serial (SWAR) backend.
*/
#ifndef SZ_USE_MISALIGNED_LOADS
#define SZ_USE_MISALIGNED_LOADS (0) // true or false
#endif
/**
* @brief Removes compile-time dispatching, and replaces it with runtime dispatching.
* So the `sz_find` function will invoke the most advanced backend supported by the CPU,
* that runs the program, rather than the most advanced backend supported by the CPU
* used to compile the library or the downstream application.
*/
#ifndef SZ_DYNAMIC_DISPATCH
#define SZ_DYNAMIC_DISPATCH (0) // true or false
#endif
/**
* @brief Analogous to `size_t` and `std::size_t`, unsigned integer, identical to pointer size.
* 64-bit on most platforms where pointers are 64-bit.
* 32-bit on platforms where pointers are 32-bit.
*/
#if defined(__LP64__) || defined(_LP64) || defined(__x86_64__) || defined(_WIN64)
#define SZ_DETECT_64_BIT (1)
#define SZ_SIZE_MAX (0xFFFFFFFFFFFFFFFFull) // Largest unsigned integer that fits into 64 bits.
#define SZ_SSIZE_MAX (0x7FFFFFFFFFFFFFFFull) // Largest signed integer that fits into 64 bits.
#else
#define SZ_DETECT_64_BIT (0)
#define SZ_SIZE_MAX (0xFFFFFFFFu) // Largest unsigned integer that fits into 32 bits.
#define SZ_SSIZE_MAX (0x7FFFFFFFu) // Largest signed integer that fits into 32 bits.
#endif
/*
* Debugging and testing.
*/
#ifndef SZ_DEBUG
#if defined(DEBUG) || defined(_DEBUG) // This means "Not using DEBUG information".
#define SZ_DEBUG (1)
#else
#define SZ_DEBUG (0)
#endif
#endif
/* Annotation for the public API symbols:
*
* - `SZ_PUBLIC` is used for functions that are part of the public API.
* - `SZ_INTERNAL` is used for internal helper functions with unstable APIs.
* - `SZ_DYNAMIC` is used for functions that are part of the public API, but are dispatched at runtime.
*/
#ifndef SZ_DYNAMIC
#if SZ_DYNAMIC_DISPATCH
#if defined(_WIN32) || defined(__CYGWIN__)
#define SZ_DYNAMIC __declspec(dllexport)
#define SZ_PUBLIC inline static
#define SZ_INTERNAL inline static
#else
#define SZ_DYNAMIC __attribute__((visibility("default")))
#define SZ_PUBLIC __attribute__((unused)) inline static
#define SZ_INTERNAL __attribute__((always_inline)) inline static
#endif // _WIN32 || __CYGWIN__
#else
#define SZ_DYNAMIC inline static
#define SZ_PUBLIC inline static
#define SZ_INTERNAL inline static
#endif // SZ_DYNAMIC_DISPATCH
#endif // SZ_DYNAMIC
#ifdef __cplusplus
extern "C" {
#endif
/*
* Let's infer the integer types or pull them from LibC,
* if that is allowed by the user.
*/
#if !SZ_AVOID_LIBC
#include <stddef.h> // `size_t`
#include <stdint.h> // `uint8_t`
typedef int8_t sz_i8_t; // Always 8 bits
typedef uint8_t sz_u8_t; // Always 8 bits
typedef uint16_t sz_u16_t; // Always 16 bits
typedef int32_t sz_i32_t; // Always 32 bits
typedef uint32_t sz_u32_t; // Always 32 bits
typedef uint64_t sz_u64_t; // Always 64 bits
typedef int64_t sz_i64_t; // Always 64 bits
typedef size_t sz_size_t; // Pointer-sized unsigned integer, 32 or 64 bits
typedef ptrdiff_t sz_ssize_t; // Signed version of `sz_size_t`, 32 or 64 bits
#else // if SZ_AVOID_LIBC:
typedef signed char sz_i8_t; // Always 8 bits
typedef unsigned char sz_u8_t; // Always 8 bits
typedef unsigned short sz_u16_t; // Always 16 bits
typedef int sz_i32_t; // Always 32 bits
typedef unsigned int sz_u32_t; // Always 32 bits
typedef long long sz_i64_t; // Always 64 bits
typedef unsigned long long sz_u64_t; // Always 64 bits
#if SZ_DETECT_64_BIT
typedef unsigned long long sz_size_t; // 64-bit.
typedef long long sz_ssize_t; // 64-bit.
#else
typedef unsigned sz_size_t; // 32-bit.
typedef unsigned sz_ssize_t; // 32-bit.
#endif // SZ_DETECT_64_BIT
#endif // SZ_AVOID_LIBC
/**
* @brief Compile-time assert macro similar to `static_assert` in C++.
*/
#define sz_static_assert(condition, name) \
typedef struct { \
int static_assert_##name : (condition) ? 1 : -1; \
} sz_static_assert_##name##_t
sz_static_assert(sizeof(sz_size_t) == sizeof(void *), sz_size_t_must_be_pointer_size);
sz_static_assert(sizeof(sz_ssize_t) == sizeof(void *), sz_ssize_t_must_be_pointer_size);
#pragma region Public API
typedef char *sz_ptr_t; // A type alias for `char *`
typedef char const *sz_cptr_t; // A type alias for `char const *`
typedef sz_i8_t sz_error_cost_t; // Character mismatch cost for fuzzy matching functions
typedef enum { sz_false_k = 0, sz_true_k = 1 } sz_bool_t; // Only one relevant bit
typedef enum { sz_less_k = -1, sz_equal_k = 0, sz_greater_k = 1 } sz_ordering_t; // Only three possible states: <=>
/**
* @brief Tiny string-view structure. It's POD type, unlike the `std::string_view`.
*/
typedef struct sz_string_view_t {
sz_cptr_t start;
sz_size_t length;
} sz_string_view_t;
/**
* @brief Enumeration of SIMD capabilities of the target architecture.
* Used to introspect the supported functionality of the dynamic library.
*/
typedef enum sz_capability_t {
sz_cap_serial_k = 1, /// Serial (non-SIMD) capability
sz_cap_any_k = 0x7FFFFFFF, /// Mask representing any capability
sz_cap_arm_neon_k = 1 << 10, /// ARM NEON capability
sz_cap_arm_sve_k = 1 << 11, /// ARM SVE capability TODO: Not yet supported or used
sz_cap_x86_avx2_k = 1 << 20, /// x86 AVX2 capability
sz_cap_x86_avx512f_k = 1 << 21, /// x86 AVX512 F capability
sz_cap_x86_avx512bw_k = 1 << 22, /// x86 AVX512 BW instruction capability
sz_cap_x86_avx512vl_k = 1 << 23, /// x86 AVX512 VL instruction capability
sz_cap_x86_avx512vbmi_k = 1 << 24, /// x86 AVX512 VBMI instruction capability
sz_cap_x86_gfni_k = 1 << 25, /// x86 AVX512 GFNI instruction capability
} sz_capability_t;
/**
* @brief Function to determine the SIMD capabilities of the current machine @b only at @b runtime.
* @return A bitmask of the SIMD capabilities represented as a `sz_capability_t` enum value.
*/
SZ_DYNAMIC sz_capability_t sz_capabilities(void);
/**
* @brief Bit-set structure for 256 possible byte values. Useful for filtering and search.
* @see sz_charset_init, sz_charset_add, sz_charset_contains, sz_charset_invert
*/
typedef union sz_charset_t {
sz_u64_t _u64s[4];
sz_u32_t _u32s[8];
sz_u16_t _u16s[16];
sz_u8_t _u8s[32];
} sz_charset_t;
/** @brief Initializes a bit-set to an empty collection, meaning - all characters are banned. */
SZ_PUBLIC void sz_charset_init(sz_charset_t *s) { s->_u64s[0] = s->_u64s[1] = s->_u64s[2] = s->_u64s[3] = 0; }
/** @brief Adds a character to the set and accepts @b unsigned integers. */
SZ_PUBLIC void sz_charset_add_u8(sz_charset_t *s, sz_u8_t c) { s->_u64s[c >> 6] |= (1ull << (c & 63u)); }
/** @brief Adds a character to the set. Consider @b sz_charset_add_u8. */
SZ_PUBLIC void sz_charset_add(sz_charset_t *s, char c) { sz_charset_add_u8(s, *(sz_u8_t *)(&c)); } // bitcast
/** @brief Checks if the set contains a given character and accepts @b unsigned integers. */
SZ_PUBLIC sz_bool_t sz_charset_contains_u8(sz_charset_t const *s, sz_u8_t c) {
// Checking the bit can be done in different ways:
// - (s->_u64s[c >> 6] & (1ull << (c & 63u))) != 0
// - (s->_u32s[c >> 5] & (1u << (c & 31u))) != 0
// - (s->_u16s[c >> 4] & (1u << (c & 15u))) != 0
// - (s->_u8s[c >> 3] & (1u << (c & 7u))) != 0
return (sz_bool_t)((s->_u64s[c >> 6] & (1ull << (c & 63u))) != 0);
}
/** @brief Checks if the set contains a given character. Consider @b sz_charset_contains_u8. */
SZ_PUBLIC sz_bool_t sz_charset_contains(sz_charset_t const *s, char c) {
return sz_charset_contains_u8(s, *(sz_u8_t *)(&c)); // bitcast
}
/** @brief Inverts the contents of the set, so allowed character get disallowed, and vice versa. */
SZ_PUBLIC void sz_charset_invert(sz_charset_t *s) {
s->_u64s[0] ^= 0xFFFFFFFFFFFFFFFFull, s->_u64s[1] ^= 0xFFFFFFFFFFFFFFFFull, //
s->_u64s[2] ^= 0xFFFFFFFFFFFFFFFFull, s->_u64s[3] ^= 0xFFFFFFFFFFFFFFFFull;
}
typedef void *(*sz_memory_allocate_t)(sz_size_t, void *);
typedef void (*sz_memory_free_t)(void *, sz_size_t, void *);
typedef sz_u64_t (*sz_random_generator_t)(void *);
/**
* @brief Some complex pattern matching algorithms may require memory allocations.
* This structure is used to pass the memory allocator to those functions.
* @see sz_memory_allocator_init_fixed
*/
typedef struct sz_memory_allocator_t {
sz_memory_allocate_t allocate;
sz_memory_free_t free;
void *handle;
} sz_memory_allocator_t;
/**
* @brief Initializes a memory allocator to use the system default `malloc` and `free`.
* @param alloc Memory allocator to initialize.
*/
SZ_PUBLIC void sz_memory_allocator_init_default(sz_memory_allocator_t *alloc);
/**
* @brief Initializes a memory allocator to use a static-capacity buffer.
* No dynamic allocations will be performed.
*
* @param alloc Memory allocator to initialize.
* @param buffer Buffer to use for allocations.
* @param length Length of the buffer. @b Must be greater than 8 bytes. Different values would be optimal for
* different algorithms and input lengths, but 4096 bytes (one RAM page) is a good default.
*/
SZ_PUBLIC void sz_memory_allocator_init_fixed(sz_memory_allocator_t *alloc, void *buffer, sz_size_t length);
/**
* @brief The number of bytes a stack-allocated string can hold, including the SZ_NULL termination character.
* ! This can't be changed from outside. Don't use the `#error` as it may already be included and set.
*/
#ifdef SZ_STRING_INTERNAL_SPACE
#undef SZ_STRING_INTERNAL_SPACE
#endif
#define SZ_STRING_INTERNAL_SPACE (23)
/**
* @brief Tiny memory-owning string structure with a Small String Optimization (SSO).
* Differs in layout from Folly, Clang, GCC, and probably most other implementations.
* It's designed to avoid any branches on read-only operations, and can store up
* to 22 characters on stack, followed by the SZ_NULL-termination character.
*
* @section Changing Length
*
* One nice thing about this design, is that you can, in many cases, change the length of the string
* without any branches, invoking a `+=` or `-=` on the 64-bit `length` field. If the string is on heap,
* the solution is obvious. If it's on stack, inplace decrement wouldn't affect the top bytes of the string,
* only changing the last byte containing the length.
*/
typedef union sz_string_t {
struct internal {
sz_ptr_t start;
sz_u8_t length;
char chars[SZ_STRING_INTERNAL_SPACE];
} internal;
struct external {
sz_ptr_t start;
sz_size_t length;
/// @brief Number of bytes, that have been allocated for this string, equals to (capacity + 1).
sz_size_t space;
sz_size_t padding;
} external;
sz_u64_t u64s[4];
} sz_string_t;
typedef sz_u64_t (*sz_hash_t)(sz_cptr_t, sz_size_t);
typedef sz_bool_t (*sz_equal_t)(sz_cptr_t, sz_cptr_t, sz_size_t);
typedef sz_ordering_t (*sz_order_t)(sz_cptr_t, sz_size_t, sz_cptr_t, sz_size_t);
typedef void (*sz_to_converter_t)(sz_cptr_t, sz_size_t, sz_ptr_t);
/**
* @brief Computes the 64-bit unsigned hash of a string. Fairly fast for short strings,
* simple implementation, and supports rolling computation, reused in other APIs.
* Similar to `std::hash` in C++.
*
* @param text String to hash.
* @param length Number of bytes in the text.
* @return 64-bit hash value.
*
* @see sz_hashes, sz_hashes_fingerprint, sz_hashes_intersection
*/
SZ_PUBLIC sz_u64_t sz_hash(sz_cptr_t text, sz_size_t length);
/** @copydoc sz_hash */
SZ_PUBLIC sz_u64_t sz_hash_serial(sz_cptr_t text, sz_size_t length);
/**
* @brief Checks if two string are equal.
* Similar to `memcmp(a, b, length) == 0` in LibC and `a == b` in STL.
*
* The implementation of this function is very similar to `sz_order`, but the usage patterns are different.
* This function is more often used in parsing, while `sz_order` is often used in sorting.
* It works best on platforms with cheap
*
* @param a First string to compare.
* @param b Second string to compare.
* @param length Number of bytes in both strings.
* @return 1 if strings match, 0 otherwise.
*/
SZ_DYNAMIC sz_bool_t sz_equal(sz_cptr_t a, sz_cptr_t b, sz_size_t length);
/** @copydoc sz_equal */
SZ_PUBLIC sz_bool_t sz_equal_serial(sz_cptr_t a, sz_cptr_t b, sz_size_t length);
/**
* @brief Estimates the relative order of two strings. Equivalent to `memcmp(a, b, length)` in LibC.
* Can be used on different length strings.
*
* @param a First string to compare.
* @param a_length Number of bytes in the first string.
* @param b Second string to compare.
* @param b_length Number of bytes in the second string.
* @return Negative if (a < b), positive if (a > b), zero if they are equal.
*/
SZ_DYNAMIC sz_ordering_t sz_order(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length);
/** @copydoc sz_order */
SZ_PUBLIC sz_ordering_t sz_order_serial(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length);
/**
* @brief Equivalent to `for (char & c : text) c = tolower(c)`.
*
* ASCII characters [A, Z] map to decimals [65, 90], and [a, z] map to [97, 122].
* So there are 26 english letters, shifted by 32 values, meaning that a conversion
* can be done by flipping the 5th bit each inappropriate character byte. This, however,
* breaks for extended ASCII, so a different solution is needed.
* http://0x80.pl/notesen/2016-01-06-swar-swap-case.html
*
* @param text String to be normalized.
* @param length Number of bytes in the string.
* @param result Output string, can point to the same address as ::text.
*/
SZ_PUBLIC void sz_tolower(sz_cptr_t text, sz_size_t length, sz_ptr_t result);
/**
* @brief Equivalent to `for (char & c : text) c = toupper(c)`.
*
* ASCII characters [A, Z] map to decimals [65, 90], and [a, z] map to [97, 122].
* So there are 26 english letters, shifted by 32 values, meaning that a conversion
* can be done by flipping the 5th bit each inappropriate character byte. This, however,
* breaks for extended ASCII, so a different solution is needed.
* http://0x80.pl/notesen/2016-01-06-swar-swap-case.html
*
* @param text String to be normalized.
* @param length Number of bytes in the string.
* @param result Output string, can point to the same address as ::text.
*/
SZ_PUBLIC void sz_toupper(sz_cptr_t text, sz_size_t length, sz_ptr_t result);
/**
* @brief Equivalent to `for (char & c : text) c = toascii(c)`.
*
* @param text String to be normalized.
* @param length Number of bytes in the string.
* @param result Output string, can point to the same address as ::text.
*/
SZ_PUBLIC void sz_toascii(sz_cptr_t text, sz_size_t length, sz_ptr_t result);
/**
* @brief Generates a random string for a given alphabet, avoiding integer division and modulo operations.
* Similar to `text[i] = alphabet[rand() % cardinality]`.
*
* The modulo operation is expensive, and should be avoided in performance-critical code.
* We avoid it using small lookup tables and replacing it with a multiplication and shifts, similar to `libdivide`.
* Alternative algorithms would include:
* - Montgomery form: https://en.algorithmica.org/hpc/number-theory/montgomery/
* - Barret reduction: https://www.nayuki.io/page/barrett-reduction-algorithm
* - Lemire's trick: https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
*
* @param alphabet Set of characters to sample from.
* @param cardinality Number of characters to sample from.
* @param text Output string, can point to the same address as ::text.
* @param generate Callback producing random numbers given the generator state.
* @param generator Generator state, can be a pointer to a seed, or a pointer to a random number generator.
*/
SZ_PUBLIC void sz_generate(sz_cptr_t alphabet, sz_size_t cardinality, sz_ptr_t text, sz_size_t length,
sz_random_generator_t generate, void *generator);
/**
* @brief Similar to `memcpy`, copies contents of one string into another.
* The behavior is undefined if the strings overlap.
*
* @param target String to copy into.
* @param length Number of bytes to copy.
* @param source String to copy from.
*/
SZ_DYNAMIC void sz_copy(sz_ptr_t target, sz_cptr_t source, sz_size_t length);
/** @copydoc sz_copy */
SZ_PUBLIC void sz_copy_serial(sz_ptr_t target, sz_cptr_t source, sz_size_t length);
/**
* @brief Similar to `memmove`, copies (moves) contents of one string into another.
* Unlike `sz_copy`, allows overlapping strings as arguments.
*
* @param target String to copy into.
* @param length Number of bytes to copy.
* @param source String to copy from.
*/
SZ_DYNAMIC void sz_move(sz_ptr_t target, sz_cptr_t source, sz_size_t length);
/** @copydoc sz_move */
SZ_PUBLIC void sz_move_serial(sz_ptr_t target, sz_cptr_t source, sz_size_t length);
typedef void (*sz_move_t)(sz_ptr_t, sz_cptr_t, sz_size_t);
/**
* @brief Similar to `memset`, fills a string with a given value.
*
* @param target String to fill.
* @param length Number of bytes to fill.
* @param value Value to fill with.
*/
SZ_DYNAMIC void sz_fill(sz_ptr_t target, sz_size_t length, sz_u8_t value);
/** @copydoc sz_fill */
SZ_PUBLIC void sz_fill_serial(sz_ptr_t target, sz_size_t length, sz_u8_t value);
typedef void (*sz_fill_t)(sz_ptr_t, sz_size_t, sz_u8_t);
/**
* @brief Initializes a string class instance to an empty value.
*/
SZ_PUBLIC void sz_string_init(sz_string_t *string);
/**
* @brief Convenience function checking if the provided string is stored inside of the ::string instance itself,
* alternative being - allocated in a remote region of the heap.
*/
SZ_PUBLIC sz_bool_t sz_string_is_on_stack(sz_string_t const *string);
/**
* @brief Unpacks the opaque instance of a string class into its components.
* Recommended to use only in read-only operations.
*
* @param string String to unpack.
* @param start Pointer to the start of the string.
* @param length Number of bytes in the string, before the SZ_NULL character.
* @param space Number of bytes allocated for the string (heap or stack), including the SZ_NULL character.
* @param is_external Whether the string is allocated on the heap externally, or fits withing ::string instance.
*/
SZ_PUBLIC void sz_string_unpack(sz_string_t const *string, sz_ptr_t *start, sz_size_t *length, sz_size_t *space,
sz_bool_t *is_external);
/**
* @brief Unpacks only the start and length of the string.
* Recommended to use only in read-only operations.
*
* @param string String to unpack.
* @param start Pointer to the start of the string.
* @param length Number of bytes in the string, before the SZ_NULL character.
*/
SZ_PUBLIC void sz_string_range(sz_string_t const *string, sz_ptr_t *start, sz_size_t *length);
/**
* @brief Constructs a string of a given ::length with noisy contents.
* Use the returned character pointer to populate the string.
*
* @param string String to initialize.
* @param length Number of bytes in the string, before the SZ_NULL character.
* @param allocator Memory allocator to use for the allocation.
* @return SZ_NULL if the operation failed, pointer to the start of the string otherwise.
*/
SZ_PUBLIC sz_ptr_t sz_string_init_length(sz_string_t *string, sz_size_t length, sz_memory_allocator_t *allocator);
/**
* @brief Doesn't change the contents or the length of the string, but grows the available memory capacity.
* This is beneficial, if several insertions are expected, and we want to minimize allocations.
*
* @param string String to grow.
* @param new_capacity The number of characters to reserve space for, including existing ones.
* @param allocator Memory allocator to use for the allocation.
* @return SZ_NULL if the operation failed, pointer to the new start of the string otherwise.
*/
SZ_PUBLIC sz_ptr_t sz_string_reserve(sz_string_t *string, sz_size_t new_capacity, sz_memory_allocator_t *allocator);
/**
* @brief Grows the string by adding an uninitialized region of ::added_length at the given ::offset.
* Would often be used in conjunction with one or more `sz_copy` calls to populate the allocated region.
* Similar to `sz_string_reserve`, but changes the length of the ::string.
*
* @param string String to grow.
* @param offset Offset of the first byte to reserve space for.
* If provided offset is larger than the length, it will be capped.
* @param added_length The number of new characters to reserve space for.
* @param allocator Memory allocator to use for the allocation.
* @return SZ_NULL if the operation failed, pointer to the new start of the string otherwise.
*/
SZ_PUBLIC sz_ptr_t sz_string_expand(sz_string_t *string, sz_size_t offset, sz_size_t added_length,
sz_memory_allocator_t *allocator);
/**
* @brief Removes a range from a string. Changes the length, but not the capacity.
* Performs no allocations or deallocations and can't fail.
*
* @param string String to clean.
* @param offset Offset of the first byte to remove.
* @param length Number of bytes to remove. Out-of-bound ranges will be capped.
* @return Number of bytes removed.
*/
SZ_PUBLIC sz_size_t sz_string_erase(sz_string_t *string, sz_size_t offset, sz_size_t length);
/**
* @brief Shrinks the string to fit the current length, if it's allocated on the heap.
* Teh reverse operation of ::sz_string_reserve.
*
* @param string String to shrink.
* @param allocator Memory allocator to use for the allocation.
* @return Whether the operation was successful. The only failures can come from the allocator.
*/
SZ_PUBLIC sz_ptr_t sz_string_shrink_to_fit(sz_string_t *string, sz_memory_allocator_t *allocator);
/**
* @brief Frees the string, if it's allocated on the heap.
* If the string is on the stack, the function clears/resets the state.
*/
SZ_PUBLIC void sz_string_free(sz_string_t *string, sz_memory_allocator_t *allocator);
#pragma endregion
#pragma region Fast Substring Search API
typedef sz_cptr_t (*sz_find_byte_t)(sz_cptr_t, sz_size_t, sz_cptr_t);
typedef sz_cptr_t (*sz_find_t)(sz_cptr_t, sz_size_t, sz_cptr_t, sz_size_t);
typedef sz_cptr_t (*sz_find_set_t)(sz_cptr_t, sz_size_t, sz_charset_t const *);
/**
* @brief Locates first matching byte in a string. Equivalent to `memchr(haystack, *needle, h_length)` in LibC.
*
* X86_64 implementation: https://github.com/lattera/glibc/blob/master/sysdeps/x86_64/memchr.S
* Aarch64 implementation: https://github.com/lattera/glibc/blob/master/sysdeps/aarch64/memchr.S
*
* @param haystack Haystack - the string to search in.
* @param h_length Number of bytes in the haystack.
* @param needle Needle - single-byte substring to find.
* @return Address of the first match.
*/
SZ_DYNAMIC sz_cptr_t sz_find_byte(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle);
/** @copydoc sz_find_byte */
SZ_PUBLIC sz_cptr_t sz_find_byte_serial(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle);
/**
* @brief Locates last matching byte in a string. Equivalent to `memrchr(haystack, *needle, h_length)` in LibC.
*
* X86_64 implementation: https://github.com/lattera/glibc/blob/master/sysdeps/x86_64/memrchr.S
* Aarch64 implementation: missing
*
* @param haystack Haystack - the string to search in.
* @param h_length Number of bytes in the haystack.
* @param needle Needle - single-byte substring to find.
* @return Address of the last match.
*/
SZ_DYNAMIC sz_cptr_t sz_rfind_byte(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle);
/** @copydoc sz_rfind_byte */
SZ_PUBLIC sz_cptr_t sz_rfind_byte_serial(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle);
/**
* @brief Locates first matching substring.
* Equivalent to `memmem(haystack, h_length, needle, n_length)` in LibC.
* Similar to `strstr(haystack, needle)` in LibC, but requires known length.
*
* @param haystack Haystack - the string to search in.
* @param h_length Number of bytes in the haystack.
* @param needle Needle - substring to find.
* @param n_length Number of bytes in the needle.
* @return Address of the first match.
*/
SZ_DYNAMIC sz_cptr_t sz_find(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle, sz_size_t n_length);
/** @copydoc sz_find */
SZ_PUBLIC sz_cptr_t sz_find_serial(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle, sz_size_t n_length);
/**
* @brief Locates the last matching substring.
*
* @param haystack Haystack - the string to search in.
* @param h_length Number of bytes in the haystack.
* @param needle Needle - substring to find.
* @param n_length Number of bytes in the needle.
* @return Address of the last match.
*/
SZ_DYNAMIC sz_cptr_t sz_rfind(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle, sz_size_t n_length);
/** @copydoc sz_rfind */
SZ_PUBLIC sz_cptr_t sz_rfind_serial(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle, sz_size_t n_length);
/**
* @brief Finds the first character present from the ::set, present in ::text.
* Equivalent to `strspn(text, accepted)` and `strcspn(text, rejected)` in LibC.
* May have identical implementation and performance to ::sz_rfind_charset.
*
* @param text String to be trimmed.
* @param accepted Set of accepted characters.
* @return Number of bytes forming the prefix.
*/
SZ_DYNAMIC sz_cptr_t sz_find_charset(sz_cptr_t text, sz_size_t length, sz_charset_t const *set);
/** @copydoc sz_find_charset */
SZ_PUBLIC sz_cptr_t sz_find_charset_serial(sz_cptr_t text, sz_size_t length, sz_charset_t const *set);
/**
* @brief Finds the last character present from the ::set, present in ::text.
* Equivalent to `strspn(text, accepted)` and `strcspn(text, rejected)` in LibC.
* May have identical implementation and performance to ::sz_find_charset.
*
* Useful for parsing, when we want to skip a set of characters. Examples:
* * 6 whitespaces: " \t\n\r\v\f".
* * 16 digits forming a float number: "0123456789,.eE+-".
* * 5 HTML reserved characters: "\"'&<>", of which "<>" can be useful for parsing.
* * 2 JSON string special characters useful to locate the end of the string: "\"\\".
*
* @param text String to be trimmed.
* @param rejected Set of rejected characters.
* @return Number of bytes forming the prefix.
*/
SZ_DYNAMIC sz_cptr_t sz_rfind_charset(sz_cptr_t text, sz_size_t length, sz_charset_t const *set);
/** @copydoc sz_rfind_charset */
SZ_PUBLIC sz_cptr_t sz_rfind_charset_serial(sz_cptr_t text, sz_size_t length, sz_charset_t const *set);
#pragma endregion
#pragma region String Similarity Measures API
/**
* @brief Computes the Levenshtein edit-distance between two strings using the Wagner-Fisher algorithm.
* Similar to the Needleman-Wunsch alignment algorithm. Often used in fuzzy string matching.
*
* @param a First string to compare.
* @param a_length Number of bytes in the first string.
* @param b Second string to compare.
* @param b_length Number of bytes in the second string.
*
* @param alloc Temporary memory allocator. Only some of the rows of the matrix will be allocated,
* so the memory usage is linear in relation to ::a_length and ::b_length.
* If SZ_NULL is passed, will initialize to the systems default `malloc`.
* @param bound Upper bound on the distance, that allows us to exit early.
* If zero is passed, the maximum possible distance will be equal to the length of the longer input.
* @return Unsigned integer for edit distance, the `bound` if was exceeded or `SZ_SIZE_MAX`
* if the memory allocation failed.
*
* @see sz_memory_allocator_init_fixed, sz_memory_allocator_init_default
* @see https://en.wikipedia.org/wiki/Levenshtein_distance
*/
SZ_DYNAMIC sz_size_t sz_edit_distance(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length, //
sz_size_t bound, sz_memory_allocator_t *alloc);
/** @copydoc sz_edit_distance */
SZ_PUBLIC sz_size_t sz_edit_distance_serial(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length, //
sz_size_t bound, sz_memory_allocator_t *alloc);
typedef sz_size_t (*sz_edit_distance_t)(sz_cptr_t, sz_size_t, sz_cptr_t, sz_size_t, sz_size_t, sz_memory_allocator_t *);
/**
* @brief Computes Needleman–Wunsch alignment score for two string. Often used in bioinformatics and cheminformatics.
* Similar to the Levenshtein edit-distance, parameterized for gap and substitution penalties.
*
* Not commutative in the general case, as the order of the strings matters, as `sz_alignment_score(a, b)` may
* not be equal to `sz_alignment_score(b, a)`. Becomes @b commutative, if the substitution costs are symmetric.
* Equivalent to the negative Levenshtein distance, if: `gap == -1` and `subs[i][j] == (i == j ? 0: -1)`.
*
* @param a First string to compare.
* @param a_length Number of bytes in the first string.
* @param b Second string to compare.
* @param b_length Number of bytes in the second string.
* @param gap Penalty cost for gaps - insertions and removals.
* @param subs Substitution costs matrix with 256 x 256 values for all pairs of characters.
*
* @param alloc Temporary memory allocator. Only some of the rows of the matrix will be allocated,
* so the memory usage is linear in relation to ::a_length and ::b_length.
* If SZ_NULL is passed, will initialize to the systems default `malloc`.
* @return Signed similarity score. Can be negative, depending on the substitution costs.
* If the memory allocation fails, the function returns `SZ_SSIZE_MAX`.
*
* @see sz_memory_allocator_init_fixed, sz_memory_allocator_init_default
* @see https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
*/
SZ_DYNAMIC sz_ssize_t sz_alignment_score(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length, //
sz_error_cost_t const *subs, sz_error_cost_t gap, //
sz_memory_allocator_t *alloc);
/** @copydoc sz_alignment_score */
SZ_PUBLIC sz_ssize_t sz_alignment_score_serial(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length, //
sz_error_cost_t const *subs, sz_error_cost_t gap, //
sz_memory_allocator_t *alloc);
typedef sz_ssize_t (*sz_alignment_score_t)(sz_cptr_t, sz_size_t, sz_cptr_t, sz_size_t, sz_error_cost_t const *,
sz_error_cost_t, sz_memory_allocator_t *);
typedef void (*sz_hash_callback_t)(sz_cptr_t, sz_size_t, sz_u64_t, void *user);
/**
* @brief Computes the Karp-Rabin rolling hashes of a string supplying them to the provided `callback`.
* Can be used for similarity scores, search, ranking, etc.
*
* Rabin-Karp-like rolling hashes can have very high-level of collisions and depend
* on the choice of bases and the prime number. That's why, often two hashes from the same
* family are used with different bases.
*
* 1. Kernighan and Ritchie's function uses 31, a prime close to the size of English alphabet.
* 2. To be friendlier to byte-arrays and UTF8, we use 257 for the second function.
*
* Choosing the right ::window_length is task- and domain-dependant. For example, most English words are
* between 3 and 7 characters long, so a window of 4 bytes would be a good choice. For DNA sequences,
* the ::window_length might be a multiple of 3, as the codons are 3 (nucleotides) bytes long.
* With such minimalistic alphabets of just four characters (AGCT) longer windows might be needed.
* For protein sequences the alphabet is 20 characters long, so the window can be shorter, than for DNAs.
*
* @param text String to hash.
* @param length Number of bytes in the string.
* @param window_length Length of the rolling window in bytes.
* @param window_step Step of reported hashes. @b Must be power of two. Should be smaller than `window_length`.
* @param callback Function receiving the start & length of a substring, the hash, and the `callback_handle`.
* @param callback_handle Optional user-provided pointer to be passed to the `callback`.
* @see sz_hashes_fingerprint, sz_hashes_intersection
*/
SZ_DYNAMIC void sz_hashes(sz_cptr_t text, sz_size_t length, sz_size_t window_length, sz_size_t window_step, //
sz_hash_callback_t callback, void *callback_handle);
/** @copydoc sz_hashes */
SZ_PUBLIC void sz_hashes_serial(sz_cptr_t text, sz_size_t length, sz_size_t window_length, sz_size_t window_step, //
sz_hash_callback_t callback, void *callback_handle);
typedef void (*sz_hashes_t)(sz_cptr_t, sz_size_t, sz_size_t, sz_size_t, sz_hash_callback_t, void *);
/**
* @brief Computes the Karp-Rabin rolling hashes of a string outputting a binary fingerprint.
* Such fingerprints can be compared with Hamming or Jaccard (Tanimoto) distance for similarity.
*
* The algorithm doesn't clear the fingerprint buffer on start, so it can be invoked multiple times
* to produce a fingerprint of a longer string, by passing the previous fingerprint as the ::fingerprint.
* It can also be reused to produce multi-resolution fingerprints by changing the ::window_length
* and calling the same function multiple times for the same input ::text.
*
* Processes large strings in parts to maximize the cache utilization, using a small on-stack buffer,
* avoiding cache-coherency penalties of remote on-heap buffers.
*
* @param text String to hash.
* @param length Number of bytes in the string.
* @param fingerprint Output fingerprint buffer.
* @param fingerprint_bytes Number of bytes in the fingerprint buffer.
* @param window_length Length of the rolling window in bytes.
* @see sz_hashes, sz_hashes_intersection
*/
SZ_PUBLIC void sz_hashes_fingerprint(sz_cptr_t text, sz_size_t length, sz_size_t window_length, //
sz_ptr_t fingerprint, sz_size_t fingerprint_bytes);
typedef void (*sz_hashes_fingerprint_t)(sz_cptr_t, sz_size_t, sz_size_t, sz_ptr_t, sz_size_t);
/**
* @brief Given a hash-fingerprint of a textual document, computes the number of intersecting hashes
* of the incoming document. Can be used for document scoring and search.
*
* Processes large strings in parts to maximize the cache utilization, using a small on-stack buffer,
* avoiding cache-coherency penalties of remote on-heap buffers.
*
* @param text Input document.
* @param length Number of bytes in the input document.
* @param fingerprint Reference document fingerprint.
* @param fingerprint_bytes Number of bytes in the reference documents fingerprint.
* @param window_length Length of the rolling window in bytes.
* @see sz_hashes, sz_hashes_fingerprint
*/
SZ_PUBLIC sz_size_t sz_hashes_intersection(sz_cptr_t text, sz_size_t length, sz_size_t window_length, //
sz_cptr_t fingerprint, sz_size_t fingerprint_bytes);
typedef sz_size_t (*sz_hashes_intersection_t)(sz_cptr_t, sz_size_t, sz_size_t, sz_cptr_t, sz_size_t);
#pragma endregion
#pragma region Convenience API
/**
* @brief Finds the first character in the haystack, that is present in the needle.
* Convenience function, reused across different language bindings.
* @see sz_find_charset
*/
SZ_DYNAMIC sz_cptr_t sz_find_char_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length);
/**
* @brief Finds the first character in the haystack, that is @b not present in the needle.
* Convenience function, reused across different language bindings.
* @see sz_find_charset
*/
SZ_DYNAMIC sz_cptr_t sz_find_char_not_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length);
/**
* @brief Finds the last character in the haystack, that is present in the needle.
* Convenience function, reused across different language bindings.
* @see sz_find_charset
*/
SZ_DYNAMIC sz_cptr_t sz_rfind_char_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length);
/**
* @brief Finds the last character in the haystack, that is @b not present in the needle.
* Convenience function, reused across different language bindings.
* @see sz_find_charset
*/
SZ_DYNAMIC sz_cptr_t sz_rfind_char_not_from(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length);
#pragma endregion
#pragma region String Sequences API
struct sz_sequence_t;
typedef sz_cptr_t (*sz_sequence_member_start_t)(struct sz_sequence_t const *, sz_size_t);
typedef sz_size_t (*sz_sequence_member_length_t)(struct sz_sequence_t const *, sz_size_t);
typedef sz_bool_t (*sz_sequence_predicate_t)(struct sz_sequence_t const *, sz_size_t);
typedef sz_bool_t (*sz_sequence_comparator_t)(struct sz_sequence_t const *, sz_size_t, sz_size_t);
typedef sz_bool_t (*sz_string_is_less_t)(sz_cptr_t, sz_size_t, sz_cptr_t, sz_size_t);
typedef struct sz_sequence_t {
sz_u64_t *order;
sz_size_t count;
sz_sequence_member_start_t get_start;
sz_sequence_member_length_t get_length;
void const *handle;
} sz_sequence_t;
/**
* @brief Initiates the sequence structure from a tape layout, used by Apache Arrow.
* Expects ::offsets to contains `count + 1` entries, the last pointing at the end
* of the last string, indicating the total length of the ::tape.
*/
SZ_PUBLIC void sz_sequence_from_u32tape(sz_cptr_t *start, sz_u32_t const *offsets, sz_size_t count,
sz_sequence_t *sequence);
/**
* @brief Initiates the sequence structure from a tape layout, used by Apache Arrow.
* Expects ::offsets to contains `count + 1` entries, the last pointing at the end
* of the last string, indicating the total length of the ::tape.
*/
SZ_PUBLIC void sz_sequence_from_u64tape(sz_cptr_t *start, sz_u64_t const *offsets, sz_size_t count,
sz_sequence_t *sequence);
/**
* @brief Similar to `std::partition`, given a predicate splits the sequence into two parts.
* The algorithm is unstable, meaning that elements may change relative order, as long
* as they are in the right partition. This is the simpler algorithm for partitioning.
*/
SZ_PUBLIC sz_size_t sz_partition(sz_sequence_t *sequence, sz_sequence_predicate_t predicate);
/**
* @brief Inplace `std::set_union` for two consecutive chunks forming the same continuous `sequence`.
*
* @param partition The number of elements in the first sub-sequence in `sequence`.
* @param less Comparison function, to determine the lexicographic ordering.
*/
SZ_PUBLIC void sz_merge(sz_sequence_t *sequence, sz_size_t partition, sz_sequence_comparator_t less);
/**
* @brief Sorting algorithm, combining Radix Sort for the first 32 bits of every word
* and a follow-up by a more conventional sorting procedure on equally prefixed parts.
*/
SZ_PUBLIC void sz_sort(sz_sequence_t *sequence);
/**
* @brief Partial sorting algorithm, combining Radix Sort for the first 32 bits of every word
* and a follow-up by a more conventional sorting procedure on equally prefixed parts.
*/
SZ_PUBLIC void sz_sort_partial(sz_sequence_t *sequence, sz_size_t n);
/**
* @brief Intro-Sort algorithm that supports custom comparators.
*/
SZ_PUBLIC void sz_sort_intro(sz_sequence_t *sequence, sz_sequence_comparator_t less);
#pragma endregion
/*
* Hardware feature detection.
* All of those can be controlled by the user.
*/
#ifndef SZ_USE_X86_AVX512
#ifdef __AVX512BW__
#define SZ_USE_X86_AVX512 1
#else
#define SZ_USE_X86_AVX512 0
#endif
#endif
#ifndef SZ_USE_X86_AVX2
#ifdef __AVX2__
#define SZ_USE_X86_AVX2 1
#else
#define SZ_USE_X86_AVX2 0
#endif
#endif
#ifndef SZ_USE_ARM_NEON
#ifdef __ARM_NEON
#define SZ_USE_ARM_NEON 1
#else
#define SZ_USE_ARM_NEON 0
#endif
#endif
#ifndef SZ_USE_ARM_SVE
#ifdef __ARM_FEATURE_SVE
#define SZ_USE_ARM_SVE 1
#else
#define SZ_USE_ARM_SVE 0
#endif
#endif
/*
* Include hardware-specific headers.
*/
#if SZ_USE_X86_AVX512 || SZ_USE_X86_AVX2
#include <immintrin.h>
#endif // SZ_USE_X86...
#if SZ_USE_ARM_NEON
#include <arm_acle.h>
#include <arm_neon.h>
#endif // SZ_USE_ARM_NEON
#if SZ_USE_ARM_SVE
#include <arm_sve.h>
#endif // SZ_USE_ARM_SVE
#pragma region Hardware-Specific API
#if SZ_USE_X86_AVX512
/** @copydoc sz_equal_serial */
SZ_PUBLIC sz_bool_t sz_equal_avx512(sz_cptr_t a, sz_cptr_t b, sz_size_t length);
/** @copydoc sz_order_serial */
SZ_PUBLIC sz_ordering_t sz_order_avx512(sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length);
/** @copydoc sz_copy_serial */
SZ_PUBLIC void sz_copy_avx512(sz_ptr_t target, sz_cptr_t source, sz_size_t length);
/** @copydoc sz_move_serial */
SZ_PUBLIC void sz_move_avx512(sz_ptr_t target, sz_cptr_t source, sz_size_t length);
/** @copydoc sz_fill_serial */
SZ_PUBLIC void sz_fill_avx512(sz_ptr_t target, sz_size_t length, sz_u8_t value);
/** @copydoc sz_find_byte */
SZ_PUBLIC sz_cptr_t sz_find_byte_avx512(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle);
/** @copydoc sz_rfind_byte */
SZ_PUBLIC sz_cptr_t sz_rfind_byte_avx512(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle);
/** @copydoc sz_find */
SZ_PUBLIC sz_cptr_t sz_find_avx512(sz_cptr_t haystack, sz_size_t h_length, sz_cptr_t needle, sz_size_t n_length);
/** @copydoc sz_rfind */