// FFMalloc - an experimental alternative memory allocator
// Unlike conventional allocators, maximizing space efficiency
// is not a design goal. Instead, FFMalloc makes exploiting
// use-after-free bugs in calling applications impossible
// because freed memory is never reused (only released back to
// the operating system when possible). FFMalloc depends on the
// extensive virtual address space available on 64-bit operating
// systems and is unsuitable for a 32-bit OS
#ifdef _WIN64
#include <SDKDDKVer.h>
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
#include <Windows.h>
#endif
/*** Compilation control ***/
// If the target application is single threaded then defining
// FFSINGLE_THREADED will remove threading support which slightly
// reduces the library size but also simplifies debugging.
// On Windows, FFSINGLE_THREADED must be defined if FFmalloc is
// compiled as a static library. Thread safety is only supported when
// compiled as a DLL
#if defined(_WIN64) && !defined(_WINDLL)
#define FFSINGLE_THREADED
#endif
// To include statistics collection in the library, define
// FF_PROFILE. There is a small size and time cost to doing so
//#define FF_PROFILE
// On x86_64 allocation alignment is usually 16-byte. However, this is only
// required to support certain SSE instructions. If those are not used then
// alignment can be 8-byte and therefore more efficient. Pointers don't seem
// to ever require 16-byte alignment and so 8-byte alignment will always be
// used for allocations of 8 bytes or less. This is backed up in practice by
// TCMalloc. To enable 8-byte alignment, define FF_EIGHTBYTEALIGN during
// library compilation
//#define FF_EIGHTBYTEALIGN
/*** Headers ***/
#include "ffmalloc.h"
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifdef _WIN64
#ifdef FF_PROFILE
#include <Psapi.h>
#endif
#else
#include <unistd.h>
#include <sys/mman.h>
#include <limits.h>
#include <errno.h>
#ifndef FFSINGLE_THREADED
#include <sched.h>
#include <pthread.h>
#endif
#ifdef FF_PROFILE
#include <sys/time.h>
#include <sys/resource.h>
#endif
// Caution - defining this symbol allows cross compilation on older Linuxes;
// however, running on those kernels risks ffmalloc overwriting other
// memory allocations. ffmalloc is only intended for Linux kernel 4.17 or later
#ifndef MAP_FIXED_NOREPLACE
#define MAP_FIXED_NOREPLACE 0x100000
#endif
#endif
// MAP_POPULATE is Linux-specific.
#ifndef MAP_POPULATE
#define MAP_POPULATE 0
#endif
typedef unsigned char byte;
/*** Library Constants ***/
// GCC and Visual C++ disagree on how many bits "long" is. Define
// key constants to make sure bitwise operations aren't truncated
#define ONE64 UINT64_C(1)
#define TWO64 UINT64_C(2)
#define THREE64 UINT64_C(3)
#define FOUR64 UINT64_C(4)
#define SEVEN64 UINT64_C(7)
#define EIGHT64 UINT64_C(8)
#define FIFTEEN64 UINT64_C(15)
#define SIXTYTHREE64 UINT64_C(63)
// The maximum size of a single memory pool. Must be a power of
// two greater than or equal to either 1MB or the size of a page if
// large (2MB or 1GB) pages are used instead of 4KB
#define POOL_SIZE_BITS 22
#define POOL_SIZE (ONE64 << POOL_SIZE_BITS)
// The size of a single page of memory from the OS
#define PAGE_SIZE UINT64_C(4096)
// Half of an OS memory page
#define HALF_PAGE UINT64_C(2048)
// The number of pages to assign from a pool to a thread cache
// when a thread cache is out of free pages. Must be an integral
// divisor of (POOL_SIZE / PAGE_SIZE)
#define PAGES_PER_REFILL 128
// The minimum number of consecutive pages ready to return to the
// OS required before calling munmap/VirtualFree. Higher values
// help mitigate VMA growth on Linux and reduce expensive
// system calls on either OS at the cost of holding onto unneeded
// pages longer than strictly necessary.
#define MIN_PAGES_TO_FREE 8
// The maximum number of arenas allowed to exist at the same time
#define MAX_ARENAS 256
// The maximum number of large allocation pool lists allowed per
// arena regardless of processor count
#define MAX_LARGE_LISTS 8
// The maximum number of large allocation pools per arena, per
// CPU list. In other words, each arena including the default will
// have at most MAX_LARGE_LISTS * MAX_POOLS_PER_LIST large pools in
// use at any one time
#define MAX_POOLS_PER_LIST 3
// The number of bits matched at the root level of
// the page pool radix tree. Current x86_64 hardware supports only
// 48 bits in a pointer. Assuming POOL_SIZE is kept at its default
// value of 4MB, then 26 bits in total need to be tracked.
// Depending on build and processor, Windows might only support 44-bit
// pointers, but go ahead and pretend it will always use 48 too
#define ROOT_BITS 8
#define STEM_COUNT (ONE64 << ROOT_BITS)
// The number of bits matched at the intermediate level of the page pool
// radix tree
#define STEM_BITS 8
#define LEAVES_PER_STEM (ONE64 << STEM_BITS)
// The number of bits matched at the leaf level of the page pool radix tree
#define LEAF_BITS (48 - ROOT_BITS - STEM_BITS - POOL_SIZE_BITS)
#define POOLS_PER_LEAF (ONE64 << LEAF_BITS)
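// As an illustrative sketch with the default values above: LEAF_BITS works out
// to 48 - 8 - 8 - 22 = 10, giving 256 stem slots at the root, 256 leaves per
// stem, and 1024 pool slots per leaf. A 48-bit pointer p is then decomposed the
// same way find_pool_for_ptr does later in this file:
//   stemIndex = p >> 40            (top ROOT_BITS bits)
//   leafIndex = (p >> 32) & 0xFF   (next STEM_BITS bits)
//   poolIndex = (p >> 22) & 0x3FF  (next LEAF_BITS bits)
// leaving the low POOL_SIZE_BITS bits as the offset within a pool-sized window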
#ifdef FFSINGLE_THREADED
// For a single threaded process, making lots of small allocations
// from the page pool to the one-and-only thread cache is pointless
#undef PAGES_PER_REFILL
#define PAGES_PER_REFILL (POOL_SIZE / PAGE_SIZE)
// When single threaded, having multiple parallel large pool lists has
// no advantage and wastes resources
#undef MAX_LARGE_LISTS
#define MAX_LARGE_LISTS 1
#endif
/*** Compiler compatibility ***/
#ifdef _WIN64
#define __attrUnusedParam
#else
// Disable warning about unused size parameter in non-profiling mode
#define __attrUnusedParam __attribute__((unused))
#endif
/*** Alignment control constants and macros ***/
#ifdef FF_EIGHTBYTEALIGN
// Defines the minimum alignment
#define MIN_ALIGNMENT 8
// The number of small allocation bins in a thread cache when using 8-byte
// alignment
#define BIN_COUNT 45
// Used by init_tcache to indicate the inflection point between evenly spaced
// bins and bins spaced by maximum packing
#define BIN_INFLECTION 19
// Rounds requested allocation size up to the next multiple of 8
#define ALIGN_SIZE(SIZE) ((SIZE + SEVEN64) & ~SEVEN64)
// Select the bin to allocate from based on the size. At 208 bytes and below,
// bins are every 8 bytes. Above 208, bins are unevenly spaced based on the maximal size
// that divides into PAGE_SIZE for a given number of slots then rounded down to
// the nearest multiple of 8
#define GET_BIN(SIZE) (SIZE <= 208 ? BIN_COUNT - (SIZE >> 3) : PAGE_SIZE / SIZE)
#else
// Defines the minimum alignment
#define MIN_ALIGNMENT 16
// The number of small allocation bins in a thread cache when using 16-byte
// alignment
#define BIN_COUNT 32
// Used by init_tcache to indicate the inflection point between evenly spaced
// bins and bins spaced by maximum packing
#define BIN_INFLECTION 13
// Rounds requested allocation size up to the next multiple of 16
#define ALIGN_SIZE(SIZE) (SIZE <= 8 ? 8 : ((SIZE + FIFTEEN64) & ~FIFTEEN64))
// Select the bin to allocate from based on the size. Allocations of eight bytes
// or less always come from the eight-byte bin. Otherwise, at 304 bytes and below,
// bins are every 16 bytes. Above 304, bins are unevenly spaced based on the
// maximal size that divides into PAGE_SIZE for a given number of slots then
// rounded down to the nearest multiple of 16
#define GET_BIN(SIZE) (SIZE <= 8 ? 0 : SIZE <= 304 ? BIN_COUNT - (SIZE >> 4) : PAGE_SIZE / SIZE)
#endif
#define ALIGN_TO(VALUE, ALIGNMENT) ((VALUE + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1))
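// For illustration, with the default 16-byte alignment the macros above
// evaluate as follows (values follow directly from the definitions):
//   ALIGN_SIZE(6)   -> 8      GET_BIN(8)   -> 0   (the eight-byte bin)
//   ALIGN_SIZE(40)  -> 48     GET_BIN(48)  -> 29  (32 - 48/16)
//   ALIGN_SIZE(304) -> 304    GET_BIN(304) -> 13  (the BIN_INFLECTION point)
//   ALIGN_SIZE(500) -> 512    GET_BIN(512) -> 8   (4096 / 512 slots per page)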
/*** OS Intrinsic Translation Macros ***/
#ifdef _WIN64
// Counts the number of 1s in an integer. Useful for checking if an integer is
// a power of two
#define FFPOPCOUNT64 _mm_popcnt_u64
// Counts the number of leading (most significant) zeros in a 64-bit integer.
// Used to round up sizes to a power of two
#define FFCOUNTLEADINGZEROS64 __lzcnt64
#else
#define FFPOPCOUNT64 __builtin_popcountl
#define FFCOUNTLEADINGZEROS64 __builtin_clzl
#endif
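// For example, FFPOPCOUNT64(x) == 1 tests whether a nonzero x is a power of
// two, and (ONE64 << (64 - FFCOUNTLEADINGZEROS64(x - 1))) rounds an x greater
// than one up to the next power of two (illustrative uses only, not necessarily
// the exact expressions that appear elsewhere in this file)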
/*** OS Threading Translation Macros ***/
// These macros substitute in the correct OS specific synchronization
// functions. Or, when the library is being compiled for single threading,
// the macros disable the synchronization calls entirely, eliminating the
// need for repetitive #ifdefs
#ifdef FFSINGLE_THREADED
// When single threaded on any OS, null out all synchronization calls
#define FFEnterCriticalSection(LOCK)
#define FFLeaveCriticalSection(LOCK)
#define FFTryEnterCriticalSection(LOCK) 1
#define FFInitializeCriticalSection(LOCK)
#define FFDeleteCriticalSection(LOCK)
// Atomic operations redefined as basic C statements
#define FFAtomicAnd(DEST, VALUE) DEST &= VALUE
#define FFAtomicOr(DEST, VALUE) DEST |= VALUE
#define FFAtomicAdd(DEST, VALUE) DEST += VALUE
#define FFAtomicSub(DEST, VALUE) DEST -= VALUE
#define FFAtomicIncrement(DEST) DEST++
#define FFAtomicExchangeAdvancePtr(DEST, VALUE) DEST; DEST += VALUE
#define FFAtomicCompareExchangePtr(DEST, NEW, OLD) (*DEST = *DEST == OLD ? NEW : *DEST) == NEW
// "Thread" local storage functions
#define FFTlsAlloc(INDEX, FUNC) get_free_arena_index(&INDEX)
#define FFTlsFree(INDEX) free_arena_index(INDEX)
#define TLS_CLEANUP_CALLBACK NULL
// Swallow the lock variable declaration
#define FFLOCK(NAME)
#define FFLOCKSTATIC(NAME)
// "Thread" local storage key type
#define FFTLSINDEX size_t
// No threads so no need for volatile variables
#define volatile
#elif defined(_WIN64)
// Synchronization functions on Windows
#define FFEnterCriticalSection(LOCK) EnterCriticalSection(LOCK)
#define FFLeaveCriticalSection(LOCK) LeaveCriticalSection(LOCK)
#define FFTryEnterCriticalSection(LOCK) TryEnterCriticalSection(LOCK)
#define FFInitializeCriticalSection(LOCK) InitializeCriticalSection(LOCK)
#define FFDeleteCriticalSection(LOCK) DeleteCriticalSection(LOCK)
// Atomic operations
#define FFAtomicAnd(DEST, VALUE) InterlockedAnd64(&DEST, VALUE)
#define FFAtomicOr(DEST, VALUE) InterlockedOr64(&DEST, VALUE)
#define FFAtomicAdd(DEST, VALUE) InterlockedAdd64(&DEST, VALUE)
#define FFAtomicSub(DEST, VALUE) InterlockedAdd64((volatile LONG64*)&(DEST), -(LONG64)(VALUE))
#define FFAtomicIncrement(DEST) InterlockedIncrement64(&DEST);
#define FFAtomicExchangeAdvancePtr(DEST, VALUE) InterlockedExchangeAdd64((volatile LONG64*)&(DEST), (LONG64)(VALUE))
#define FFAtomicCompareExchangePtr(DEST, NEW, OLD) (InterlockedCompareExchangePointer(DEST, NEW, OLD) == OLD)
// Thread local storage functions
#define FFTlsAlloc(INDEX, FUNC) ((INDEX = TlsAlloc()) != TLS_OUT_OF_INDEXES)
#define FFTlsFree(INDEX) TlsFree(INDEX)
#define TLS_CLEANUP_CALLBACK NULL
// Synchronization types
#define FFLOCK(NAME) CRITICAL_SECTION NAME;
#define FFLOCKSTATIC(NAME) static CRITICAL_SECTION NAME;
// Thread local storage key type
#define FFTLSINDEX DWORD
#else
// Synchronization functions on Linux
#define FFEnterCriticalSection(LOCK) pthread_mutex_lock(LOCK)
#define FFLeaveCriticalSection(LOCK) pthread_mutex_unlock(LOCK)
#define FFTryEnterCriticalSection(LOCK) (pthread_mutex_trylock(LOCK) == 0)
#define FFInitializeCriticalSection(LOCK) pthread_mutex_init(LOCK, NULL)
#define FFDeleteCriticalSection(LOCK) pthread_mutex_destroy(LOCK)
// Atomic Operations
#define FFAtomicAnd(DEST, VALUE) __sync_and_and_fetch(&DEST, VALUE)
#define FFAtomicOr(DEST, VALUE) __sync_or_and_fetch(&DEST, VALUE)
#define FFAtomicAdd(DEST, VALUE) __sync_add_and_fetch(&DEST, VALUE)
#define FFAtomicSub(DEST, VALUE) __sync_sub_and_fetch(&DEST, VALUE)
#define FFAtomicIncrement(DEST) __sync_add_and_fetch(&DEST, 1)
#define FFAtomicExchangeAdvancePtr(DEST, VALUE) __sync_fetch_and_add(&(DEST), (byte*)VALUE)
#define FFAtomicCompareExchangePtr(DEST, NEW, OLD) __sync_bool_compare_and_swap(DEST, OLD, NEW)
// Thread local storage functions
#define FFTlsAlloc(INDEX, FUNC) (pthread_key_create(&INDEX, FUNC) == 0)
#define FFTlsFree(INDEX) pthread_key_delete(INDEX)
#define TLS_CLEANUP_CALLBACK cleanup_thread
// Synchronization types
#define FFLOCK(NAME) pthread_mutex_t NAME;
#define FFLOCKSTATIC(NAME) static pthread_mutex_t NAME;
// Thread local storage key type
#define FFTLSINDEX pthread_key_t
#endif
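// Whichever branch above was selected, call sites are written once. For
// example, the pool radix tree updates later in this file take the form:
//   FFEnterCriticalSection(&poolTreeLock);
//   /* ... modify the tree ... */
//   FFLeaveCriticalSection(&poolTreeLock);
// which becomes pthread_mutex_lock/unlock on Linux, Enter/LeaveCriticalSection
// on Windows, and compiles away entirely in single-threaded builds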
/*** Metadata Structures ***/
// When a page allocates objects smaller than 64 bytes, interpret the
// bitmap field in the page map as a pointer to an array of bitmaps.
// Otherwise, the field is the bitmap
union bitmap_t {
uint64_t single;
uint64_t* array;
};
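// For example, on a 4KB page, allocations of 64 bytes or more leave at most
// 4096 / 64 = 64 slots, which fit in the one 64-bit word of 'single', while
// 16-byte allocations leave 256 slots and need an array of four words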
// A page map holds the metadata about a page that has been
// allocated from a small allocation page pool
struct pagemap_t {
// The starting address of the page. Guaranteed to be page aligned
byte* start;
// The size of allocations on this page; always a multiple of 8
size_t allocSize;
// Individual allocations on the page are tracked by setting
// or clearing the corresponding bit in the bitmap
union bitmap_t bitmap;
};
// Interprets the metadata allocation for a pool as either an array
// of page maps (small allocation pools) or an array of pointers to
// the allocations (large allocation pools)
union tracking_t {
struct pagemap_t* pageMaps;
uintptr_t* allocations;
};
// A page pool is an initially contiguous region of memory
// from which individual pages are assigned to the thread
// cache's bins. "Holes" in the pool will develop over time
// when enough allocations have been freed that entire pages
// can be returned to the OS
struct pagepool_t {
// The starting address of the pool. This value is constant
// and is not incremented even if the initial page is freed
byte* start;
// The final address (exclusive) of the pool
byte* end;
// The starting address of the next unallocated page in the pool
byte* nextFreePage;
// Pool metadata - either an array of page maps or of allocation pointers
union tracking_t tracking;
// The index of the next pointer in a large pool to be allocated.
// The pointer in this slot is not yet allocated, but it
// is needed so that the size of the last allocated
// pointer can still be computed
size_t nextFreeIndex;
// The address of the first page not yet freed
byte* startInUse;
// The address of the free page block that is contiguous with the end of the pool
byte* endInUse;
// The arena this pool is a part of
struct arena_t* arena;
// Critical section used to lock certain updates on the pool
FFLOCK(poolLock)
};
// All small (less than half a page) allocations are assigned to a
// size bin based on maximum packing of similar sizes. All allocations
// on a single page are in the same bin.
struct bin_t {
// Pointer to the next free slot for allocation
byte* nextAlloc;
// The size of allocations in this bin. Always a multiple of 8
size_t allocSize;
// The number of allocations made so far in this bin. It is
// reset to 0 when the page is filled and a new page is
// assigned to the bin
size_t allocCount;
// The maximum number of allocations that can be made on one
// page in this bin
size_t maxAlloc;
// Points to the page map object with the tracking bitmap
struct pagemap_t* page;
#ifdef FF_PROFILE
// The cumulative number of allocations made from this bin
// across all pages
size_t totalAllocCount;
#endif
};
// Each thread is given its own cache of pages to allocate from
struct threadcache_t {
// The array of small allocation bins for this thread
struct bin_t bins[BIN_COUNT];
// To reduce round trips to the page pool, a small number of
// blank pages are assigned to the thread cache to add to a
// bin when it gets full. This points to the next available
// free page
struct pagemap_t* nextUnusedPage;
// The end address (exclusive) of the range of free pages
// available to the cache
struct pagemap_t* endUnusedPage;
// The arena this thread cache allocates from and the source of
// its free pages
struct arena_t* arena;
};
// A leaf node in a radix tree that points to a page pool
struct radixleaf_t {
// Radix leaf node has two arrays, one for start and one for end.
// The reason is that we can't assume that each pool allocation
// will be POOL_SIZE aligned (and in fact for ASLR purposes it's
// better that they aren't). Therefore, looking only at the high
// order bits of a pointer, we can't tell if it's from a pool that
// starts in the middle of the prefix or ends there
// Pointers to pools that start on the matching prefix
struct pagepool_t* poolStart[POOLS_PER_LEAF];
// Pointers to pools that end on the matching prefix
struct pagepool_t* poolEnd[POOLS_PER_LEAF];
};
// Intermediate node in a radix tree
struct radixstem_t {
struct radixleaf_t* leaves[LEAVES_PER_STEM];
};
// Root node of the page pool radix tree
struct radixroot_t {
struct radixstem_t* stems[STEM_COUNT];
};
// Node in a list of allocation pools
struct poollistnode_t {
// Pointer to the next node in the list
struct poollistnode_t* next;
// Pointer to an allocation page pool
struct pagepool_t* pool;
};
// An arena is a collection of large and small pools that allocations can be
// specifically drawn from using the ffmalloc extended API. Arenas allow the
// calling application to free all allocations from that arena with one call
// which benefits performance through fewer system calls to VirtualFree or
// munmap and simplifies memory management since each allocation doesn't have
// to be individually freed. Allocations from the standard malloc API come
// from a default arena, but that arena is persistent and allocations need to
// be individually freed.
struct arena_t {
// List of small pools created in this arena. The head of the list is
// the pool currently being allocated from
struct poollistnode_t* volatile smallPoolList;
// Array of lists of large pools created in this arena. Typically one
// list per CPU in the system. The head of the list is usually where
// allocations come from but pools further down will be searched for
// available space if the first node is locked by another thread
struct poollistnode_t* volatile largePoolList[MAX_LARGE_LISTS];
// List of jumbo allocation pools created in this arena
struct poollistnode_t* volatile jumboPoolList;
// Index to get the correct thread local storage value for this arena
// which holds the pointer to the thread cache for the invoking thread
FFTLSINDEX tlsIndex;
// Lock that protects modifying the small pool list header
FFLOCK(smallListLock)
// Locks that protect each large list
FFLOCK(largeListLock[MAX_LARGE_LISTS])
#ifdef FF_PROFILE
// Structure to hold arena usage statistics
ffprofile_t profile;
#endif
};
// Reinterprets freed metadata allocations as a pointer to the next
// available free block
struct usedmd_t {
byte* next;
};
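// For example, when a metadata block is returned via ffmetadata_free below,
// its first eight bytes are overwritten with the current bin head and the
// block itself becomes the new head of that bin's free list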
/*** Library Globals ***/
static int isInit = 0;
// Number of pools currently allocated
// TODO: move this into future global profiling structure
static size_t poolCount = 0;
// The highest pool end address seen yet. The next pool will attempt
// to start at this address
//static byte* volatile poolHighWater;
static byte* poolHighWater;
// Root node of radix tree containing all pools
static struct radixroot_t poolTree;
// Array of arenas. The default arena used by the standard malloc API is
// at index 0
static struct arena_t* volatile arenas[MAX_ARENAS];
// The start of the global metadata allocation pool
static byte* metadataPool;
// The top of the metadata pool - i.e. the next unallocated block
static byte* metadataFree;
// The end of the currently available metadata address space
static byte* metadataEnd;
// Bin headers for the metadata pool
static byte* bins[256];
static byte* metadatabins[2];
// Lock that protects modifications to the pool radix tree
FFLOCKSTATIC(poolTreeLock)
// Locks that protect access to the metadata allocation bins
FFLOCKSTATIC(binLocks[256])
FFLOCKSTATIC(mdBinLocks[2])
// Lock that protects access to the metadata allocation pool
FFLOCKSTATIC(mdPoolLock)
FFLOCKSTATIC(poolAllocLock);
#ifdef FF_PROFILE
// The interval (in calls to malloc) to print usage statistics
unsigned int usagePrintInterval;
// The file that interval usage statistics will be sent to
FILE * usagePrintFile;
#endif // FF_PROFILE
/*** Forward declarations ***/
static int create_pagepool(struct pagepool_t* newPool);
static int create_largepagepool(struct pagepool_t* newPool);
static void destroy_pool_list(struct poollistnode_t* node);
static struct pagepool_t* find_pool_for_ptr(const byte* ptr);
static void init_tcache(struct threadcache_t* tcache, struct arena_t* arena);
static void initialize();
static void init_threading();
static void* ffmetadata_alloc(size_t size);
static void ffmetadata_free(void* ptr, size_t size);
static void free_large_pointer(struct pagepool_t* pool, size_t index, size_t size);
void ffprint_stats_wrapper(void);
#ifdef FF_PROFILE
static void print_current_usage();
#endif
#if !defined(FFSINGLE_THREADED) && !defined(_WIN64)
static void cleanup_thread(void* ptr);
#endif
/*** OS compatibility functions ***/
static size_t os_alloc_total = 0;
static size_t os_alloc_count = 0;
static size_t os_free_count = 0;
#ifdef _WIN64
#define MAP_FAILED NULL
static inline LPVOID os_alloc(LPVOID startAddress, size_t size) {
void* alloc = VirtualAlloc(startAddress, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
os_alloc_total += size;
os_alloc_count++;
return alloc;
}
// Obtains memory from the OS whose starting virtual address is no lower than
// the previous highest address
static inline LPVOID os_alloc_highwater(size_t size) {
MEM_ADDRESS_REQUIREMENTS addressReqs = { 0 };
MEM_EXTENDED_PARAMETER param = { 0 };
addressReqs.LowestStartingAddress = poolHighWater;
param.Type = MemExtendedParameterAddressRequirements;
param.Pointer = &addressReqs;
LPVOID result = VirtualAlloc2(NULL, NULL, size, MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE, &param, 1);
if (result != NULL) {
// Incrementing by an extra 64K leaves guard pages between pools as weak
// buffer overflow protection
byte* newHigh = (byte*)result + size + 65536ULL;
byte* oldHigh = poolHighWater;
byte* update;
os_alloc_total += size;
os_alloc_count++;
while ((update = InterlockedCompareExchangePointer(&poolHighWater, newHigh, oldHigh)) < newHigh) {
oldHigh = update;
}
}
return result;
}
// Returns memory to the OS but holds onto the memory addresses
static inline BOOL os_decommit(LPVOID startAddress, size_t size) {
return VirtualFree(startAddress, size, MEM_DECOMMIT);
}
// Returns memory to the OS and releases the virtual address space
static inline BOOL os_free(LPVOID startAddress) {
os_free_count++;
return VirtualFree(startAddress, 0, MEM_RELEASE);
}
#else
/*static inline void* os_alloc(void* startAddress, size_t size) {
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE;
if(startAddress != NULL) {
flags |= MAP_FIXED_NOREPLACE | MAP_POPULATE;
}
return mmap(startAddress, size, PROT_READ | PROT_WRITE, flags, -1, 0);
}*/
static inline void* os_alloc_highwater(size_t size) {
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
void* result = NULL;
void* localHigh = FFAtomicExchangeAdvancePtr(poolHighWater, size);
// If we need more space from the OS, it's likely to get used immediately
// after, so go ahead and pre-fault the pages for faster access. Except
// don't do that for jumbo allocations since that could cause swapping if
// the allocation is sufficiently large and the system is under pressure
if(size == POOL_SIZE) {
flags |= MAP_POPULATE;
}
while(result == NULL) {
// TODO: Add wrap around if we hit the top of address space
result = mmap(localHigh, size, PROT_READ | PROT_WRITE,
flags, -1, 0);
if(result == MAP_FAILED) {
// If the failure was because the requested address already has
// a mapping associated then jump up by POOL_SIZE (since a new
// pool created on another thread is the most likely reason)
// and try again
if(errno == EEXIST) {
localHigh = FFAtomicExchangeAdvancePtr(poolHighWater, POOL_SIZE);
result = NULL;
}
else {
fprintf(stderr, "[ffmalloc] Warning: os_alloc_highwater failed\n");
return MAP_FAILED;
}
}
}
return result;
}
#define FALSE -1
static inline int os_decommit(void* startAddress, size_t size) {
// Surprisingly, benchmarking seems to suggest that unmapping is actually
// faster than madvise. Revisit in the future
return munmap(startAddress, size);
//return madvise(startAddress, size, MADV_FREE);
}
static inline int os_free(void* startAddress) {
// On Windows, this helper can only completely decommit and unreserve
// an entire reservation, so no size parameter. Here, we'll look for
// the pool getting the axe and figure out the size
struct pagepool_t* pool = find_pool_for_ptr((const byte*)startAddress);
if (pool != NULL) {
return munmap(pool->start, pool->end - pool->start);
}
else {
// Wasn't a pool - that shouldn't happen
// Likely a bug if we get here
abort();
}
errno = EINVAL;
return -1;
}
#endif
/*** Dynamic metadata allocation ***/
// FFmalloc has several metadata structures that need to be dynamically allocated.
// If we restricted usage to Windows, or required use of the ff prefix on Linux,
// then LocalAlloc or libc malloc respectively could be used. But we'd like to
// allow non-prefixed usage via LD_PRELOAD on Linux (or static compilation, so long
// as we're the first library). Therefore, we have a mini-allocator for metadata
// allocations. Because this should only be used internally, it does *not*
// implement the forward-only principle. Also note that the "free" equivalent
// requires a size parameter, which reduces the amount of bookkeeping the
// metadata allocator has to store about its own allocations
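// A minimal usage sketch (illustrative only; real call sites such as
// add_pool_to_tree and free_arena_index appear later in this file). The caller
// must remember the requested size and pass it back when freeing:
//   struct radixleaf_t* leaf = ffmetadata_alloc(sizeof(struct radixleaf_t));
//   /* ... use leaf ... */
//   ffmetadata_free(leaf, sizeof(struct radixleaf_t));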
static void* ffpoolmetadata_alloc(int isSmallPool) {
byte* allocation;
size_t size = isSmallPool ? (POOL_SIZE / PAGE_SIZE) * sizeof(struct pagemap_t) :
(POOL_SIZE >> 20) * PAGE_SIZE;
size = ALIGN_SIZE(size);
FFEnterCriticalSection(&mdBinLocks[isSmallPool]);
if(metadatabins[isSmallPool] == NULL) {
FFEnterCriticalSection(&mdPoolLock);
allocation = metadataFree;
if(allocation + size > metadataEnd) {
// Need to grow metadata pool space
#ifdef _WIN64
VirtualAlloc(metadataEnd, POOL_SIZE, MEM_COMMIT, PAGE_READWRITE);
#else
mprotect(metadataEnd, POOL_SIZE, PROT_READ | PROT_WRITE);
madvise(metadataEnd, PAGE_SIZE * 16, MADV_WILLNEED);
#endif
metadataEnd += POOL_SIZE;
#ifdef FF_PROFILE
FFAtomicAdd(arenas[0]->profile.currentOSBytesMapped, POOL_SIZE);
if(arenas[0]->profile.currentOSBytesMapped > arenas[0]->profile.maxOSBytesMapped) {
arenas[0]->profile.maxOSBytesMapped = arenas[0]->profile.currentOSBytesMapped;
}
#endif
}
metadataFree += size;
FFLeaveCriticalSection(&mdPoolLock);
}
else {
allocation = metadatabins[isSmallPool];
metadatabins[isSmallPool] = ((struct usedmd_t*)allocation)->next;
}
FFLeaveCriticalSection(&mdBinLocks[isSmallPool]);
memset(allocation, 0, size);
return allocation;
}
static void ffpoolmetadata_free(void* ptr, int isSmallPool) {
size_t size = isSmallPool ? (POOL_SIZE / PAGE_SIZE) * sizeof(struct pagemap_t) :
(POOL_SIZE >> 20) * PAGE_SIZE;
size = ALIGN_SIZE(size);
if (ptr > (void*)metadataFree || ptr < (void*)metadataPool) {
abort();
}
FFEnterCriticalSection(&mdBinLocks[isSmallPool]);
// Put the freed block at the front of the list and have it point to
// the former head of the line
((struct usedmd_t*)ptr)->next = metadatabins[isSmallPool];
metadatabins[isSmallPool] = (byte*)ptr;
FFLeaveCriticalSection(&mdBinLocks[isSmallPool]);
}
static void* ffmetadata_alloc(size_t size) {
// Ensure 16 byte alignment
size = ALIGN_TO(size, UINT64_C(16));
// Making the assumption that the radix leaf nodes are the only metadata
// structures that are bigger than a page. If that changes then we'll be
// in a bit of a bind here. But for now, go with it
size_t binID = size >= 4096 ? 255 : (size >> 4) - 1;
byte* allocation;
FFEnterCriticalSection(&binLocks[binID]);
if (bins[binID] == NULL) {
// No freed chunks of this size exist. Allocate space from the top
// of the pool. Keeping things simple for now and not trying to
// break a free 64-byte chunk into 4*16-byte chunks or whatever
FFEnterCriticalSection(&mdPoolLock);
allocation = metadataFree;
if(allocation + size > metadataEnd) {
// Need to grow metadata pool space
#ifdef _WIN64
VirtualAlloc(metadataEnd, POOL_SIZE, MEM_COMMIT, PAGE_READWRITE);
#else
mprotect(metadataEnd, POOL_SIZE, PROT_READ | PROT_WRITE);
madvise(metadataEnd, PAGE_SIZE * 4, MADV_WILLNEED);
#endif
metadataEnd += POOL_SIZE;
#ifdef FF_PROFILE
FFAtomicAdd(arenas[0]->profile.currentOSBytesMapped, POOL_SIZE);
if(arenas[0]->profile.currentOSBytesMapped > arenas[0]->profile.maxOSBytesMapped) {
arenas[0]->profile.maxOSBytesMapped = arenas[0]->profile.currentOSBytesMapped;
}
#endif
}
metadataFree += size;
FFLeaveCriticalSection(&mdPoolLock);
}
else {
// Take the first available chunk from the front of the list
// and advance the head of the list to the next chunk
allocation = bins[binID];
bins[binID] = ((struct usedmd_t*)allocation)->next;
}
FFLeaveCriticalSection(&binLocks[binID]);
// Simplify logic elsewhere by guaranteeing zeroed blocks
memset(allocation, 0, size);
return allocation;
}
static void ffmetadata_free(void* ptr, size_t size) {
// Ensure 16 byte alignment to find right bin
size = ALIGN_TO(size, UINT64_C(16));
size_t binID = size >= 4096 ? 255 : (size >> 4) - 1;
if (ptr > (void*)metadataFree || ptr < (void*)metadataPool) {
abort();
}
FFEnterCriticalSection(&binLocks[binID]);
// Put the freed block at the front of the list and have it point to
// the former head of the line
((struct usedmd_t*)ptr)->next = bins[binID];
bins[binID] = (byte*)ptr;
FFLeaveCriticalSection(&binLocks[binID]);
}
/*** Radix tree implementation ***/
// Gets the page pool that matches the page prefix. Returns NULL if no matching
// pool could be found
struct pagepool_t* find_pool_for_ptr(const byte* ptr) {
// Compute the index for each level
size_t stemIndex = (uintptr_t)ptr >> (POOL_SIZE_BITS + LEAF_BITS + STEM_BITS);
size_t leafIndex = ((uintptr_t)ptr >> (POOL_SIZE_BITS + LEAF_BITS)) & (LEAVES_PER_STEM - 1);
// Find the correct leaf node
struct radixleaf_t* leaf = poolTree.stems[stemIndex] != NULL ? poolTree.stems[stemIndex]->leaves[leafIndex] : NULL;
if (leaf != NULL) {
// Check if there is a pool that starts or ends in this leaf
// that could possibly contain the given pointer
struct pagepool_t* pool = leaf->poolStart[((uintptr_t)ptr >> POOL_SIZE_BITS) & (POOLS_PER_LEAF - 1)];
if (pool != NULL && ptr >= pool->start) {
return pool;
}
pool = leaf->poolEnd[((uintptr_t)ptr >> POOL_SIZE_BITS) & (POOLS_PER_LEAF - 1)];
if (pool != NULL && ptr < pool->end) {
return pool;
}
}
return NULL;
}
// Inserts a newly created page pool into the radix tree
void add_pool_to_tree(struct pagepool_t* pool) {
// Compute the level indexes for both the start and end addresses
size_t startStemIndex = (uintptr_t)pool->start >> (POOL_SIZE_BITS + LEAF_BITS + STEM_BITS);
size_t startLeafIndex = ((uintptr_t)pool->start >> (POOL_SIZE_BITS + LEAF_BITS)) & (LEAVES_PER_STEM - 1);
size_t startPoolIndex = ((uintptr_t)pool->start >> POOL_SIZE_BITS) & (POOLS_PER_LEAF - 1);
size_t endStemIndex = (uintptr_t)pool->end >> (POOL_SIZE_BITS + LEAF_BITS + STEM_BITS);
size_t endLeafIndex = ((uintptr_t)pool->end >> (POOL_SIZE_BITS + LEAF_BITS)) & (LEAVES_PER_STEM - 1);
size_t endPoolIndex = ((uintptr_t)pool->end >> POOL_SIZE_BITS) & (POOLS_PER_LEAF - 1);
// Pool creation should be infrequent enough that trying to come up
// with a fancy lock-free update structure probably isn't worth it
FFEnterCriticalSection(&poolTreeLock);
// Make sure that the nodes in the tree exist
if(poolTree.stems[startStemIndex] == NULL) {
poolTree.stems[startStemIndex] = (struct radixstem_t*)ffmetadata_alloc(sizeof(struct radixstem_t));
}
if(poolTree.stems[startStemIndex]->leaves[startLeafIndex] == NULL) {
poolTree.stems[startStemIndex]->leaves[startLeafIndex] = (struct radixleaf_t*)ffmetadata_alloc(sizeof(struct radixleaf_t));
}
if(poolTree.stems[endStemIndex] == NULL) {
poolTree.stems[endStemIndex] = (struct radixstem_t*)ffmetadata_alloc(sizeof(struct radixstem_t));
}
if(poolTree.stems[endStemIndex]->leaves[endLeafIndex] == NULL) {
poolTree.stems[endStemIndex]->leaves[endLeafIndex] = (struct radixleaf_t*)ffmetadata_alloc(sizeof(struct radixleaf_t));
}
// Add the pool to the tree
poolTree.stems[startStemIndex]->leaves[startLeafIndex]->poolStart[startPoolIndex] = pool;
poolTree.stems[endStemIndex]->leaves[endLeafIndex]->poolEnd[endPoolIndex] = pool;
poolCount++;
FFLeaveCriticalSection(&poolTreeLock);
}
// Removes a page pool from the lookup tree
void remove_pool_from_tree(struct pagepool_t* pool) {
// Compute the level indexes for the start and end of the pool
size_t startStemIndex = (uintptr_t)pool->start >> (POOL_SIZE_BITS + LEAF_BITS + STEM_BITS);
size_t startLeafIndex = ((uintptr_t)pool->start >> (POOL_SIZE_BITS + LEAF_BITS)) & (LEAVES_PER_STEM - 1);
size_t startPoolIndex = ((uintptr_t)pool->start >> POOL_SIZE_BITS) & (POOLS_PER_LEAF - 1);
size_t endStemIndex = (uintptr_t)pool->end >> (POOL_SIZE_BITS + LEAF_BITS + STEM_BITS);
size_t endLeafIndex = ((uintptr_t)pool->end >> (POOL_SIZE_BITS + LEAF_BITS)) & (LEAVES_PER_STEM - 1);
size_t endPoolIndex = ((uintptr_t)pool->end >> POOL_SIZE_BITS) & (POOLS_PER_LEAF - 1);
// Not checking path validity. Caller is responsible for calling only
// if the pool has definitively been added to the tree already
FFEnterCriticalSection(&poolTreeLock);
poolTree.stems[startStemIndex]->leaves[startLeafIndex]->poolStart[startPoolIndex] = NULL;
poolTree.stems[endStemIndex]->leaves[endLeafIndex]->poolEnd[endPoolIndex] = NULL;
poolCount--;
FFLeaveCriticalSection(&poolTreeLock);
}
/*** Multi-threaded application support ***/
#ifdef FFSINGLE_THREADED
// Support single threaded applications only
// Array of thread caches, one per arena. Thread caches are still needed even
// when compiled without threading support because they serve as the interface
// for small allocations.
struct threadcache_t* arenaCaches[MAX_ARENAS];
// Single threaded implementation of FFTlsAlloc
static int get_free_arena_index(FFTLSINDEX* index) {
for(FFTLSINDEX i=0; i<MAX_ARENAS; i++) {
if(arenaCaches[i] == NULL) {
arenaCaches[i] = ffmetadata_alloc(sizeof(struct threadcache_t));
*index = i;
return 1;
}
}
return 0;
}
// Single threaded implementation of FFTlsFree
static int free_arena_index(FFTLSINDEX index) {
ffmetadata_free(arenaCaches[index], sizeof(struct threadcache_t));
arenaCaches[index] = NULL;
return 0;
}
// One time initialization of the default arena thread cache
static void init_threading() {
arenaCaches[0] = ffmetadata_alloc(sizeof(struct threadcache_t));
init_tcache(arenaCaches[0], arenas[0]);
}
// Returns the thread cache for the associated arena
static inline struct threadcache_t* get_threadcache(struct arena_t* arena) {
if(arenaCaches[arena->tlsIndex]->arena == NULL) {
init_tcache(arenaCaches[arena->tlsIndex], arena);
}
return arenaCaches[arena->tlsIndex];
}