LLVM 22.0.0git
rpmalloc.c
1//===---------------------- rpmalloc.c ------------------*- C -*-=============//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This library provides a cross-platform, lock-free, thread-caching malloc
10// implementation in C11.
11//
12//===----------------------------------------------------------------------===//
13
14#include "rpmalloc.h"
15
16////////////
17///
18/// Build time configurable limits
19///
20//////
21
22#if defined(__clang__)
23#pragma clang diagnostic ignored "-Wunused-macros"
24#pragma clang diagnostic ignored "-Wunused-function"
25#if __has_warning("-Wreserved-identifier")
26#pragma clang diagnostic ignored "-Wreserved-identifier"
27#endif
28#if __has_warning("-Wstatic-in-inline")
29#pragma clang diagnostic ignored "-Wstatic-in-inline"
30#endif
31#elif defined(__GNUC__)
32#pragma GCC diagnostic ignored "-Wunused-macros"
33#pragma GCC diagnostic ignored "-Wunused-function"
34#endif
35
36#if !defined(__has_builtin)
37#define __has_builtin(b) 0
38#endif
39
40#if defined(__GNUC__) || defined(__clang__)
41
42#if __has_builtin(__builtin_memcpy_inline)
43#define _rpmalloc_memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s)
44#else
45#define _rpmalloc_memcpy_const(x, y, s) \
46 do { \
47 _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), \
48 "len must be a constant integer"); \
49 memcpy(x, y, s); \
50 } while (0)
51#endif
52
53#if __has_builtin(__builtin_memset_inline)
54#define _rpmalloc_memset_const(x, y, s) __builtin_memset_inline(x, y, s)
55#else
56#define _rpmalloc_memset_const(x, y, s) \
57 do { \
58 _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), \
59 "len must be a constant integer"); \
60 memset(x, y, s); \
61 } while (0)
62#endif
63#else
64#define _rpmalloc_memcpy_const(x, y, s) memcpy(x, y, s)
65#define _rpmalloc_memset_const(x, y, s) memset(x, y, s)
66#endif
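// Illustrative sketch (not part of the upstream allocator): these helpers are
// only valid when the length is a compile-time constant; the fallback path
// enforces that with a _Static_assert over __builtin_constant_p. A
// hypothetical caller would look like this:
//
//   typedef struct { char name[16]; } tag_t;
//   static void tag_copy(tag_t *dst, const tag_t *src) {
//     _rpmalloc_memcpy_const(dst, src, sizeof(*dst)); // constant length
//   }
//   static void tag_clear(tag_t *tag) {
//     _rpmalloc_memset_const(tag, 0, sizeof(*tag));
//   }
//
// Passing a runtime length would fail to compile on the fallback path.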
67
68#if __has_builtin(__builtin_assume)
69#define rpmalloc_assume(cond) __builtin_assume(cond)
70#elif defined(__GNUC__)
71#define rpmalloc_assume(cond) \
72 do { \
73 if (!__builtin_expect(cond, 0)) \
74 __builtin_unreachable(); \
75 } while (0)
76#elif defined(_MSC_VER)
77#define rpmalloc_assume(cond) __assume(cond)
78#else
79#define rpmalloc_assume(cond) 0
80#endif
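// Illustrative sketch (not part of the upstream allocator): rpmalloc_assume()
// only informs the optimizer; if the condition is false at runtime the
// behavior is undefined, so it must already be guaranteed by program logic.
// Hypothetical use:
//
//   static inline void *first_block(void *span_start) {
//     rpmalloc_assume(span_start != 0); // caller guarantees a mapped span
//     return (char *)span_start + SPAN_HEADER_SIZE;
//   }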
81
82#ifndef HEAP_ARRAY_SIZE
83//! Size of heap hashmap
84#define HEAP_ARRAY_SIZE 47
85#endif
86#ifndef ENABLE_THREAD_CACHE
87//! Enable per-thread cache
88#define ENABLE_THREAD_CACHE 1
89#endif
90#ifndef ENABLE_GLOBAL_CACHE
91//! Enable global cache shared between all threads, requires thread cache
92#define ENABLE_GLOBAL_CACHE 1
93#endif
94#ifndef ENABLE_VALIDATE_ARGS
95//! Enable validation of args to public entry points
96#define ENABLE_VALIDATE_ARGS 0
97#endif
98#ifndef ENABLE_STATISTICS
99//! Enable statistics collection
100#define ENABLE_STATISTICS 0
101#endif
102#ifndef ENABLE_ASSERTS
103//! Enable asserts
104#define ENABLE_ASSERTS 0
105#endif
106#ifndef ENABLE_OVERRIDE
107//! Override standard library malloc/free and new/delete entry points
108#define ENABLE_OVERRIDE 0
109#endif
110#ifndef ENABLE_PRELOAD
111//! Support preloading
112#define ENABLE_PRELOAD 0
113#endif
114#ifndef DISABLE_UNMAP
115//! Disable unmapping memory pages (also enables unlimited cache)
116#define DISABLE_UNMAP 0
117#endif
118#ifndef ENABLE_UNLIMITED_CACHE
119//! Enable unlimited global cache (no unmapping until finalization)
120#define ENABLE_UNLIMITED_CACHE 0
121#endif
122#ifndef ENABLE_ADAPTIVE_THREAD_CACHE
123//! Enable adaptive thread cache size based on use heuristics
124#define ENABLE_ADAPTIVE_THREAD_CACHE 0
125#endif
126#ifndef DEFAULT_SPAN_MAP_COUNT
127//! Default number of spans to map in call to map more virtual memory (default
128//! values yield 4MiB here)
129#define DEFAULT_SPAN_MAP_COUNT 64
130#endif
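// Worked example for the comment above (assuming the default 64KiB span
// size): DEFAULT_SPAN_MAP_COUNT spans per map call reserve
// 64 * 64KiB = 4MiB of address space, which is where the 4MiB figure comes
// from.
//
//   _Static_assert(DEFAULT_SPAN_MAP_COUNT * 64 * 1024 == 4 * 1024 * 1024,
//                  "default map size is 4MiB");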
131#ifndef GLOBAL_CACHE_MULTIPLIER
132//! Multiplier for global cache
133#define GLOBAL_CACHE_MULTIPLIER 8
134#endif
135
136#if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE
137#error Must use global cache if unmap is disabled
138#endif
139
140#if DISABLE_UNMAP
141#undef ENABLE_UNLIMITED_CACHE
142#define ENABLE_UNLIMITED_CACHE 1
143#endif
144
145#if !ENABLE_GLOBAL_CACHE
146#undef ENABLE_UNLIMITED_CACHE
147#define ENABLE_UNLIMITED_CACHE 0
148#endif
149
150#if !ENABLE_THREAD_CACHE
151#undef ENABLE_ADAPTIVE_THREAD_CACHE
152#define ENABLE_ADAPTIVE_THREAD_CACHE 0
153#endif
154
155#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
156#define PLATFORM_WINDOWS 1
157#define PLATFORM_POSIX 0
158#else
159#define PLATFORM_WINDOWS 0
160#define PLATFORM_POSIX 1
161#endif
162
163/// Platform and arch specifics
164#if defined(_MSC_VER) && !defined(__clang__)
165#pragma warning(disable : 5105)
166#ifndef FORCEINLINE
167#define FORCEINLINE inline __forceinline
168#endif
169#define _Static_assert static_assert
170#else
171#ifndef FORCEINLINE
172#define FORCEINLINE inline __attribute__((__always_inline__))
173#endif
174#endif
175#if PLATFORM_WINDOWS
176#ifndef WIN32_LEAN_AND_MEAN
177#define WIN32_LEAN_AND_MEAN
178#endif
179#include <windows.h>
180#if ENABLE_VALIDATE_ARGS
181#include <intsafe.h>
182#endif
183#else
184#include <stdio.h>
185#include <stdlib.h>
186#include <time.h>
187#include <unistd.h>
188#if defined(__linux__) || defined(__ANDROID__)
189#include <sys/prctl.h>
190#if !defined(PR_SET_VMA)
191#define PR_SET_VMA 0x53564d41
192#define PR_SET_VMA_ANON_NAME 0
193#endif
194#endif
195#if defined(__APPLE__)
196#include <TargetConditionals.h>
197#if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
198#include <mach/mach_vm.h>
199#include <mach/vm_statistics.h>
200#endif
201#include <pthread.h>
202#endif
203#if defined(__HAIKU__) || defined(__TINYC__)
204#include <pthread.h>
205#endif
206#endif
207
208#include <errno.h>
209#include <stdint.h>
210#include <string.h>
211
212#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
213#include <fibersapi.h>
214static DWORD fls_key;
215#endif
216
217#if PLATFORM_POSIX
218#include <sched.h>
219#include <sys/mman.h>
220#ifdef __FreeBSD__
221#include <sys/sysctl.h>
222#define MAP_HUGETLB MAP_ALIGNED_SUPER
223#ifndef PROT_MAX
224#define PROT_MAX(f) 0
225#endif
226#else
227#define PROT_MAX(f) 0
228#endif
229#ifdef __sun
230extern int madvise(caddr_t, size_t, int);
231#endif
232#ifndef MAP_UNINITIALIZED
233#define MAP_UNINITIALIZED 0
234#endif
235#endif
236#include <errno.h>
237
238#if ENABLE_ASSERTS
239#undef NDEBUG
240#if defined(_MSC_VER) && !defined(_DEBUG)
241#define _DEBUG
242#endif
243#include <assert.h>
244#define RPMALLOC_TOSTRING_M(x) #x
245#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x)
246#define rpmalloc_assert(truth, message) \
247 do { \
248 if (!(truth)) { \
249 if (_memory_config.error_callback) { \
250 _memory_config.error_callback(message " (" RPMALLOC_TOSTRING( \
251 truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__)); \
252 } else { \
253 assert((truth) && message); \
254 } \
255 } \
256 } while (0)
257#else
258#define rpmalloc_assert(truth, message) \
259 do { \
260 } while (0)
261#endif
262#if ENABLE_STATISTICS
263#include <stdio.h>
264#endif
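// Illustrative sketch (not part of the upstream allocator): with
// ENABLE_ASSERTS, failed assertions are routed through the error callback
// configured at initialization, falling back to assert() when no callback is
// set. Assuming the public rpmalloc.h API, a caller could hook it like this:
//
//   #include <stdio.h>
//   static void report_error(const char *message) {
//     fprintf(stderr, "rpmalloc: %s\n", message);
//   }
//
//   rpmalloc_config_t config = {0};
//   config.error_callback = report_error;
//   rpmalloc_initialize_config(&config);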
265
266//////
267///
268/// Atomic access abstraction (since MSVC does not do C11 yet)
269///
270//////
271
272#if defined(_MSC_VER) && !defined(__clang__)
273
274typedef volatile long atomic32_t;
275typedef volatile long long atomic64_t;
276typedef volatile void *atomicptr_t;
277
278static FORCEINLINE int32_t atomic_load32(atomic32_t *src) {
279 return (int32_t)InterlockedOr(src, 0);
280}
281static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
282 InterlockedExchange(dst, val);
283}
284static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) {
285 return (int32_t)InterlockedIncrement(val);
286}
287static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) {
288 return (int32_t)InterlockedDecrement(val);
289}
290static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) {
291 return (int32_t)InterlockedExchangeAdd(val, add) + add;
292}
293static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val,
294 int32_t ref) {
295 return (InterlockedCompareExchange(dst, val, ref) == ref) ? 1 : 0;
296}
297static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) {
298 InterlockedExchange(dst, val);
299}
300static FORCEINLINE int64_t atomic_load64(atomic64_t *src) {
301 return (int64_t)InterlockedOr64(src, 0);
302}
303static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) {
304 return (int64_t)InterlockedExchangeAdd64(val, add) + add;
305}
306static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) {
307 return InterlockedCompareExchangePointer(src, 0, 0);
308}
309static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) {
310 InterlockedExchangePointer(dst, val);
311}
312static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) {
313 InterlockedExchangePointer(dst, val);
314}
315static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst,
316 void *val) {
317 return (void *)InterlockedExchangePointer((void *volatile *)dst, val);
318}
319static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) {
320 return (InterlockedCompareExchangePointer((void *volatile *)dst, val, ref) ==
321 ref)
322 ? 1
323 : 0;
324}
325
326#define EXPECTED(x) (x)
327#define UNEXPECTED(x) (x)
328
329#else
330
331#include <stdatomic.h>
332
333typedef volatile _Atomic(int32_t) atomic32_t;
334typedef volatile _Atomic(int64_t) atomic64_t;
335typedef volatile _Atomic(void *) atomicptr_t;
336
337static FORCEINLINE int32_t atomic_load32(atomic32_t *src) {
338 return atomic_load_explicit(src, memory_order_relaxed);
339}
340static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
341 atomic_store_explicit(dst, val, memory_order_relaxed);
342}
343static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) {
344 return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1;
345}
346static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) {
347 return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1;
348}
349static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) {
350 return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add;
351}
352static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val,
353 int32_t ref) {
354 return atomic_compare_exchange_weak_explicit(
355 dst, &ref, val, memory_order_acquire, memory_order_relaxed);
356}
357static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) {
358 atomic_store_explicit(dst, val, memory_order_release);
359}
360static FORCEINLINE int64_t atomic_load64(atomic64_t *val) {
361 return atomic_load_explicit(val, memory_order_relaxed);
362}
363static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) {
364 return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add;
365}
366static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) {
367 return atomic_load_explicit(src, memory_order_relaxed);
368}
369static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) {
370 atomic_store_explicit(dst, val, memory_order_relaxed);
371}
372static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) {
373 atomic_store_explicit(dst, val, memory_order_release);
374}
375static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst,
376 void *val) {
377 return atomic_exchange_explicit(dst, val, memory_order_acquire);
378}
379static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) {
380 return atomic_compare_exchange_weak_explicit(
381 dst, &ref, val, memory_order_relaxed, memory_order_relaxed);
382}
383
384#define EXPECTED(x) __builtin_expect((x), 1)
385#define UNEXPECTED(x) __builtin_expect((x), 0)
386
387#endif
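// Illustrative sketch (not part of the upstream allocator): the wrappers above
// back the simple lock-free structures used later in this file, e.g. pushing
// onto a singly linked list with a compare-and-swap retry loop (hypothetical
// node type):
//
//   typedef struct node_t { struct node_t *next; } node_t;
//   static atomicptr_t list_head; // zero-initialized, empty list
//
//   static void list_push(node_t *node) {
//     void *head;
//     do {
//       head = atomic_load_ptr(&list_head);
//       node->next = (node_t *)head;
//     } while (!atomic_cas_ptr(&list_head, node, head));
//   }
//
// atomic_cas_ptr() is a weak CAS, so spurious failures are absorbed by the
// retry loop.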
388
389////////////
390///
391/// Statistics related functions (evaluate to nothing when statistics not
392/// enabled)
393///
394//////
395
396#if ENABLE_STATISTICS
397#define _rpmalloc_stat_inc(counter) atomic_incr32(counter)
398#define _rpmalloc_stat_dec(counter) atomic_decr32(counter)
399#define _rpmalloc_stat_add(counter, value) \
400 atomic_add32(counter, (int32_t)(value))
401#define _rpmalloc_stat_add64(counter, value) \
402 atomic_add64(counter, (int64_t)(value))
403#define _rpmalloc_stat_add_peak(counter, value, peak) \
404 do { \
405 int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); \
406 if (_cur_count > (peak)) \
407 peak = _cur_count; \
408 } while (0)
409#define _rpmalloc_stat_sub(counter, value) \
410 atomic_add32(counter, -(int32_t)(value))
411#define _rpmalloc_stat_inc_alloc(heap, class_idx) \
412 do { \
413 int32_t alloc_current = \
414 atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \
415 if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \
416 heap->size_class_use[class_idx].alloc_peak = alloc_current; \
417 atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \
418 } while (0)
419#define _rpmalloc_stat_inc_free(heap, class_idx) \
420 do { \
421 atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \
422 atomic_incr32(&heap->size_class_use[class_idx].free_total); \
423 } while (0)
424#else
425#define _rpmalloc_stat_inc(counter) \
426 do { \
427 } while (0)
428#define _rpmalloc_stat_dec(counter) \
429 do { \
430 } while (0)
431#define _rpmalloc_stat_add(counter, value) \
432 do { \
433 } while (0)
434#define _rpmalloc_stat_add64(counter, value) \
435 do { \
436 } while (0)
437#define _rpmalloc_stat_add_peak(counter, value, peak) \
438 do { \
439 } while (0)
440#define _rpmalloc_stat_sub(counter, value) \
441 do { \
442 } while (0)
443#define _rpmalloc_stat_inc_alloc(heap, class_idx) \
444 do { \
445 } while (0)
446#define _rpmalloc_stat_inc_free(heap, class_idx) \
447 do { \
448 } while (0)
449#endif
450
451///
452/// Preconfigured limits and sizes
453///
454
455//! Granularity of a small allocation block (must be power of two)
456#define SMALL_GRANULARITY 16
457//! Small granularity shift count
458#define SMALL_GRANULARITY_SHIFT 4
459//! Number of small block size classes
460#define SMALL_CLASS_COUNT 65
461//! Maximum size of a small block
462#define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1))
463//! Granularity of a medium allocation block
464#define MEDIUM_GRANULARITY 512
465//! Medium granularity shift count
466#define MEDIUM_GRANULARITY_SHIFT 9
467//! Number of medium block size classes
468#define MEDIUM_CLASS_COUNT 61
469//! Total number of small + medium size classes
470#define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT)
471//! Number of large block size classes
472#define LARGE_CLASS_COUNT 63
473//! Maximum size of a medium block
474#define MEDIUM_SIZE_LIMIT \
475 (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT))
476//! Maximum size of a large block
477#define LARGE_SIZE_LIMIT \
478 ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE)
479//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power
480//! of two)
481#define SPAN_HEADER_SIZE 128
482//! Number of spans in thread cache
483#define MAX_THREAD_SPAN_CACHE 400
484//! Number of spans to transfer between thread and global cache
485#define THREAD_SPAN_CACHE_TRANSFER 64
486//! Number of spans in thread cache for large spans (must be greater than
487//! LARGE_CLASS_COUNT / 2)
488#define MAX_THREAD_SPAN_LARGE_CACHE 100
489//! Number of spans to transfer between thread and global cache for large spans
490#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6
491
492_Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0,
493 "Small granularity must be power of two");
494_Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0,
495 "Span header size must be power of two");
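// Worked example of the limits above: SMALL_SIZE_LIMIT is
// 16 * (65 - 1) = 1024 bytes and MEDIUM_SIZE_LIMIT is
// 1024 + 512 * 61 = 32256 bytes. LARGE_SIZE_LIMIT additionally depends on the
// span size (with the default 64KiB spans it is 63 * 65536 - 128 bytes).
//
//   _Static_assert(SMALL_SIZE_LIMIT == 1024, "small limit");
//   _Static_assert(MEDIUM_SIZE_LIMIT == 32256, "medium limit");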
496
497#if ENABLE_VALIDATE_ARGS
498//! Maximum allocation size to avoid integer overflow
499#undef MAX_ALLOC_SIZE
500#define MAX_ALLOC_SIZE (((size_t) - 1) - _memory_span_size)
501#endif
502
503#define pointer_offset(ptr, ofs) (void *)((char *)(ptr) + (ptrdiff_t)(ofs))
504#define pointer_diff(first, second) \
505 (ptrdiff_t)((const char *)(first) - (const char *)(second))
506
507#define INVALID_POINTER ((void *)((uintptr_t) - 1))
508
509#define SIZE_CLASS_LARGE SIZE_CLASS_COUNT
510#define SIZE_CLASS_HUGE ((uint32_t) - 1)
511
512////////////
513///
514/// Data types
515///
516//////
517
518//! A memory heap, per thread
519typedef struct heap_t heap_t;
520//! Span of memory pages
521typedef struct span_t span_t;
522//! Span list
523typedef struct span_list_t span_list_t;
524//! Span active data
525typedef struct span_active_t span_active_t;
526//! Size class definition
527typedef struct size_class_t size_class_t;
528//! Global cache
529typedef struct global_cache_t global_cache_t;
530
531//! Flag indicating span is the first (master) span of a split superspan
532#define SPAN_FLAG_MASTER 1U
533//! Flag indicating span is a secondary (sub) span of a split superspan
534#define SPAN_FLAG_SUBSPAN 2U
535//! Flag indicating span has blocks with increased alignment
536#define SPAN_FLAG_ALIGNED_BLOCKS 4U
537//! Flag indicating an unmapped master span
538#define SPAN_FLAG_UNMAPPED_MASTER 8U
539
540#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
541struct span_use_t {
542 //! Current number of spans used (actually used, not in cache)
543 atomic32_t current;
544 //! High water mark of spans used
545 atomic32_t high;
546#if ENABLE_STATISTICS
547 //! Number of spans in deferred list
548 atomic32_t spans_deferred;
549 //! Number of spans transitioned to global cache
550 atomic32_t spans_to_global;
551 //! Number of spans transitioned from global cache
552 atomic32_t spans_from_global;
553 //! Number of spans transitioned to thread cache
554 atomic32_t spans_to_cache;
555 //! Number of spans transitioned from thread cache
556 atomic32_t spans_from_cache;
557 //! Number of spans transitioned to reserved state
558 atomic32_t spans_to_reserved;
559 //! Number of spans transitioned from reserved state
560 atomic32_t spans_from_reserved;
561 //! Number of raw memory map calls
562 atomic32_t spans_map_calls;
563#endif
564};
565typedef struct span_use_t span_use_t;
566#endif
567
568#if ENABLE_STATISTICS
569struct size_class_use_t {
570 //! Current number of allocations
571 atomic32_t alloc_current;
572 //! Peak number of allocations
573 int32_t alloc_peak;
574 //! Total number of allocations
575 atomic32_t alloc_total;
576 //! Total number of frees
577 atomic32_t free_total;
578 //! Number of spans in use
579 atomic32_t spans_current;
580 //! Number of spans transitioned to cache
581 int32_t spans_peak;
582 //! Number of spans transitioned to cache
583 atomic32_t spans_to_cache;
584 //! Number of spans transitioned from cache
585 atomic32_t spans_from_cache;
586 //! Number of spans transitioned from reserved state
587 atomic32_t spans_from_reserved;
588 //! Number of spans mapped
589 atomic32_t spans_map_calls;
590 int32_t unused;
591};
592typedef struct size_class_use_t size_class_use_t;
593#endif
594
595 // A span can either represent a single span of memory pages, with size
596 // declared by the span_map_count configuration variable, or a set of spans
597 // in a contiguous region, a super span. The term "span" usually refers to
598 // either a single span or a super span. A super span can further be divided
599 // into multiple spans (or, recursively, super spans), where the first
600 // (super)span is the master and subsequent (super)spans are subspans. The
601 // master span keeps track of how many subspans are still alive and mapped
602 // in virtual memory; once all subspans and the master have been unmapped,
603 // the entire superspan region is released and unmapped (on Windows, for
604 // example, the entire superspan range has to be released in the same call
605 // to release the virtual memory range, but individual subranges can be
606 // decommitted individually to reduce physical memory use).
607struct span_t {
608 //! Free list
609 void *free_list;
610 //! Total block count of size class
611 uint32_t block_count;
612 //! Size class
613 uint32_t size_class;
614 //! Index of last block initialized in free list
615 uint32_t free_list_limit;
616 //! Number of used blocks remaining when in partial state
617 uint32_t used_count;
618 //! Deferred free list
619 atomicptr_t free_list_deferred;
620 //! Size of deferred free list, or list of spans when part of a cache list
621 uint32_t list_size;
622 //! Size of a block
623 uint32_t block_size;
624 //! Flags and counters
625 uint32_t flags;
626 //! Number of spans
627 uint32_t span_count;
628 //! Total span counter for master spans
629 uint32_t total_spans;
630 //! Offset from master span for subspans
631 uint32_t offset_from_master;
632 //! Remaining span counter, for master spans
633 atomic32_t remaining_spans;
634 //! Alignment offset
635 uint32_t align_offset;
636 //! Owning heap
637 heap_t *heap;
638 //! Next span
639 span_t *next;
640 //! Previous span
641 span_t *prev;
642};
643_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");
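// Illustrative sketch (not part of the upstream allocator) of the
// master/subspan relationship described above: a subspan stores its distance
// from the master in span units, so the master header can be recovered with
// pointer arithmetic, mirroring the logic in _rpmalloc_span_unmap below:
//
//   static span_t *example_span_master(span_t *span) {
//     if (span->flags & SPAN_FLAG_MASTER)
//       return span;
//     return (span_t *)pointer_offset(
//         span, -(intptr_t)((uintptr_t)span->offset_from_master *
//                           _memory_span_size));
//   }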
644#if ENABLE_THREAD_CACHE
645typedef struct span_cache_t span_cache_t;
646struct span_cache_t {
647 size_t count;
648 span_t *span[MAX_THREAD_SPAN_CACHE];
649};
650typedef struct span_large_cache_t span_large_cache_t;
651struct span_large_cache_t {
652 size_t count;
653 span_t *span[MAX_THREAD_SPAN_LARGE_CACHE];
654};
655#endif
656
657struct heap_size_class_t {
658 //! Free list of active span
659 void *free_list;
660 //! Double linked list of partially used spans with free blocks.
661 // Previous span pointer in head points to tail span of list.
662 span_t *partial_span;
663 //! Early level cache of fully free spans
664 span_t *cache;
665};
666typedef struct heap_size_class_t heap_size_class_t;
667
668// Control structure for a heap, either a thread heap or a first class heap if
669// enabled
670struct heap_t {
671 //! Owning thread ID
672 uintptr_t owner_thread;
673 //! Free lists for each size class
674 heap_size_class_t size_class[SIZE_CLASS_COUNT];
675#if ENABLE_THREAD_CACHE
676 //! Arrays of fully freed spans, single span
677 span_cache_t span_cache;
678#endif
679 //! List of deferred free spans (single linked list)
680 atomicptr_t span_free_deferred;
681 //! Number of full spans
682 size_t full_span_count;
683 //! Mapped but unused spans
684 span_t *span_reserve;
685 //! Master span for mapped but unused spans
686 span_t *span_reserve_master;
687 //! Number of mapped but unused spans
688 uint32_t spans_reserved;
689 //! Child count
690 atomic32_t child_count;
691 //! Next heap in id list
692 heap_t *next_heap;
693 //! Next heap in orphan list
694 heap_t *next_orphan;
695 //! Heap ID
696 int32_t id;
697 //! Finalization state flag
698 int finalize;
699 //! Master heap owning the memory pages
700 heap_t *master_heap;
701#if ENABLE_THREAD_CACHE
702 //! Arrays of fully freed spans, large spans with > 1 span count
703 span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1];
704#endif
705#if RPMALLOC_FIRST_CLASS_HEAPS
706 //! Double linked list of fully utilized spans with free blocks for each size
707 //! class.
708 // Previous span pointer in head points to tail span of list.
709 span_t *full_span[SIZE_CLASS_COUNT];
710 //! Double linked list of large and huge spans allocated by this heap
711 span_t *large_huge_span;
712#endif
713#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
714 //! Current and high water mark of spans used per span count
715 span_use_t span_use[LARGE_CLASS_COUNT];
716#endif
717#if ENABLE_STATISTICS
718 //! Allocation stats per size class
719 size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1];
720 //! Number of bytes transitioned thread -> global
721 atomic64_t thread_to_global;
722 //! Number of bytes transitioned global -> thread
723 atomic64_t global_to_thread;
724#endif
725};
726
727 // Size class for defining a block size bucket
728struct size_class_t {
729 //! Size of blocks in this class
730 uint32_t block_size;
731 //! Number of blocks in each chunk
732 uint16_t block_count;
733 //! Class index this class is merged with
734 uint16_t class_idx;
735};
736_Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch");
737
738struct global_cache_t {
739 //! Cache lock
740 atomic32_t lock;
741 //! Cache count
742 uint32_t count;
743#if ENABLE_STATISTICS
744 //! Insert count
745 size_t insert_count;
746 //! Extract count
747 size_t extract_count;
748#endif
749 //! Cached spans
750 span_t *span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE];
751 //! Unlimited cache overflow
752 span_t *overflow;
753};
754
755////////////
756///
757/// Global data
758///
759//////
760
761//! Default span size (64KiB)
762#define _memory_default_span_size (64 * 1024)
763#define _memory_default_span_size_shift 16
764#define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1)))
765
766//! Initialized flag
767static int _rpmalloc_initialized;
768//! Main thread ID
769static uintptr_t _rpmalloc_main_thread_id;
770//! Configuration
771static rpmalloc_config_t _memory_config;
772//! Memory page size
773static size_t _memory_page_size;
774//! Shift to divide by page size
775static size_t _memory_page_size_shift;
776//! Granularity at which memory pages are mapped by OS
777static size_t _memory_map_granularity;
778#if RPMALLOC_CONFIGURABLE
779//! Size of a span of memory pages
780static size_t _memory_span_size;
781//! Shift to divide by span size
782static size_t _memory_span_size_shift;
783//! Mask to get to start of a memory span
784static uintptr_t _memory_span_mask;
785#else
786//! Hardwired span size
787#define _memory_span_size _memory_default_span_size
788#define _memory_span_size_shift _memory_default_span_size_shift
789#define _memory_span_mask _memory_default_span_mask
790#endif
791//! Number of spans to map in each map call
792static size_t _memory_span_map_count;
793//! Number of spans to keep reserved in each heap
794static size_t _memory_heap_reserve_count;
795//! Global size classes
796static size_class_t _memory_size_class[SIZE_CLASS_COUNT];
797//! Run-time size limit of medium blocks
798static size_t _memory_medium_size_limit;
799//! Heap ID counter
800static atomic32_t _memory_heap_id;
801//! Huge page support
802static int _memory_huge_pages;
803#if ENABLE_GLOBAL_CACHE
804//! Global span cache
805static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT];
806#endif
807//! Global reserved spans
808static span_t *_memory_global_reserve;
809//! Global reserved count
810static size_t _memory_global_reserve_count;
811//! Global reserved master
812static span_t *_memory_global_reserve_master;
813//! All heaps
814static heap_t *_memory_heaps[HEAP_ARRAY_SIZE];
815//! Used to restrict access to mapping memory for huge pages
816static atomic32_t _memory_global_lock;
817//! Orphaned heaps
818static heap_t *_memory_orphan_heaps;
819#if RPMALLOC_FIRST_CLASS_HEAPS
820//! Orphaned heaps (first class heaps)
821static heap_t *_memory_first_class_orphan_heaps;
822#endif
823#if ENABLE_STATISTICS
824//! Allocations counter
825static atomic64_t _allocation_counter;
826//! Deallocations counter
827static atomic64_t _deallocation_counter;
828//! Active heap count
829static atomic32_t _memory_active_heaps;
830//! Number of currently mapped memory pages
831static atomic32_t _mapped_pages;
832//! Peak number of concurrently mapped memory pages
833static int32_t _mapped_pages_peak;
834//! Number of mapped master spans
835static atomic32_t _master_spans;
836//! Number of unmapped dangling master spans
837static atomic32_t _unmapped_master_spans;
838//! Running counter of total number of mapped memory pages since start
839static atomic32_t _mapped_total;
840//! Running counter of total number of unmapped memory pages since start
841static atomic32_t _unmapped_total;
842//! Number of currently mapped memory pages in OS calls
843static atomic32_t _mapped_pages_os;
844//! Number of currently allocated pages in huge allocations
845static atomic32_t _huge_pages_current;
846//! Peak number of currently allocated pages in huge allocations
847static int32_t _huge_pages_peak;
848#endif
849
850////////////
851///
852/// Thread local heap and ID
853///
854//////
855
856//! Current thread heap
857#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \
858 defined(__TINYC__)
859static pthread_key_t _memory_thread_heap;
860#else
861#ifdef _MSC_VER
862#define _Thread_local __declspec(thread)
863#define TLS_MODEL
864#else
865#ifndef __HAIKU__
866#define TLS_MODEL __attribute__((tls_model("initial-exec")))
867#else
868#define TLS_MODEL
869#endif
870#if !defined(__clang__) && defined(__GNUC__)
871#define _Thread_local __thread
872#endif
873#endif
874static _Thread_local heap_t *_memory_thread_heap TLS_MODEL;
875#endif
876
877static inline heap_t *get_thread_heap_raw(void) {
878#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
879 return pthread_getspecific(_memory_thread_heap);
880#else
881 return _memory_thread_heap;
882#endif
883}
884
885//! Get the current thread heap
886static inline heap_t *get_thread_heap(void) {
887 heap_t *heap = get_thread_heap_raw();
888#if ENABLE_PRELOAD
889 if (EXPECTED(heap != 0))
890 return heap;
891 rpmalloc_initialize();
892 return get_thread_heap_raw();
893#else
894 return heap;
895#endif
896}
897
898//! Fast thread ID
899static inline uintptr_t get_thread_id(void) {
900#if defined(_WIN32)
901 return (uintptr_t)((void *)NtCurrentTeb());
902#elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__)
903 uintptr_t tid;
904#if defined(__i386__)
905 __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
906#elif defined(__x86_64__)
907#if defined(__MACH__)
908 __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
909#else
910 __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
911#endif
912#elif defined(__arm__)
913 __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
914#elif defined(__aarch64__)
915#if defined(__MACH__)
916 // tpidr_el0 likely unused, always return 0 on iOS
917 __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid));
918#else
919 __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
920#endif
921#else
922#error This platform needs implementation of get_thread_id()
923#endif
924 return tid;
925#else
926#error This platform needs implementation of get_thread_id()
927#endif
928}
929
930//! Set the current thread heap
931static void set_thread_heap(heap_t *heap) {
932#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \
933 defined(__TINYC__)
934 pthread_setspecific(_memory_thread_heap, heap);
935#else
936 _memory_thread_heap = heap;
937#endif
938 if (heap)
939 heap->owner_thread = get_thread_id();
940}
941
942//! Set main thread ID
943extern void rpmalloc_set_main_thread(void);
944
945void rpmalloc_set_main_thread(void) {
946 _rpmalloc_main_thread_id = get_thread_id();
947}
948
949static void _rpmalloc_spin(void) {
950#if defined(_MSC_VER)
951#if defined(_M_ARM64)
952 __yield();
953#else
954 _mm_pause();
955#endif
956#elif defined(__x86_64__) || defined(__i386__)
957 __asm__ volatile("pause" ::: "memory");
958#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7)
959 __asm__ volatile("yield" ::: "memory");
960#elif defined(__powerpc__) || defined(__powerpc64__)
961 // Probably never compiled for these architectures, but kept as a precaution
962 __asm__ volatile("or 27,27,27");
963#elif defined(__sparc__)
964 __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0");
965#else
966 struct timespec ts = {0};
967 nanosleep(&ts, 0);
968#endif
969}
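// Illustrative sketch (not part of the upstream allocator): _rpmalloc_spin()
// is the backoff used by the simple spin locks in this file (the global cache
// lock and _memory_global_lock), which follow this acquire/release pattern:
//
//   static atomic32_t example_lock; // 0 = unlocked, 1 = locked
//
//   static void example_lock_acquire(void) {
//     while (!atomic_cas32_acquire(&example_lock, 1, 0))
//       _rpmalloc_spin();
//   }
//   static void example_lock_release(void) {
//     atomic_store32_release(&example_lock, 0);
//   }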
970
971#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
972
973static void NTAPI RPMallocTlsOnThreadExit(PVOID module, DWORD reason,
974 PVOID reserved) {
975 switch (reason) {
976 case DLL_PROCESS_ATTACH:
977 break;
978 case DLL_PROCESS_DETACH:
980 break;
981 case DLL_THREAD_ATTACH:
982 break;
983 case DLL_THREAD_DETACH:
985 break;
986 }
987}
988
989#ifdef _WIN64
990#pragma comment(linker, "/INCLUDE:_tls_used")
991#pragma comment(linker, "/INCLUDE:rpmalloc_tls_thread_exit_callback")
992
993// .CRT$XL* is an array of PIMAGE_TLS_CALLBACK pointers on Windows, executed
994// in alphabetical order (A-Z) by the CRT. Since this is an allocator, it must
995// be deinitialized last, so the Y slot (.CRT$XLY) is used here.
996#pragma const_seg(".CRT$XLY")
997
998extern const PIMAGE_TLS_CALLBACK rpmalloc_tls_thread_exit_callback;
999const PIMAGE_TLS_CALLBACK rpmalloc_tls_thread_exit_callback =
1000 RPMallocTlsOnThreadExit;
1001
1002// Reset const section
1003#pragma const_seg()
1004#else // _WIN64
1005#pragma comment(linker, "/INCLUDE:__tls_used")
1006#pragma comment(linker, "/INCLUDE:_rpmalloc_tls_thread_exit_callback")
1007
1008#pragma data_seg(".CRT$XLY")
1009
1010PIMAGE_TLS_CALLBACK rpmalloc_tls_thread_exit_callback = RPMallocTlsOnThreadExit;
1011
1012// Reset data section
1013#pragma data_seg()
1014#endif // _WIN64
1015
1016static void NTAPI _rpmalloc_thread_destructor(void *value) {
1017#if ENABLE_OVERRIDE
1018 // If this is called on main thread it means rpmalloc_finalize
1019 // has not been called and shutdown is forced (through _exit) or unclean
1020 if (get_thread_id() == _rpmalloc_main_thread_id)
1021 return;
1022#endif
1023 if (value)
1024 rpmalloc_thread_finalize(1);
1025}
1026#endif
1027
1028////////////
1029///
1030/// Low level memory map/unmap
1031///
1032//////
1033
1034static void _rpmalloc_set_name(void *address, size_t size) {
1035#if defined(__linux__) || defined(__ANDROID__)
1036 const char *name = _memory_huge_pages ? _memory_config.huge_page_name
1037 : _memory_config.page_name;
1038 if (address == MAP_FAILED || !name)
1039 return;
1040 // If the kernel does not support CONFIG_ANON_VMA_NAME, or if the call
1041 // fails (e.g. due to an invalid name), this is effectively a no-op.
1042 (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size,
1043 (uintptr_t)name);
1044#else
1045 (void)sizeof(size);
1046 (void)sizeof(address);
1047#endif
1048}
1049
1050//! Map more virtual memory
1051// size is number of bytes to map
1052// offset receives the offset in bytes from start of mapped region
1053// returns address to start of mapped region to use
1054static void *_rpmalloc_mmap(size_t size, size_t *offset) {
1055 rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size");
1056 rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
1057 void *address = _memory_config.memory_map(size, offset);
1058 if (EXPECTED(address != 0)) {
1059 _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift),
1060 _mapped_pages_peak);
1061 _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift));
1062 }
1063 return address;
1064}
1065
1066//! Unmap virtual memory
1067// address is the memory address to unmap, as returned from _memory_map
1068// size is the number of bytes to unmap, which might be less than full region
1069// for a partial unmap offset is the offset in bytes to the actual mapped
1070// region, as set by _memory_map release is set to 0 for partial unmap, or size
1071// of entire range for a full unmap
1072static void _rpmalloc_unmap(void *address, size_t size, size_t offset,
1073 size_t release) {
1074 rpmalloc_assert(!release || (release >= size), "Invalid unmap size");
1075 rpmalloc_assert(!release || (release >= _memory_page_size),
1076 "Invalid unmap size");
1077 if (release) {
1078 rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size");
1079 _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift));
1080 _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift));
1081 }
1082 _memory_config.memory_unmap(address, size, offset, release);
1083}
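// Illustrative sketch (not part of the upstream allocator): both wrappers
// dispatch through the callbacks in _memory_config, so an embedder can supply
// its own backing store via rpmalloc_initialize_config() from the public
// rpmalloc.h API. This POSIX sketch only shows the plumbing; a real mapper
// must also honor the contract of _rpmalloc_mmap_os/_rpmalloc_unmap_os below
// (span-size alignment reported via *offset, decommit when release is 0):
//
//   static void *my_map(size_t size, size_t *offset) {
//     (void)offset; // a real mapper reports any alignment shift here
//     void *ptr = mmap(0, size, PROT_READ | PROT_WRITE,
//                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
//     return (ptr == MAP_FAILED) ? 0 : ptr;
//   }
//   static void my_unmap(void *address, size_t size, size_t offset,
//                        size_t release) {
//     (void)size; (void)offset; // partial decommit skipped in this sketch
//     if (release)
//       munmap(address, release);
//   }
//
//   rpmalloc_config_t config = {0};
//   config.memory_map = my_map;
//   config.memory_unmap = my_unmap;
//   rpmalloc_initialize_config(&config);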
1084
1085//! Default implementation to map new pages to virtual memory
1086static void *_rpmalloc_mmap_os(size_t size, size_t *offset) {
1087 // Either size is a heap (a single page) or a (multiple) span - we only need
1088 // to align spans, and only if larger than map granularity
1089 size_t padding = ((size >= _memory_span_size) &&
1092 : 0;
1093 rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
1094#if PLATFORM_WINDOWS
1095 // Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not
1096 // allocated unless/until the virtual addresses are actually accessed"
1097 void *ptr = VirtualAlloc(0, size + padding,
1098 (_memory_huge_pages ? MEM_LARGE_PAGES : 0) |
1099 MEM_RESERVE | MEM_COMMIT,
1100 PAGE_READWRITE);
1101 if (!ptr) {
1102 if (_memory_config.map_fail_callback) {
1103 if (_memory_config.map_fail_callback(size + padding))
1104 return _rpmalloc_mmap_os(size, offset);
1105 } else {
1106 rpmalloc_assert(ptr, "Failed to map virtual memory block");
1107 }
1108 return 0;
1109 }
1110#else
1111 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
1112#if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
1113 int fd = (int)VM_MAKE_TAG(240U);
1115 fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
1116 void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0);
1117#elif defined(MAP_HUGETLB)
1118 void *ptr = mmap(0, size + padding,
1119 PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE),
1120 (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
1121#if defined(MADV_HUGEPAGE)
1122 // In some configurations, huge page allocations might fail, so we fall
1123 // back to normal allocations and promote the region to a transparent
1124 // huge page
1125 if ((ptr == MAP_FAILED || !ptr) && _memory_huge_pages) {
1126 ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
1127 if (ptr && ptr != MAP_FAILED) {
1128 int prm = madvise(ptr, size + padding, MADV_HUGEPAGE);
1129 (void)prm;
1130 rpmalloc_assert((prm == 0), "Failed to promote the page to THP");
1131 }
1132 }
1133#endif
1134 _rpmalloc_set_name(ptr, size + padding);
1135#elif defined(MAP_ALIGNED)
1136 const size_t align =
1137 (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1));
1138 void *ptr =
1139 mmap(0, size + padding, PROT_READ | PROT_WRITE,
1140 (_memory_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0);
1141#elif defined(MAP_ALIGN)
1142 caddr_t base = (_memory_huge_pages ? (caddr_t)(4 << 20) : 0);
1143 void *ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE,
1144 (_memory_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0);
1145#else
1146 void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
1147#endif
1148 if ((ptr == MAP_FAILED) || !ptr) {
1149 if (_memory_config.map_fail_callback) {
1150 if (_memory_config.map_fail_callback(size + padding))
1151 return _rpmalloc_mmap_os(size, offset);
1152 } else if (errno != ENOMEM) {
1153 rpmalloc_assert((ptr != MAP_FAILED) && ptr,
1154 "Failed to map virtual memory block");
1155 }
1156 return 0;
1157 }
1158#endif
1159 _rpmalloc_stat_add(&_mapped_pages_os,
1160 (int32_t)((size + padding) >> _memory_page_size_shift));
1161 if (padding) {
1162 size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask);
1163 rpmalloc_assert(final_padding <= _memory_span_size,
1164 "Internal failure in padding");
1165 rpmalloc_assert(final_padding <= padding, "Internal failure in padding");
1166 rpmalloc_assert(!(final_padding % 8), "Internal failure in padding");
1167 ptr = pointer_offset(ptr, final_padding);
1168 *offset = final_padding >> 3;
1169 }
1171 !((uintptr_t)ptr & ~_memory_span_mask),
1172 "Internal failure in padding");
1173 return ptr;
1174}
1175
1176//! Default implementation to unmap pages from virtual memory
1177static void _rpmalloc_unmap_os(void *address, size_t size, size_t offset,
1178 size_t release) {
1179 rpmalloc_assert(release || (offset == 0), "Invalid unmap size");
1180 rpmalloc_assert(!release || (release >= _memory_page_size),
1181 "Invalid unmap size");
1182 rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size");
1183 if (release && offset) {
1184 offset <<= 3;
1185 address = pointer_offset(address, -(int32_t)offset);
1186 if ((release >= _memory_span_size) &&
1188 // Padding is always one span size
1189 release += _memory_span_size;
1190 }
1191 }
1192#if !DISABLE_UNMAP
1193#if PLATFORM_WINDOWS
1194 if (!VirtualFree(address, release ? 0 : size,
1195 release ? MEM_RELEASE : MEM_DECOMMIT)) {
1196 rpmalloc_assert(0, "Failed to unmap virtual memory block");
1197 }
1198#else
1199 if (release) {
1200 if (munmap(address, release)) {
1201 rpmalloc_assert(0, "Failed to unmap virtual memory block");
1202 }
1203 } else {
1204#if defined(MADV_FREE_REUSABLE)
1205 int ret;
1206 while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 &&
1207 (errno == EAGAIN))
1208 errno = 0;
1209 if ((ret == -1) && (errno != 0)) {
1210#elif defined(MADV_DONTNEED)
1211 if (madvise(address, size, MADV_DONTNEED)) {
1212#elif defined(MADV_PAGEOUT)
1213 if (madvise(address, size, MADV_PAGEOUT)) {
1214#elif defined(MADV_FREE)
1215 if (madvise(address, size, MADV_FREE)) {
1216#else
1217 if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
1218#endif
1219 rpmalloc_assert(0, "Failed to madvise virtual memory block as free");
1220 }
1221 }
1222#endif
1223#endif
1224 if (release)
1225 _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift);
1226}
1227
1229 span_t *subspan,
1230 size_t span_count);
1231
1232//! Use global reserved spans to fulfill a memory map request (reserve size must
1233//! be checked by caller)
1237 span, span_count);
1238 _memory_global_reserve_count -= span_count;
1241 (span_t *)pointer_offset(span, span_count << _memory_span_size_shift);
1242 else
1244 return span;
1245}
1246
1247//! Store the given spans as global reserve (must only be called from within new
1248//! heap allocation, not thread safe)
1250 size_t reserve_span_count) {
1252 _memory_global_reserve_count = reserve_span_count;
1253 _memory_global_reserve = reserve;
1254}
1255
1256////////////
1257///
1258/// Span linked list management
1259///
1260//////
1261
1262//! Add a span to double linked list at the head
1264 if (*head)
1265 (*head)->prev = span;
1266 span->next = *head;
1267 *head = span;
1268}
1269
1270//! Pop head span from double linked list
1272 span_t *span) {
1273 rpmalloc_assert(*head == span, "Linked list corrupted");
1274 span = *head;
1275 *head = span->next;
1276}
1277
1278//! Remove a span from double linked list
1280 span_t *span) {
1281 rpmalloc_assert(*head, "Linked list corrupted");
1282 if (*head == span) {
1283 *head = span->next;
1284 } else {
1285 span_t *next_span = span->next;
1286 span_t *prev_span = span->prev;
1287 prev_span->next = next_span;
1288 if (EXPECTED(next_span != 0))
1289 next_span->prev = prev_span;
1290 }
1291}
1292
1293////////////
1294///
1295/// Span control
1296///
1297//////
1298
1299static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span);
1300
1301static void _rpmalloc_heap_finalize(heap_t *heap);
1302
1303static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master,
1304 span_t *reserve,
1305 size_t reserve_span_count);
1306
1307//! Declare the span to be a subspan and store distance from master span and
1308//! span count
1310 span_t *subspan,
1311 size_t span_count) {
1312 rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER),
1313 "Span master pointer and/or flag mismatch");
1314 if (subspan != master) {
1315 subspan->flags = SPAN_FLAG_SUBSPAN;
1316 subspan->offset_from_master =
1317 (uint32_t)((uintptr_t)pointer_diff(subspan, master) >>
1319 subspan->align_offset = 0;
1320 }
1321 subspan->span_count = (uint32_t)span_count;
1322}
1323
1324//! Use reserved spans to fulfill a memory map request (reserve size must be
1325//! checked by caller)
1327 size_t span_count) {
1328 // Update the heap span reserve
1329 span_t *span = heap->span_reserve;
1330 heap->span_reserve =
1331 (span_t *)pointer_offset(span, span_count * _memory_span_size);
1332 heap->spans_reserved -= (uint32_t)span_count;
1333
1335 span_count);
1336 if (span_count <= LARGE_CLASS_COUNT)
1337 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved);
1338
1339 return span;
1340}
1341
1342//! Get the aligned number of spans to map in based on wanted count, configured
1343//! mapping granularity and the page size
1344static size_t _rpmalloc_span_align_count(size_t span_count) {
1345 size_t request_count = (span_count > _memory_span_map_count)
1346 ? span_count
1349 ((request_count * _memory_span_size) % _memory_page_size))
1350 request_count +=
1352 return request_count;
1353}
1354
1355//! Setup a newly mapped span
1356static void _rpmalloc_span_initialize(span_t *span, size_t total_span_count,
1357 size_t span_count, size_t align_offset) {
1358 span->total_spans = (uint32_t)total_span_count;
1359 span->span_count = (uint32_t)span_count;
1360 span->align_offset = (uint32_t)align_offset;
1361 span->flags = SPAN_FLAG_MASTER;
1362 atomic_store32(&span->remaining_spans, (int32_t)total_span_count);
1363}
1364
1365static void _rpmalloc_span_unmap(span_t *span);
1366
1367//! Map an aligned set of spans, taking configured mapping granularity and the
1368//! page size into account
1370 size_t span_count) {
1371 // If we already have some, but not enough, reserved spans, release those to
1372 // heap cache and map a new full set of spans. Otherwise we would waste memory
1373 // if page size > span size (huge pages)
1374 size_t aligned_span_count = _rpmalloc_span_align_count(span_count);
1375 size_t align_offset = 0;
1376 span_t *span = (span_t *)_rpmalloc_mmap(
1377 aligned_span_count * _memory_span_size, &align_offset);
1378 if (!span)
1379 return 0;
1380 _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset);
1381 _rpmalloc_stat_inc(&_master_spans);
1382 if (span_count <= LARGE_CLASS_COUNT)
1383 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls);
1384 if (aligned_span_count > span_count) {
1385 span_t *reserved_spans =
1386 (span_t *)pointer_offset(span, span_count * _memory_span_size);
1387 size_t reserved_count = aligned_span_count - span_count;
1388 if (heap->spans_reserved) {
1390 heap->span_reserve_master, heap->span_reserve, heap->spans_reserved);
1392 }
1393 if (reserved_count > _memory_heap_reserve_count) {
1394 // If using huge pages or an eager span map count, the global reserve spin lock is
1395 // held by caller, _rpmalloc_span_map
1396 rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1,
1397 "Global spin lock not held as expected");
1398 size_t remain_count = reserved_count - _memory_heap_reserve_count;
1399 reserved_count = _memory_heap_reserve_count;
1400 span_t *remain_span = (span_t *)pointer_offset(
1401 reserved_spans, reserved_count * _memory_span_size);
1407 }
1408 _rpmalloc_global_set_reserved_spans(span, remain_span, remain_count);
1409 }
1410 _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans,
1411 reserved_count);
1412 }
1413 return span;
1414}
1415
1416//! Map in memory pages for the given number of spans (or use previously
1417//! reserved pages)
1418static span_t *_rpmalloc_span_map(heap_t *heap, size_t span_count) {
1419 if (span_count <= heap->spans_reserved)
1420 return _rpmalloc_span_map_from_reserve(heap, span_count);
1421 span_t *span = 0;
1422 int use_global_reserve =
1425 if (use_global_reserve) {
1426 // If huge pages, make sure only one thread maps more memory to avoid bloat
1429 if (_memory_global_reserve_count >= span_count) {
1430 size_t reserve_count =
1431 (!heap->spans_reserved ? _memory_heap_reserve_count : span_count);
1432 if (_memory_global_reserve_count < reserve_count)
1433 reserve_count = _memory_global_reserve_count;
1434 span = _rpmalloc_global_get_reserved_spans(reserve_count);
1435 if (span) {
1436 if (reserve_count > span_count) {
1437 span_t *reserved_span = (span_t *)pointer_offset(
1438 span, span_count << _memory_span_size_shift);
1440 reserved_span,
1441 reserve_count - span_count);
1442 }
1443 // Already marked as subspan in _rpmalloc_global_get_reserved_spans
1444 span->span_count = (uint32_t)span_count;
1445 }
1446 }
1447 }
1448 if (!span)
1449 span = _rpmalloc_span_map_aligned_count(heap, span_count);
1450 if (use_global_reserve)
1452 return span;
1453}
1454
1455//! Unmap memory pages for the given number of spans (or mark as unused if no
1456//! partial unmappings)
1457static void _rpmalloc_span_unmap(span_t *span) {
1459 (span->flags & SPAN_FLAG_SUBSPAN),
1460 "Span flag corrupted");
1462 !(span->flags & SPAN_FLAG_SUBSPAN),
1463 "Span flag corrupted");
1464
1465 int is_master = !!(span->flags & SPAN_FLAG_MASTER);
1466 span_t *master =
1467 is_master ? span
1468 : ((span_t *)pointer_offset(
1469 span, -(intptr_t)((uintptr_t)span->offset_from_master *
1471 rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN),
1472 "Span flag corrupted");
1473 rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted");
1474
1475 size_t span_count = span->span_count;
1476 if (!is_master) {
1477 // Directly unmap subspans (unless huge pages, in which case we defer and
1478 // unmap entire page range with master)
1479 rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted");
1481 _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0);
1482 } else {
1483 // Special double flag to denote an unmapped master
1484 // It must be kept in memory since span header must be used
1485 span->flags |=
1487 _rpmalloc_stat_add(&_unmapped_master_spans, 1);
1488 }
1489
1490 if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) {
1491 // Everything unmapped, unmap the master span with release flag to unmap the
1492 // entire range of the super span
1493 rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) &&
1494 !!(master->flags & SPAN_FLAG_SUBSPAN),
1495 "Span flag corrupted");
1496 size_t unmap_count = master->span_count;
1498 unmap_count = master->total_spans;
1499 _rpmalloc_stat_sub(&_master_spans, 1);
1500 _rpmalloc_stat_sub(&_unmapped_master_spans, 1);
1501 _rpmalloc_unmap(master, unmap_count * _memory_span_size,
1502 master->align_offset,
1503 (size_t)master->total_spans * _memory_span_size);
1504 }
1505}
1506
1507//! Move the span (used for small or medium allocations) to the heap thread
1508//! cache
1510 rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted");
1512 "Invalid span size class");
1513 rpmalloc_assert(span->span_count == 1, "Invalid span count");
1514#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
1515 atomic_decr32(&heap->span_use[0].current);
1516#endif
1517 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
1518 if (!heap->finalize) {
1519 _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache);
1520 _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache);
1521 if (heap->size_class[span->size_class].cache)
1523 heap->size_class[span->size_class].cache);
1524 heap->size_class[span->size_class].cache = span;
1525 } else {
1527 }
1528}
1529
1530//! Initialize a (partial) free list up to next system memory page, while
1531//! reserving the first block as allocated, returning number of blocks in list
1532static uint32_t free_list_partial_init(void **list, void **first_block,
1533 void *page_start, void *block_start,
1534 uint32_t block_count,
1535 uint32_t block_size) {
1536 rpmalloc_assert(block_count, "Internal failure");
1537 *first_block = block_start;
1538 if (block_count > 1) {
1539 void *free_block = pointer_offset(block_start, block_size);
1540 void *block_end =
1541 pointer_offset(block_start, (size_t)block_size * block_count);
1542 // If block size is less than half a memory page, bound init to next memory
1543 // page boundary
1544 if (block_size < (_memory_page_size >> 1)) {
1545 void *page_end = pointer_offset(page_start, _memory_page_size);
1546 if (page_end < block_end)
1547 block_end = page_end;
1548 }
1549 *list = free_block;
1550 block_count = 2;
1551 void *next_block = pointer_offset(free_block, block_size);
1552 while (next_block < block_end) {
1553 *((void **)free_block) = next_block;
1554 free_block = next_block;
1555 ++block_count;
1556 next_block = pointer_offset(next_block, block_size);
1557 }
1558 *((void **)free_block) = 0;
1559 } else {
1560 *list = 0;
1561 }
1562 return block_count;
1563}
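// Illustrative sketch (not part of the upstream allocator): the free list
// built above is intrusive; the first word of every free block stores the
// pointer to the next free block and a null pointer terminates the list.
// Allocation therefore pops the head with two loads and a store, mirroring
// the small/medium fast path used later in this file:
//
//   static void *example_free_list_pop(void **list) {
//     void *block = *list;       // head block becomes the allocation
//     *list = *((void **)block); // next pointer is stored in the block itself
//     return block;
//   }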
1564
1565//! Initialize an unused span (from cache or mapped) to be new active span,
1566//! putting the initial free list in heap class free list
1568 heap_size_class_t *heap_size_class,
1569 span_t *span, uint32_t class_idx) {
1570 rpmalloc_assert(span->span_count == 1, "Internal failure");
1571 size_class_t *size_class = _memory_size_class + class_idx;
1572 span->size_class = class_idx;
1573 span->heap = heap;
1575 span->block_size = size_class->block_size;
1576 span->block_count = size_class->block_count;
1577 span->free_list = 0;
1578 span->list_size = 0;
1580
1581 // Setup free list. Only initialize one system page worth of free blocks in
1582 // list
1583 void *block;
1584 span->free_list_limit =
1585 free_list_partial_init(&heap_size_class->free_list, &block, span,
1587 size_class->block_count, size_class->block_size);
1588 // Link span as partial if there remains blocks to be initialized as free
1589 // list, or full if fully initialized
1590 if (span->free_list_limit < span->block_count) {
1591 _rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span);
1592 span->used_count = span->free_list_limit;
1593 } else {
1594#if RPMALLOC_FIRST_CLASS_HEAPS
1595 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
1596#endif
1597 ++heap->full_span_count;
1598 span->used_count = span->block_count;
1599 }
1600 return block;
1601}
1602
1604 // We need acquire semantics on the CAS operation since we are interested in
1605 // the list size. Refer to _rpmalloc_deallocate_defer_small_or_medium for
1606 // further comments on this dependency
1607 do {
1608 span->free_list =
1610 } while (span->free_list == INVALID_POINTER);
1611 span->used_count -= span->list_size;
1612 span->list_size = 0;
1614}
1615
1618 "Span free list corrupted");
1619 return !span->free_list && (span->free_list_limit >= span->block_count);
1620}
1621
1622static int _rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span,
1623 span_t **list_head) {
1624 void *free_list = heap->size_class[iclass].free_list;
1625 span_t *class_span = (span_t *)((uintptr_t)free_list & _memory_span_mask);
1626 if (span == class_span) {
1627 // Adopt the heap class free list back into the span free list
1628 void *block = span->free_list;
1629 void *last_block = 0;
1630 while (block) {
1631 last_block = block;
1632 block = *((void **)block);
1633 }
1634 uint32_t free_count = 0;
1635 block = free_list;
1636 while (block) {
1637 ++free_count;
1638 block = *((void **)block);
1639 }
1640 if (last_block) {
1641 *((void **)last_block) = free_list;
1642 } else {
1643 span->free_list = free_list;
1644 }
1645 heap->size_class[iclass].free_list = 0;
1646 span->used_count -= free_count;
1647 }
1648 // If this assert triggers you have memory leaks
1649 rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected");
1650 if (span->list_size == span->used_count) {
1651 _rpmalloc_stat_dec(&heap->span_use[0].current);
1652 _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current);
1653 // This function only used for spans in double linked lists
1654 if (list_head)
1657 return 1;
1658 }
1659 return 0;
1660}
1661
1662////////////
1663///
1664/// Global cache
1665///
1666//////
1667
1668#if ENABLE_GLOBAL_CACHE
1669
1670//! Finalize a global cache
1671static void _rpmalloc_global_cache_finalize(global_cache_t *cache) {
1672 while (!atomic_cas32_acquire(&cache->lock, 1, 0))
1674
1675 for (size_t ispan = 0; ispan < cache->count; ++ispan)
1676 _rpmalloc_span_unmap(cache->span[ispan]);
1677 cache->count = 0;
1678
1679 while (cache->overflow) {
1680 span_t *span = cache->overflow;
1681 cache->overflow = span->next;
1683 }
1684
1685 atomic_store32_release(&cache->lock, 0);
1686}
1687
1688static void _rpmalloc_global_cache_insert_spans(span_t **span,
1689 size_t span_count,
1690 size_t count) {
1691 const size_t cache_limit =
1694 (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
1695
1696 global_cache_t *cache = &_memory_span_cache[span_count - 1];
1697
1698 size_t insert_count = count;
1699 while (!atomic_cas32_acquire(&cache->lock, 1, 0))
1701
1702#if ENABLE_STATISTICS
1703 cache->insert_count += count;
1704#endif
1705 if ((cache->count + insert_count) > cache_limit)
1706 insert_count = cache_limit - cache->count;
1707
1708 memcpy(cache->span + cache->count, span, sizeof(span_t *) * insert_count);
1709 cache->count += (uint32_t)insert_count;
1710
1712 while (insert_count < count) {
1713#else
1714 // Enable unlimited cache if huge pages, or we will leak since it is unlikely
1715 // that an entire huge page will be unmapped, and we're unable to partially
1716 // decommit a huge page
1717 while ((_memory_page_size > _memory_span_size) && (insert_count < count)) {
1718#endif
1719 span_t *current_span = span[insert_count++];
1720 current_span->next = cache->overflow;
1721 cache->overflow = current_span;
1722 }
1723 atomic_store32_release(&cache->lock, 0);
1724
1725 span_t *keep = 0;
1726 for (size_t ispan = insert_count; ispan < count; ++ispan) {
1727 span_t *current_span = span[ispan];
1728 // Keep master spans that have remaining subspans to avoid dangling them
1729 if ((current_span->flags & SPAN_FLAG_MASTER) &&
1730 (atomic_load32(&current_span->remaining_spans) >
1731 (int32_t)current_span->span_count)) {
1732 current_span->next = keep;
1733 keep = current_span;
1734 } else {
1735 _rpmalloc_span_unmap(current_span);
1736 }
1737 }
1738
1739 if (keep) {
1740 while (!atomic_cas32_acquire(&cache->lock, 1, 0))
1742
1743 size_t islot = 0;
1744 while (keep) {
1745 for (; islot < cache->count; ++islot) {
1746 span_t *current_span = cache->span[islot];
1747 if (!(current_span->flags & SPAN_FLAG_MASTER) ||
1748 ((current_span->flags & SPAN_FLAG_MASTER) &&
1749 (atomic_load32(&current_span->remaining_spans) <=
1750 (int32_t)current_span->span_count))) {
1751 _rpmalloc_span_unmap(current_span);
1752 cache->span[islot] = keep;
1753 break;
1754 }
1755 }
1756 if (islot == cache->count)
1757 break;
1758 keep = keep->next;
1759 }
1760
1761 if (keep) {
1762 span_t *tail = keep;
1763 while (tail->next)
1764 tail = tail->next;
1765 tail->next = cache->overflow;
1766 cache->overflow = keep;
1767 }
1768
1769 atomic_store32_release(&cache->lock, 0);
1770 }
1771}
1772
1773static size_t _rpmalloc_global_cache_extract_spans(span_t **span,
1774 size_t span_count,
1775 size_t count) {
1776 global_cache_t *cache = &_memory_span_cache[span_count - 1];
1777
1778 size_t extract_count = 0;
1779 while (!atomic_cas32_acquire(&cache->lock, 1, 0))
1781
1782#if ENABLE_STATISTICS
1783 cache->extract_count += count;
1784#endif
1785 size_t want = count - extract_count;
1786 if (want > cache->count)
1787 want = cache->count;
1788
1789 memcpy(span + extract_count, cache->span + (cache->count - want),
1790 sizeof(span_t *) * want);
1791 cache->count -= (uint32_t)want;
1792 extract_count += want;
1793
1794 while ((extract_count < count) && cache->overflow) {
1795 span_t *current_span = cache->overflow;
1796 span[extract_count++] = current_span;
1797 cache->overflow = current_span->next;
1798 }
1799
1800#if ENABLE_ASSERTS
1801 for (size_t ispan = 0; ispan < extract_count; ++ispan) {
1802 rpmalloc_assert(span[ispan]->span_count == span_count,
1803 "Global cache span count mismatch");
1804 }
1805#endif
1806
1807 atomic_store32_release(&cache->lock, 0);
1808
1809 return extract_count;
1810}
1811
1812#endif
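// Illustrative aside (not part of rpmalloc.c): the global cache above is
// guarded by a single 32-bit spin lock, taken with a CAS that has acquire
// semantics and released with a release store. A minimal standalone sketch
// of that same pattern using plain C11 atomics (all names here are made up
// for the example) is shown below, compiled out with #if 0.
#if 0
#include <stdatomic.h>

static atomic_int example_lock; // 0 = unlocked, 1 = locked

static void example_lock_acquire(void) {
  int expected = 0;
  while (!atomic_compare_exchange_weak_explicit(
      &example_lock, &expected, 1, memory_order_acquire,
      memory_order_relaxed)) {
    expected = 0; // a failed CAS rewrites 'expected'; reset and spin again
  }
}

static void example_lock_release(void) {
  atomic_store_explicit(&example_lock, 0, memory_order_release);
}
#endif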
1813
1814////////////
1815///
1816/// Heap control
1817///
1818//////
1819
1820static void _rpmalloc_deallocate_huge(span_t *);
1821
1822//! Store the given spans as reserve in the given heap
1823 static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master,
1824 span_t *reserve,
1825 size_t reserve_span_count) {
1826 heap->span_reserve_master = master;
1827 heap->span_reserve = reserve;
1828 heap->spans_reserved = (uint32_t)reserve_span_count;
1829}
1830
1831//! Adopt the deferred span cache list, optionally extracting the first single
1832//! span for immediate re-use
1833 static void _rpmalloc_heap_cache_adopt_deferred(heap_t *heap,
1834 span_t **single_span) {
1835 span_t *span = (span_t *)((void *)atomic_exchange_ptr_acquire(
1836 &heap->span_free_deferred, 0));
1837 while (span) {
1838 span_t *next_span = (span_t *)span->free_list;
1839 rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted");
1840 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
1841 rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted");
1842 --heap->full_span_count;
1843 _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred);
1844#if RPMALLOC_FIRST_CLASS_HEAPS
1845 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class],
1846 span);
1847#endif
1848 _rpmalloc_stat_dec(&heap->span_use[0].current);
1849 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
1850 if (single_span && !*single_span)
1851 *single_span = span;
1852 else
1853 _rpmalloc_heap_cache_insert(heap, span);
1854 } else {
1855 if (span->size_class == SIZE_CLASS_HUGE) {
1856 _rpmalloc_deallocate_huge(span);
1857 } else {
1858 rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE,
1859 "Span size class invalid");
1860 rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted");
1861 --heap->full_span_count;
1862#if RPMALLOC_FIRST_CLASS_HEAPS
1863 _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span);
1864#endif
1865 uint32_t idx = span->span_count - 1;
1866 _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred);
1867 _rpmalloc_stat_dec(&heap->span_use[idx].current);
1868 if (!idx && single_span && !*single_span)
1869 *single_span = span;
1870 else
1871 _rpmalloc_heap_cache_insert(heap, span);
1872 }
1873 }
1874 span = next_span;
1875 }
1876}
1877
1878static void _rpmalloc_heap_unmap(heap_t *heap) {
1879 if (!heap->master_heap) {
1880 if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) {
1881 span_t *span = (span_t *)((uintptr_t)heap & _memory_span_mask);
1882 _rpmalloc_span_unmap(span);
1883 }
1884 } else {
1885 if (atomic_decr32(&heap->master_heap->child_count) == 0) {
1886 _rpmalloc_heap_unmap(heap->master_heap);
1887 }
1888 }
1889}
1890
1891 static void _rpmalloc_heap_global_finalize(heap_t *heap) {
1892 if (heap->finalize++ > 1) {
1893 --heap->finalize;
1894 return;
1895 }
1896
1897 _rpmalloc_heap_finalize(heap);
1898
1899#if ENABLE_THREAD_CACHE
1900 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1901 span_cache_t *span_cache;
1902 if (!iclass)
1903 span_cache = &heap->span_cache;
1904 else
1905 span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
1906 for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
1907 _rpmalloc_span_unmap(span_cache->span[ispan]);
1908 span_cache->count = 0;
1909 }
1910#endif
1911
1912 if (heap->full_span_count) {
1913 --heap->finalize;
1914 return;
1915 }
1916
1917 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
1918 if (heap->size_class[iclass].free_list ||
1919 heap->size_class[iclass].partial_span) {
1920 --heap->finalize;
1921 return;
1922 }
1923 }
1924 // Heap is now completely free, unmap and remove from heap list
1925 size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE;
1926 heap_t *list_heap = _memory_heaps[list_idx];
1927 if (list_heap == heap) {
1928 _memory_heaps[list_idx] = heap->next_heap;
1929 } else {
1930 while (list_heap->next_heap != heap)
1931 list_heap = list_heap->next_heap;
1932 list_heap->next_heap = heap->next_heap;
1933 }
1934
1935 _rpmalloc_heap_unmap(heap);
1936}
1937
1938//! Insert a single span into thread heap cache, releasing to global cache if
1939//! overflow
1940static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span) {
1941 if (UNEXPECTED(heap->finalize != 0)) {
1942 _rpmalloc_span_unmap(span);
1943 _rpmalloc_heap_global_finalize(heap);
1944 return;
1945 }
1946#if ENABLE_THREAD_CACHE
1947 size_t span_count = span->span_count;
1948 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache);
1949 if (span_count == 1) {
1950 span_cache_t *span_cache = &heap->span_cache;
1951 span_cache->span[span_cache->count++] = span;
1952 if (span_cache->count == MAX_THREAD_SPAN_CACHE) {
1953 const size_t remain_count =
1954 MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER;
1955#if ENABLE_GLOBAL_CACHE
1956 _rpmalloc_stat_add64(&heap->thread_to_global,
1957 THREAD_SPAN_CACHE_TRANSFER * _memory_span_size);
1958 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global,
1959 THREAD_SPAN_CACHE_TRANSFER);
1960 _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count,
1961 span_count,
1962 THREAD_SPAN_CACHE_TRANSFER);
1963#else
1964 for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan)
1965 _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
1966#endif
1967 span_cache->count = remain_count;
1968 }
1969 } else {
1970 size_t cache_idx = span_count - 2;
1971 span_large_cache_t *span_cache = heap->span_large_cache + cache_idx;
1972 span_cache->span[span_cache->count++] = span;
1973 const size_t cache_limit =
1974 (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
1975 if (span_cache->count == cache_limit) {
1976 const size_t transfer_limit = 2 + (cache_limit >> 2);
1977 const size_t transfer_count =
1978 (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit
1980 : transfer_limit);
1981 const size_t remain_count = cache_limit - transfer_count;
1982#if ENABLE_GLOBAL_CACHE
1983 _rpmalloc_stat_add64(&heap->thread_to_global,
1984 transfer_count * span_count * _memory_span_size);
1985 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global,
1986 transfer_count);
1987 _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count,
1988 span_count, transfer_count);
1989#else
1990 for (size_t ispan = 0; ispan < transfer_count; ++ispan)
1991 _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
1992#endif
1993 span_cache->count = remain_count;
1994 }
1995 }
1996#else
1997 (void)sizeof(heap);
1998 (void)sizeof(span);
1999#endif
2000}
2001
2002//! Extract the given number of spans from the different cache levels
2003 static span_t *_rpmalloc_heap_thread_cache_extract(heap_t *heap,
2004 size_t span_count) {
2005 span_t *span = 0;
2006#if ENABLE_THREAD_CACHE
2007 span_cache_t *span_cache;
2008 if (span_count == 1)
2009 span_cache = &heap->span_cache;
2010 else
2011 span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2));
2012 if (span_cache->count) {
2013 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache);
2014 return span_cache->span[--span_cache->count];
2015 }
2016#endif
2017 return span;
2018}
2019
2020 static span_t *_rpmalloc_heap_thread_cache_deferred_extract(heap_t *heap,
2021 size_t span_count) {
2022 span_t *span = 0;
2023 if (span_count == 1) {
2024 _rpmalloc_heap_cache_adopt_deferred(heap, &span);
2025 } else {
2026 _rpmalloc_heap_cache_adopt_deferred(heap, 0);
2027 span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
2028 }
2029 return span;
2030}
2031
2032 static span_t *_rpmalloc_heap_reserved_extract(heap_t *heap,
2033 size_t span_count) {
2034 if (heap->spans_reserved >= span_count)
2035 return _rpmalloc_span_map(heap, span_count);
2036 return 0;
2037}
2038
2039//! Extract a span from the global cache
2040 static span_t *_rpmalloc_heap_global_cache_extract(heap_t *heap,
2041 size_t span_count) {
2042#if ENABLE_GLOBAL_CACHE
2043#if ENABLE_THREAD_CACHE
2044 span_cache_t *span_cache;
2045 size_t wanted_count;
2046 if (span_count == 1) {
2047 span_cache = &heap->span_cache;
2048 wanted_count = THREAD_SPAN_CACHE_TRANSFER;
2049 } else {
2050 span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2));
2051 wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER;
2052 }
2053 span_cache->count = _rpmalloc_global_cache_extract_spans(
2054 span_cache->span, span_count, wanted_count);
2055 if (span_cache->count) {
2056 _rpmalloc_stat_add64(&heap->global_to_thread,
2057 span_count * span_cache->count * _memory_span_size);
2058 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global,
2059 span_cache->count);
2060 return span_cache->span[--span_cache->count];
2061 }
2062#else
2063 span_t *span = 0;
2064 size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1);
2065 if (count) {
2066 _rpmalloc_stat_add64(&heap->global_to_thread,
2067 span_count * count * _memory_span_size);
2068 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global,
2069 count);
2070 return span;
2071 }
2072#endif
2073#endif
2074 (void)sizeof(heap);
2075 (void)sizeof(span_count);
2076 return 0;
2077}
2078
2079static void _rpmalloc_inc_span_statistics(heap_t *heap, size_t span_count,
2080 uint32_t class_idx) {
2081 (void)sizeof(heap);
2082 (void)sizeof(span_count);
2083 (void)sizeof(class_idx);
2084#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
2085 uint32_t idx = (uint32_t)span_count - 1;
2086 uint32_t current_count =
2087 (uint32_t)atomic_incr32(&heap->span_use[idx].current);
2088 if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high))
2089 atomic_store32(&heap->span_use[idx].high, (int32_t)current_count);
2090 _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1,
2091 heap->size_class_use[class_idx].spans_peak);
2092#endif
2093}
2094
2095//! Get a span from one of the cache levels (thread cache, reserved, global
2096//! cache) or fallback to mapping more memory
2097static span_t *
2098 _rpmalloc_heap_extract_new_span(heap_t *heap,
2099 heap_size_class_t *heap_size_class,
2100 size_t span_count, uint32_t class_idx) {
2101 span_t *span;
2102#if ENABLE_THREAD_CACHE
2103 if (heap_size_class && heap_size_class->cache) {
2104 span = heap_size_class->cache;
2105 heap_size_class->cache =
2106 (heap->span_cache.count
2107 ? heap->span_cache.span[--heap->span_cache.count]
2108 : 0);
2109 _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
2110 return span;
2111 }
2112#endif
2113 (void)sizeof(class_idx);
2114 // Allow 50% overhead to increase cache hits
2115 size_t base_span_count = span_count;
2116 size_t limit_span_count =
2117 (span_count > 2) ? (span_count + (span_count >> 1)) : span_count;
2118 if (limit_span_count > LARGE_CLASS_COUNT)
2119 limit_span_count = LARGE_CLASS_COUNT;
2120 do {
2121 span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
2122 if (EXPECTED(span != 0)) {
2123 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
2124 _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
2125 return span;
2126 }
2127 span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count);
2128 if (EXPECTED(span != 0)) {
2129 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
2130 _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
2131 return span;
2132 }
2133 span = _rpmalloc_heap_global_cache_extract(heap, span_count);
2134 if (EXPECTED(span != 0)) {
2135 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
2136 _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
2137 return span;
2138 }
2139 span = _rpmalloc_heap_reserved_extract(heap, span_count);
2140 if (EXPECTED(span != 0)) {
2141 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved);
2142 _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
2143 return span;
2144 }
2145 ++span_count;
2146 } while (span_count <= limit_span_count);
2147 // Final fallback, map in more virtual memory
2148 span = _rpmalloc_span_map(heap, base_span_count);
2149 _rpmalloc_inc_span_statistics(heap, base_span_count, class_idx);
2150 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls);
2151 return span;
2152}
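// Usage sketch (illustrative, compiled out): the cache hierarchy walked above
// means that freeing a block and then allocating the same size again from the
// same thread is typically served from the thread-local caches with no new
// memory mapping. Assumes rpmalloc.h and default configuration.
#if 0
#include "rpmalloc.h"

static void cache_reuse_example(void) {
  rpmalloc_initialize();
  void *first = rpmalloc(16 * 1024);  // medium sized block, maps a span
  rpfree(first);                      // the now-empty span goes to the cache
  void *second = rpmalloc(16 * 1024); // typically reuses the cached span
  rpfree(second);
  rpmalloc_finalize();
}
#endif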
2153
2154 static void _rpmalloc_heap_initialize(heap_t *heap) {
2155 _rpmalloc_memset_const(heap, 0, sizeof(heap_t));
2156 // Get a new heap ID
2157 heap->id = 1 + atomic_incr32(&_memory_heap_id);
2158
2159 // Link in heap in heap ID map
2160 size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE;
2161 heap->next_heap = _memory_heaps[list_idx];
2162 _memory_heaps[list_idx] = heap;
2163}
2164
2165static void _rpmalloc_heap_orphan(heap_t *heap, int first_class) {
2166 heap->owner_thread = (uintptr_t)-1;
2167#if RPMALLOC_FIRST_CLASS_HEAPS
2168 heap_t **heap_list =
2169 (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps);
2170#else
2171 (void)sizeof(first_class);
2172 heap_t **heap_list = &_memory_orphan_heaps;
2173#endif
2174 heap->next_orphan = *heap_list;
2175 *heap_list = heap;
2176}
2177
2178//! Allocate a new heap from newly mapped memory pages
2179 static heap_t *_rpmalloc_heap_allocate_new(void) {
2180 // Map in pages for 16 heaps. If page size is greater than required size for
2181 // this, map a page and use first part for heaps and remaining part for spans
2182 // for allocations. Adds a lot of complexity, but saves a lot of memory on
2183 // systems where page size > 64 spans (4MiB)
2184 size_t heap_size = sizeof(heap_t);
2185 size_t aligned_heap_size = 16 * ((heap_size + 15) / 16);
2186 size_t request_heap_count = 16;
2187 size_t heap_span_count = ((aligned_heap_size * request_heap_count) +
2188 sizeof(span_t) + _memory_span_size - 1) /
2189 _memory_span_size;
2190 size_t block_size = _memory_span_size * heap_span_count;
2191 size_t span_count = heap_span_count;
2192 span_t *span = 0;
2193 // If there are global reserved spans, use these first
2194 if (_memory_global_reserve_count >= heap_span_count) {
2195 span = _rpmalloc_global_get_reserved_spans(heap_span_count);
2196 }
2197 if (!span) {
2198 if (_memory_page_size > block_size) {
2199 span_count = _memory_page_size / _memory_span_size;
2200 block_size = _memory_page_size;
2201 // If using huge pages, make sure to grab enough heaps to avoid
2202 // reallocating a huge page just to serve new heaps
2203 size_t possible_heap_count =
2204 (block_size - sizeof(span_t)) / aligned_heap_size;
2205 if (possible_heap_count >= (request_heap_count * 16))
2206 request_heap_count *= 16;
2207 else if (possible_heap_count < request_heap_count)
2208 request_heap_count = possible_heap_count;
2209 heap_span_count = ((aligned_heap_size * request_heap_count) +
2210 sizeof(span_t) + _memory_span_size - 1) /
2211 _memory_span_size;
2212 }
2213
2214 size_t align_offset = 0;
2215 span = (span_t *)_rpmalloc_mmap(block_size, &align_offset);
2216 if (!span)
2217 return 0;
2218
2219 // Master span will contain the heaps
2220 _rpmalloc_stat_inc(&_master_spans);
2221 _rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset);
2222 }
2223
2224 size_t remain_size = _memory_span_size - sizeof(span_t);
2225 heap_t *heap = (heap_t *)pointer_offset(span, sizeof(span_t));
2226 _rpmalloc_heap_initialize(heap);
2227
2228 // Put extra heaps as orphans
2229 size_t num_heaps = remain_size / aligned_heap_size;
2230 if (num_heaps < request_heap_count)
2231 num_heaps = request_heap_count;
2232 atomic_store32(&heap->child_count, (int32_t)num_heaps - 1);
2233 heap_t *extra_heap = (heap_t *)pointer_offset(heap, aligned_heap_size);
2234 while (num_heaps > 1) {
2235 _rpmalloc_heap_initialize(extra_heap);
2236 extra_heap->master_heap = heap;
2237 _rpmalloc_heap_orphan(extra_heap, 1);
2238 extra_heap = (heap_t *)pointer_offset(extra_heap, aligned_heap_size);
2239 --num_heaps;
2240 }
2241
2242 if (span_count > heap_span_count) {
2243 // Cap reserved spans
2244 size_t remain_count = span_count - heap_span_count;
2245 size_t reserve_count =
2246 (remain_count > _memory_heap_reserve_count ? _memory_heap_reserve_count
2247 : remain_count);
2248 span_t *remain_span =
2249 (span_t *)pointer_offset(span, heap_span_count * _memory_span_size);
2250 _rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count);
2251
2252 if (remain_count > reserve_count) {
2253 // Set to global reserved spans
2254 remain_span = (span_t *)pointer_offset(remain_span,
2255 reserve_count * _memory_span_size);
2256 reserve_count = remain_count - reserve_count;
2257 _rpmalloc_global_set_reserved_spans(span, remain_span, reserve_count);
2258 }
2259 }
2260
2261 return heap;
2262}
2263
2264 static heap_t *_rpmalloc_heap_extract_orphan(heap_t **heap_list) {
2265 heap_t *heap = *heap_list;
2266 *heap_list = (heap ? heap->next_orphan : 0);
2267 return heap;
2268}
2269
2270//! Allocate a new heap, potentially reusing a previously orphaned heap
2271static heap_t *_rpmalloc_heap_allocate(int first_class) {
2272 heap_t *heap = 0;
2273 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0))
2274 _rpmalloc_spin();
2275 if (first_class == 0)
2276 heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps);
2277#if RPMALLOC_FIRST_CLASS_HEAPS
2278 if (!heap)
2279 heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps);
2280#endif
2281 if (!heap)
2282 heap = _rpmalloc_heap_allocate_new();
2283 atomic_store32_release(&_memory_global_lock, 0);
2284 if (heap)
2285 _rpmalloc_heap_cache_adopt_deferred(heap, 0);
2286 return heap;
2287}
2288
2289static void _rpmalloc_heap_release(void *heapptr, int first_class,
2290 int release_cache) {
2291 heap_t *heap = (heap_t *)heapptr;
2292 if (!heap)
2293 return;
2294 // Release thread cache spans back to global cache
2295 _rpmalloc_heap_cache_adopt_deferred(heap, 0);
2296 if (release_cache || heap->finalize) {
2297#if ENABLE_THREAD_CACHE
2298 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2299 span_cache_t *span_cache;
2300 if (!iclass)
2301 span_cache = &heap->span_cache;
2302 else
2303 span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
2304 if (!span_cache->count)
2305 continue;
2306#if ENABLE_GLOBAL_CACHE
2307 if (heap->finalize) {
2308 for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
2309 _rpmalloc_span_unmap(span_cache->span[ispan]);
2310 } else {
2311 _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count *
2312 (iclass + 1) *
2313 _memory_span_size);
2314 _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global,
2315 span_cache->count);
2316 _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1,
2317 span_cache->count);
2318 }
2319#else
2320 for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
2321 _rpmalloc_span_unmap(span_cache->span[ispan]);
2322#endif
2323 span_cache->count = 0;
2324 }
2325#endif
2326 }
2327
2328 if (get_thread_heap_raw() == heap)
2329 set_thread_heap(0);
2330
2331#if ENABLE_STATISTICS
2332 atomic_decr32(&_memory_active_heaps);
2333 rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0,
2334 "Still active heaps during finalization");
2335#endif
2336
2337 // If we are forcibly terminating with _exit the state of the
2338 // lock atomic is unknown and it's best to just go ahead and exit
2339 if (get_thread_id() != _rpmalloc_main_thread_id) {
2340 while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0))
2341 _rpmalloc_spin();
2342 }
2343 _rpmalloc_heap_orphan(heap, first_class);
2344 atomic_store32_release(&_memory_global_lock, 0);
2345}
2346
2347static void _rpmalloc_heap_release_raw(void *heapptr, int release_cache) {
2348 _rpmalloc_heap_release(heapptr, 0, release_cache);
2349}
2350
2351static void _rpmalloc_heap_release_raw_fc(void *heapptr) {
2352 _rpmalloc_heap_release_raw(heapptr, 1);
2353}
2354
2355 static void _rpmalloc_heap_finalize(heap_t *heap) {
2356 if (heap->spans_reserved) {
2357 span_t *span = _rpmalloc_span_map(heap, heap->spans_reserved);
2358 _rpmalloc_span_unmap(span);
2359 heap->spans_reserved = 0;
2360 }
2361
2362 _rpmalloc_heap_cache_adopt_deferred(heap, 0);
2363
2364 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
2365 if (heap->size_class[iclass].cache)
2366 _rpmalloc_span_unmap(heap->size_class[iclass].cache);
2367 heap->size_class[iclass].cache = 0;
2368 span_t *span = heap->size_class[iclass].partial_span;
2369 while (span) {
2370 span_t *next = span->next;
2371 _rpmalloc_span_finalize(heap, iclass, span,
2372 &heap->size_class[iclass].partial_span);
2373 span = next;
2374 }
2375 // If class still has a free list it must be a full span
2376 if (heap->size_class[iclass].free_list) {
2377 span_t *class_span =
2378 (span_t *)((uintptr_t)heap->size_class[iclass].free_list &
2379 _memory_span_mask);
2380 span_t **list = 0;
2381#if RPMALLOC_FIRST_CLASS_HEAPS
2382 list = &heap->full_span[iclass];
2383#endif
2384 --heap->full_span_count;
2385 if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) {
2386 if (list)
2387 _rpmalloc_span_double_link_list_remove(list, class_span);
2388 _rpmalloc_span_double_link_list_add(
2389 &heap->size_class[iclass].partial_span, class_span);
2390 }
2391 }
2392 }
2393
2394#if ENABLE_THREAD_CACHE
2395 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2396 span_cache_t *span_cache;
2397 if (!iclass)
2398 span_cache = &heap->span_cache;
2399 else
2400 span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
2401 for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
2402 _rpmalloc_span_unmap(span_cache->span[ispan]);
2403 span_cache->count = 0;
2404 }
2405#endif
2407 "Heaps still active during finalization");
2408}
2409
2410////////////
2411///
2412/// Allocation entry points
2413///
2414//////
2415
2416//! Pop first block from a free list
2417static void *free_list_pop(void **list) {
2418 void *block = *list;
2419 *list = *((void **)block);
2420 return block;
2421}
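// Illustrative aside (compiled out): free_list_pop above treats the first
// bytes of every free block as the "next" pointer, the classic intrusive
// free list. A minimal standalone sketch of the matching push/pop pair
// (example names, not rpmalloc internals):
#if 0
static void example_free_list_push(void **list, void *block) {
  *((void **)block) = *list; // store the current head inside the freed block
  *list = block;
}

static void *example_free_list_pop(void **list) {
  void *block = *list;
  *list = *((void **)block); // the next pointer lives in the block itself
  return block;
}
#endif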
2422
2423//! Allocate a small/medium sized memory block from the given heap
2424 static void *_rpmalloc_allocate_from_heap_fallback(
2425 heap_t *heap, heap_size_class_t *heap_size_class, uint32_t class_idx) {
2426 span_t *span = heap_size_class->partial_span;
2427 rpmalloc_assume(heap != 0);
2428 if (EXPECTED(span != 0)) {
2429 rpmalloc_assert(span->block_count ==
2430 _memory_size_class[span->size_class].block_count,
2431 "Span block count corrupted");
2433 "Internal failure");
2434 void *block;
2435 if (span->free_list) {
2436 // Span local free list is not empty, swap to size class free list
2437 block = free_list_pop(&span->free_list);
2438 heap_size_class->free_list = span->free_list;
2439 span->free_list = 0;
2440 } else {
2441 // If the span did not fully initialize free list, link up another page
2442 // worth of blocks
2443 void *block_start = pointer_offset(
2444 span, SPAN_HEADER_SIZE +
2445 ((size_t)span->free_list_limit * span->block_size));
2446 span->free_list_limit += free_list_partial_init(
2447 &heap_size_class->free_list, &block,
2448 (void *)((uintptr_t)block_start & ~(_memory_page_size - 1)),
2449 block_start, span->block_count - span->free_list_limit,
2450 span->block_size);
2451 }
2453 "Span block count corrupted");
2454 span->used_count = span->free_list_limit;
2455
2456 // Swap in deferred free list if present
2457 if (atomic_load_ptr(&span->free_list_deferred))
2458 _rpmalloc_span_extract_free_list_deferred(span);
2459
2460 // If span is still not fully utilized keep it in partial list and early
2461 // return block
2462 if (!_rpmalloc_span_is_fully_utilized(span))
2463 return block;
2464
2465 // The span is fully utilized, unlink from partial list and add to fully
2466 // utilized list
2467 _rpmalloc_span_double_link_list_pop_head(&heap_size_class->partial_span,
2468 span);
2469#if RPMALLOC_FIRST_CLASS_HEAPS
2470 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
2471#endif
2472 ++heap->full_span_count;
2473 return block;
2474 }
2475
2476 // Find a span in one of the cache levels
2477 span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx);
2478 if (EXPECTED(span != 0)) {
2479 // Mark span as owned by this heap and set base data, return first block
2480 return _rpmalloc_span_initialize_new(heap, heap_size_class, span,
2481 class_idx);
2482 }
2483
2484 return 0;
2485}
2486
2487//! Allocate a small sized memory block from the given heap
2488static void *_rpmalloc_allocate_small(heap_t *heap, size_t size) {
2489 rpmalloc_assert(heap, "No thread heap");
2490 // Small sizes have unique size classes
2491 const uint32_t class_idx =
2492 (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT);
2493 heap_size_class_t *heap_size_class = heap->size_class + class_idx;
2494 _rpmalloc_stat_inc_alloc(heap, class_idx);
2495 if (EXPECTED(heap_size_class->free_list != 0))
2496 return free_list_pop(&heap_size_class->free_list);
2497 return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class,
2498 class_idx);
2499}
2500
2501//! Allocate a medium sized memory block from the given heap
2502static void *_rpmalloc_allocate_medium(heap_t *heap, size_t size) {
2503 rpmalloc_assert(heap, "No thread heap");
2504 // Calculate the size class index and do a dependent lookup of the final class
2505 // index (in case of merged classes)
2506 const uint32_t base_idx =
2507 (uint32_t)(SMALL_CLASS_COUNT +
2508 ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT));
2509 const uint32_t class_idx = _memory_size_class[base_idx].class_idx;
2510 heap_size_class_t *heap_size_class = heap->size_class + class_idx;
2511 _rpmalloc_stat_inc_alloc(heap, class_idx);
2512 if (EXPECTED(heap_size_class->free_list != 0))
2513 return free_list_pop(&heap_size_class->free_list);
2514 return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class,
2515 class_idx);
2516}
2517
2518//! Allocate a large sized memory block from the given heap
2519static void *_rpmalloc_allocate_large(heap_t *heap, size_t size) {
2520 rpmalloc_assert(heap, "No thread heap");
2521 // Calculate number of needed max sized spans (including header)
2522 // Since this function is never called if size > LARGE_SIZE_LIMIT
2523 // the span_count is guaranteed to be <= LARGE_CLASS_COUNT
2524 size += SPAN_HEADER_SIZE;
2525 size_t span_count = size >> _memory_span_size_shift;
2526 if (size & (_memory_span_size - 1))
2527 ++span_count;
2528
2529 // Find a span in one of the cache levels
2530 span_t *span =
2531 _rpmalloc_heap_extract_new_span(heap, 0, span_count, SIZE_CLASS_LARGE);
2532 if (!span)
2533 return span;
2534
2535 // Mark span as owned by this heap and set base data
2536 rpmalloc_assert(span->span_count >= span_count, "Internal failure");
2537 span->size_class = SIZE_CLASS_LARGE;
2538 span->heap = heap;
2539
2540#if RPMALLOC_FIRST_CLASS_HEAPS
2541 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
2542#endif
2543 ++heap->full_span_count;
2544
2545 return pointer_offset(span, SPAN_HEADER_SIZE);
2546}
2547
2548//! Allocate a huge block by mapping memory pages directly
2549static void *_rpmalloc_allocate_huge(heap_t *heap, size_t size) {
2550 rpmalloc_assert(heap, "No thread heap");
2551 _rpmalloc_heap_cache_adopt_deferred(heap, 0);
2552 size += SPAN_HEADER_SIZE;
2553 size_t num_pages = size >> _memory_page_size_shift;
2554 if (size & (_memory_page_size - 1))
2555 ++num_pages;
2556 size_t align_offset = 0;
2557 span_t *span =
2558 (span_t *)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset);
2559 if (!span)
2560 return span;
2561
2562 // Store page count in span_count
2563 span->size_class = SIZE_CLASS_HUGE;
2564 span->span_count = (uint32_t)num_pages;
2565 span->align_offset = (uint32_t)align_offset;
2566 span->heap = heap;
2567 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
2568
2569#if RPMALLOC_FIRST_CLASS_HEAPS
2570 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
2571#endif
2572 ++heap->full_span_count;
2573
2574 return pointer_offset(span, SPAN_HEADER_SIZE);
2575}
2576
2577//! Allocate a block of the given size
2578static void *_rpmalloc_allocate(heap_t *heap, size_t size) {
2579 _rpmalloc_stat_add64(&_allocation_counter, 1);
2580 if (EXPECTED(size <= SMALL_SIZE_LIMIT))
2581 return _rpmalloc_allocate_small(heap, size);
2582 else if (size <= _memory_medium_size_limit)
2583 return _rpmalloc_allocate_medium(heap, size);
2584 else if (size <= LARGE_SIZE_LIMIT)
2585 return _rpmalloc_allocate_large(heap, size);
2586 return _rpmalloc_allocate_huge(heap, size);
2587}
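// Usage sketch (illustrative, compiled out): the size thresholds above are
// internal; callers simply use the public entry points and the allocator
// routes each request to the small, medium, large or huge path.
#if 0
#include "rpmalloc.h"
#include <string.h>

static void basic_usage_example(void) {
  rpmalloc_initialize();
  void *small = rpmalloc(64);         // small size class
  void *large = rpmalloc(256 * 1024); // large path, spans multiple spans
  memset(small, 0, 64);
  rpfree(small);
  rpfree(large);
  rpmalloc_finalize();
}
#endif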
2588
2589static void *_rpmalloc_aligned_allocate(heap_t *heap, size_t alignment,
2590 size_t size) {
2591 if (alignment <= SMALL_GRANULARITY)
2592 return _rpmalloc_allocate(heap, size);
2593
2594#if ENABLE_VALIDATE_ARGS
2595 if ((size + alignment) < size) {
2596 errno = EINVAL;
2597 return 0;
2598 }
2599 if (alignment & (alignment - 1)) {
2600 errno = EINVAL;
2601 return 0;
2602 }
2603#endif
2604
2605 if ((alignment <= SPAN_HEADER_SIZE) &&
2606 ((size + SPAN_HEADER_SIZE) < _memory_medium_size_limit)) {
2607 // If alignment is less than or equal to span header size (which is a power of
2608 // two), and size aligned to span header size multiples is less than size +
2609 // alignment, then use natural alignment of blocks to provide alignment
2610 size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) &
2611 ~(uintptr_t)(SPAN_HEADER_SIZE - 1)
2612 : SPAN_HEADER_SIZE;
2613 rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE),
2614 "Failed alignment calculation");
2615 if (multiple_size <= (size + alignment))
2616 return _rpmalloc_allocate(heap, multiple_size);
2617 }
2618
2619 void *ptr = 0;
2620 size_t align_mask = alignment - 1;
2621 if (alignment <= _memory_page_size) {
2622 ptr = _rpmalloc_allocate(heap, size + alignment);
2623 if ((uintptr_t)ptr & align_mask) {
2624 ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
2625 // Mark as having aligned blocks
2626 span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
2627 span->flags |= SPAN_FLAG_ALIGNED_BLOCKS;
2628 }
2629 return ptr;
2630 }
2631
2632 // Fallback to mapping new pages for this request. Since pointers passed
2633 // to rpfree must be able to reach the start of the span by bitmasking of
2634 // the address with the span size, the returned aligned pointer from this
2635 // function must be within a span size of the start of the mapped area.
2636 // In worst case this requires us to loop and map pages until we get a
2637 // suitable memory address. It also means we can never align to span size
2638 // or greater, since the span header will push alignment more than one
2639 // span size away from span start (thus causing pointer mask to give us
2640 // an invalid span start on free)
2641 if (alignment & align_mask) {
2642 errno = EINVAL;
2643 return 0;
2644 }
2645 if (alignment >= _memory_span_size) {
2646 errno = EINVAL;
2647 return 0;
2648 }
2649
2650 size_t extra_pages = alignment / _memory_page_size;
2651
2652 // Since each span has a header, we will at least need one extra memory page
2653 size_t num_pages = 1 + (size / _memory_page_size);
2654 if (size & (_memory_page_size - 1))
2655 ++num_pages;
2656
2657 if (extra_pages > num_pages)
2658 num_pages = 1 + extra_pages;
2659
2660 size_t original_pages = num_pages;
2661 size_t limit_pages = (_memory_span_size / _memory_page_size) * 2;
2662 if (limit_pages < (original_pages * 2))
2663 limit_pages = original_pages * 2;
2664
2665 size_t mapped_size, align_offset;
2666 span_t *span;
2667
2668retry:
2669 align_offset = 0;
2670 mapped_size = num_pages * _memory_page_size;
2671
2672 span = (span_t *)_rpmalloc_mmap(mapped_size, &align_offset);
2673 if (!span) {
2674 errno = ENOMEM;
2675 return 0;
2676 }
2677 ptr = pointer_offset(span, SPAN_HEADER_SIZE);
2678
2679 if ((uintptr_t)ptr & align_mask)
2680 ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
2681
2682 if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) ||
2683 (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) ||
2684 (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) {
2685 _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size);
2686 ++num_pages;
2687 if (num_pages > limit_pages) {
2688 errno = EINVAL;
2689 return 0;
2690 }
2691 goto retry;
2692 }
2693
2694 // Store page count in span_count
2695 span->size_class = SIZE_CLASS_HUGE;
2696 span->span_count = (uint32_t)num_pages;
2697 span->align_offset = (uint32_t)align_offset;
2698 span->heap = heap;
2699 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
2700
2701#if RPMALLOC_FIRST_CLASS_HEAPS
2702 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
2703#endif
2704 ++heap->full_span_count;
2705
2706 _rpmalloc_stat_add64(&_allocation_counter, 1);
2707
2708 return ptr;
2709}
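// Usage sketch (illustrative, compiled out): alignments up to (but not
// including) the span size are supported through rpaligned_alloc and
// rpposix_memalign; larger alignments fail with EINVAL as enforced above.
#if 0
#include "rpmalloc.h"

static void aligned_usage_example(void) {
  rpmalloc_initialize();
  void *a = rpaligned_alloc(4096, 10000); // page-aligned block
  void *b = 0;
  int err = rpposix_memalign(&b, 256, 512); // POSIX-style interface
  if (!err)
    rpfree(b);
  rpfree(a);
  rpmalloc_finalize();
}
#endif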
2710
2711////////////
2712///
2713/// Deallocation entry points
2714///
2715//////
2716
2717//! Deallocate the given small/medium memory block in the current thread local
2718//! heap
2719 static void _rpmalloc_deallocate_direct_small_or_medium(span_t *span,
2720 void *block) {
2721 heap_t *heap = span->heap;
2722 rpmalloc_assert(heap->owner_thread == get_thread_id() ||
2723 !heap->owner_thread || heap->finalize,
2724 "Internal failure");
2725 // Add block to free list
2726 if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) {
2727 span->used_count = span->block_count;
2728#if RPMALLOC_FIRST_CLASS_HEAPS
2729 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class],
2730 span);
2731#endif
2732 _rpmalloc_span_double_link_list_add(
2733 &heap->size_class[span->size_class].partial_span, span);
2734 --heap->full_span_count;
2735 }
2736 *((void **)block) = span->free_list;
2737 --span->used_count;
2738 span->free_list = block;
2739 if (UNEXPECTED(span->used_count == span->list_size)) {
2740 // If there are no used blocks it is guaranteed that no other external
2741 // thread is accessing the span
2742 if (span->used_count) {
2743 // Make sure we have synchronized the deferred list and list size by using
2744 // acquire semantics and guarantee that no external thread is accessing
2745 // span concurrently
2746 void *free_list;
2747 do {
2748 free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred,
2749 INVALID_POINTER);
2750 } while (free_list == INVALID_POINTER);
2751 atomic_store_ptr_release(&span->free_list_deferred, free_list);
2752 }
2753 _rpmalloc_span_double_link_list_remove(
2754 &heap->size_class[span->size_class].partial_span, span);
2755 _rpmalloc_span_release_to_cache(heap, span);
2756 }
2757}
2758
2759 static void _rpmalloc_deallocate_defer_free_span(heap_t *heap, span_t *span) {
2760 if (span->size_class != SIZE_CLASS_HUGE)
2761 _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred);
2762 // This list does not need ABA protection, no mutable side state
2763 do {
2764 span->free_list = (void *)atomic_load_ptr(&heap->span_free_deferred);
2765 } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list));
2766}
2767
2768//! Put the block in the deferred free list of the owning span
2769 static void _rpmalloc_deallocate_defer_small_or_medium(span_t *span,
2770 void *block) {
2771 // The memory ordering here is a bit tricky, to avoid having to ABA protect
2772 // the deferred free list to avoid desynchronization of list and list size
2773 // we need to have acquire semantics on successful CAS of the pointer to
2774 // guarantee the list_size variable validity + release semantics on pointer
2775 // store
2776 void *free_list;
2777 do {
2778 free_list =
2779 atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER);
2780 } while (free_list == INVALID_POINTER);
2781 *((void **)block) = free_list;
2782 uint32_t free_count = ++span->list_size;
2783 int all_deferred_free = (free_count == span->block_count);
2784 atomic_store_ptr_release(&span->free_list_deferred, free_list);
2785 if (all_deferred_free) {
2786 // Span was completely freed by this block. Due to the INVALID_POINTER spin
2787 // lock no other thread can reach this state simultaneously on this span.
2788 // Safe to move to owner heap deferred cache
2789 _rpmalloc_deallocate_defer_free_span(span->heap, span);
2790 }
2791}
2792
2793 static void _rpmalloc_deallocate_small_or_medium(span_t *span, void *p) {
2794 _rpmalloc_stat_inc_free(span->heap, span->size_class);
2795 if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) {
2796 // Realign pointer to block start
2797 void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2798 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
2799 p = pointer_offset(p, -(int32_t)(block_offset % span->block_size));
2800 }
2801 // Check if block belongs to this heap or if deallocation should be deferred
2802#if RPMALLOC_FIRST_CLASS_HEAPS
2803 int defer =
2804 (span->heap->owner_thread &&
2805 (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2806#else
2807 int defer =
2808 ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2809#endif
2810 if (!defer)
2811 _rpmalloc_deallocate_direct_small_or_medium(span, p);
2812 else
2813 _rpmalloc_deallocate_defer_small_or_medium(span, p);
2814}
2815
2816//! Deallocate the given large memory block to the current heap
2817 static void _rpmalloc_deallocate_large(span_t *span) {
2818 rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class");
2819 rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) ||
2820 !(span->flags & SPAN_FLAG_SUBSPAN),
2821 "Span flag corrupted");
2822 rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) ||
2823 (span->flags & SPAN_FLAG_SUBSPAN),
2824 "Span flag corrupted");
2825 // We must always defer (unless finalizing) if from another heap since we
2826 // cannot touch the list or counters of another heap
2827#if RPMALLOC_FIRST_CLASS_HEAPS
2828 int defer =
2829 (span->heap->owner_thread &&
2830 (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2831#else
2832 int defer =
2833 ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2834#endif
2835 if (defer) {
2836 _rpmalloc_deallocate_defer_free_span(span->heap, span);
2837 return;
2838 }
2839 rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted");
2840 --span->heap->full_span_count;
2841#if RPMALLOC_FIRST_CLASS_HEAPS
2842 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
2843#endif
2844#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
2845 // Decrease counter
2846 size_t idx = span->span_count - 1;
2847 atomic_decr32(&span->heap->span_use[idx].current);
2848#endif
2849 heap_t *heap = span->heap;
2850 rpmalloc_assert(heap, "No thread heap");
2851#if ENABLE_THREAD_CACHE
2852 const int set_as_reserved =
2853 ((span->span_count > 1) && (heap->span_cache.count == 0) &&
2854 !heap->finalize && !heap->spans_reserved);
2855#else
2856 const int set_as_reserved =
2857 ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved);
2858#endif
2859 if (set_as_reserved) {
2860 heap->span_reserve = span;
2861 heap->spans_reserved = span->span_count;
2862 if (span->flags & SPAN_FLAG_MASTER) {
2863 heap->span_reserve_master = span;
2864 } else { // SPAN_FLAG_SUBSPAN
2865 span_t *master = (span_t *)pointer_offset(
2866 span,
2867 -(intptr_t)((size_t)span->offset_from_master * _memory_span_size));
2868 heap->span_reserve_master = master;
2869 rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted");
2870 rpmalloc_assert(atomic_load32(&master->remaining_spans) >=
2871 (int32_t)span->span_count,
2872 "Master span count corrupted");
2873 }
2874 _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved);
2875 } else {
2876 // Insert into cache list
2877 _rpmalloc_heap_cache_insert(heap, span);
2878 }
2879}
2880
2881//! Deallocate the given huge span
2882 static void _rpmalloc_deallocate_huge(span_t *span) {
2883 rpmalloc_assert(span->heap, "No span heap");
2884#if RPMALLOC_FIRST_CLASS_HEAPS
2885 int defer =
2886 (span->heap->owner_thread &&
2887 (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2888#else
2889 int defer =
2890 ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2891#endif
2892 if (defer) {
2893 _rpmalloc_deallocate_defer_free_span(span->heap, span);
2894 return;
2895 }
2896 rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted");
2897 --span->heap->full_span_count;
2898#if RPMALLOC_FIRST_CLASS_HEAPS
2899 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
2900#endif
2901
2902 // Oversized allocation, page count is stored in span_count
2903 size_t num_pages = span->span_count;
2904 _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset,
2905 num_pages * _memory_page_size);
2906 _rpmalloc_stat_sub(&_huge_pages_current, num_pages);
2907}
2908
2909//! Deallocate the given block
2910static void _rpmalloc_deallocate(void *p) {
2911 _rpmalloc_stat_add64(&_deallocation_counter, 1);
2912 // Grab the span (always at start of span, using span alignment)
2913 span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2914 if (UNEXPECTED(!span))
2915 return;
2916 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT))
2917 _rpmalloc_deallocate_small_or_medium(span, p);
2918 else if (span->size_class == SIZE_CLASS_LARGE)
2919 _rpmalloc_deallocate_large(span);
2920 else
2921 _rpmalloc_deallocate_huge(span);
2922}
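// Illustrative aside (compiled out): deallocation relies on every block
// living inside a span whose start address is aligned to the span size, so
// masking the pointer recovers the span header. A standalone sketch with an
// assumed 64 KiB span size (the real value is _memory_span_size):
#if 0
#include <stdint.h>

#define EXAMPLE_SPAN_SIZE ((uintptr_t)64 * 1024)

static void *example_span_start(void *p) {
  // Clearing the low bits maps every address inside the span to its base
  return (void *)((uintptr_t)p & ~(EXAMPLE_SPAN_SIZE - 1));
}
#endif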
2923
2924////////////
2925///
2926/// Reallocation entry points
2927///
2928//////
2929
2930static size_t _rpmalloc_usable_size(void *p);
2931
2932//! Reallocate the given block to the given size
2933static void *_rpmalloc_reallocate(heap_t *heap, void *p, size_t size,
2934 size_t oldsize, unsigned int flags) {
2935 if (p) {
2936 // Grab the span using guaranteed span alignment
2937 span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2938 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
2939 // Small/medium sized block
2940 rpmalloc_assert(span->span_count == 1, "Span counter corrupted");
2941 void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2942 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
2943 uint32_t block_idx = block_offset / span->block_size;
2944 void *block =
2945 pointer_offset(blocks_start, (size_t)block_idx * span->block_size);
2946 if (!oldsize)
2947 oldsize =
2948 (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block));
2949 if ((size_t)span->block_size >= size) {
2950 // Still fits in block, never mind trying to save memory, but preserve
2951 // data if alignment changed
2952 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2953 memmove(block, p, oldsize);
2954 return block;
2955 }
2956 } else if (span->size_class == SIZE_CLASS_LARGE) {
2957 // Large block
2958 size_t total_size = size + SPAN_HEADER_SIZE;
2959 size_t num_spans = total_size >> _memory_span_size_shift;
2960 if (total_size & (_memory_span_mask - 1))
2961 ++num_spans;
2962 size_t current_spans = span->span_count;
2963 void *block = pointer_offset(span, SPAN_HEADER_SIZE);
2964 if (!oldsize)
2965 oldsize = (current_spans * _memory_span_size) -
2966 (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
2967 if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) {
2968 // Still fits in block, never mind trying to save memory, but preserve
2969 // data if alignment changed
2970 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2971 memmove(block, p, oldsize);
2972 return block;
2973 }
2974 } else {
2975 // Oversized block
2976 size_t total_size = size + SPAN_HEADER_SIZE;
2977 size_t num_pages = total_size >> _memory_page_size_shift;
2978 if (total_size & (_memory_page_size - 1))
2979 ++num_pages;
2980 // Page count is stored in span_count
2981 size_t current_pages = span->span_count;
2982 void *block = pointer_offset(span, SPAN_HEADER_SIZE);
2983 if (!oldsize)
2984 oldsize = (current_pages * _memory_page_size) -
2985 (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
2986 if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) {
2987 // Still fits in block, never mind trying to save memory, but preserve
2988 // data if alignment changed
2989 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2990 memmove(block, p, oldsize);
2991 return block;
2992 }
2993 }
2994 } else {
2995 oldsize = 0;
2996 }
2997
2998 if (!!(flags & RPMALLOC_GROW_OR_FAIL))
2999 return 0;
3000
3001 // Size is greater than block size, need to allocate a new block and
3002 // deallocate the old. Avoid hysteresis by overallocating if the increase is small
3003 // (below 37%)
3004 size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
3005 size_t new_size =
3006 (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size);
3007 void *block = _rpmalloc_allocate(heap, new_size);
3008 if (p && block) {
3009 if (!(flags & RPMALLOC_NO_PRESERVE))
3010 memcpy(block, p, oldsize < new_size ? oldsize : new_size);
3011 _rpmalloc_deallocate(p);
3012 }
3013
3014 return block;
3015}
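// Usage sketch (illustrative, compiled out): rprealloc stays in place when
// the existing block or span is already big enough, otherwise it allocates a
// new block (with some over-allocation to dampen repeated growth) and copies.
#if 0
#include "rpmalloc.h"

static void realloc_usage_example(void) {
  rpmalloc_initialize();
  char *buf = (char *)rpmalloc(100);
  buf = (char *)rprealloc(buf, 112);  // may be satisfied in place by the
                                      // original block's size class
  buf = (char *)rprealloc(buf, 8192); // moves to a larger size class
  rpfree(buf);
  rpmalloc_finalize();
}
#endif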
3016
3017static void *_rpmalloc_aligned_reallocate(heap_t *heap, void *ptr,
3018 size_t alignment, size_t size,
3019 size_t oldsize, unsigned int flags) {
3020 if (alignment <= SMALL_GRANULARITY)
3021 return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags);
3022
3023 int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL);
3024 size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0);
3025 if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) {
3026 if (no_alloc || (size >= (usablesize / 2)))
3027 return ptr;
3028 }
3029 // Aligned alloc marks span as having aligned blocks
3030 void *block =
3031 (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0);
3032 if (EXPECTED(block != 0)) {
3033 if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) {
3034 if (!oldsize)
3035 oldsize = usablesize;
3036 memcpy(block, ptr, oldsize < size ? oldsize : size);
3037 }
3038 _rpmalloc_deallocate(ptr);
3039 }
3040 return block;
3041}
3042
3043////////////
3044///
3045/// Initialization, finalization and utility
3046///
3047//////
3048
3049//! Get the usable size of the given block
3050static size_t _rpmalloc_usable_size(void *p) {
3051 // Grab the span using guaranteed span alignment
3052 span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
3053 if (span->size_class < SIZE_CLASS_COUNT) {
3054 // Small/medium block
3055 void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
3056 return span->block_size -
3057 ((size_t)pointer_diff(p, blocks_start) % span->block_size);
3058 }
3059 if (span->size_class == SIZE_CLASS_LARGE) {
3060 // Large block
3061 size_t current_spans = span->span_count;
3062 return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span);
3063 }
3064 // Oversized block, page count is stored in span_count
3065 size_t current_pages = span->span_count;
3066 return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span);
3067}
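// Usage sketch (illustrative, compiled out): rpmalloc_usable_size reports the
// full block capacity, which can be larger than the requested size because
// requests are rounded up to the owning size class.
#if 0
#include "rpmalloc.h"
#include <stdio.h>

static void usable_size_example(void) {
  rpmalloc_initialize();
  void *p = rpmalloc(100);
  // Typically prints a value >= 100 (the size class block size)
  printf("usable: %zu\n", rpmalloc_usable_size(p));
  rpfree(p);
  rpmalloc_finalize();
}
#endif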
3068
3069//! Adjust and optimize the size class properties for the given class
3070static void _rpmalloc_adjust_size_class(size_t iclass) {
3071 size_t block_size = _memory_size_class[iclass].block_size;
3072 size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size;
3073
3074 _memory_size_class[iclass].block_count = (uint16_t)block_count;
3075 _memory_size_class[iclass].class_idx = (uint16_t)iclass;
3076
3077 // Check if previous size classes can be merged
3078 if (iclass >= SMALL_CLASS_COUNT) {
3079 size_t prevclass = iclass;
3080 while (prevclass > 0) {
3081 --prevclass;
3082 // A class can be merged if number of pages and number of blocks are equal
3083 if (_memory_size_class[prevclass].block_count ==
3084 _memory_size_class[iclass].block_count)
3085 _rpmalloc_memcpy_const(_memory_size_class + prevclass,
3086 _memory_size_class + iclass,
3087 sizeof(_memory_size_class[iclass]));
3088 else
3089 break;
3090 }
3091 }
3092}
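// Worked example (illustrative, compiled out): with an assumed 64 KiB span
// and an assumed 128-byte span header, a 1024-byte size class would get
// (65536 - 128) / 1024 = 63 blocks per span; the real numbers depend on the
// configured _memory_span_size and SPAN_HEADER_SIZE.
#if 0
#include <stddef.h>

static size_t example_blocks_per_span(size_t span_size, size_t header_size,
                                      size_t block_size) {
  return (span_size - header_size) / block_size; // same formula as above
}
#endif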
3093
3094//! Initialize the allocator and setup global data
3095extern inline int rpmalloc_initialize(void) {
3096 if (_rpmalloc_initialized) {
3097 rpmalloc_thread_initialize();
3098 return 0;
3099 }
3100 return rpmalloc_initialize_config(0);
3101}
3102
3103 int rpmalloc_initialize_config(const rpmalloc_config_t *config) {
3104 if (_rpmalloc_initialized) {
3105 rpmalloc_thread_initialize();
3106 return 0;
3107 }
3108 _rpmalloc_initialized = 1;
3109
3110 if (config)
3111 memcpy(&_memory_config, config, sizeof(rpmalloc_config_t));
3112 else
3113 _rpmalloc_memset_const(&_memory_config, 0, sizeof(rpmalloc_config_t));
3114
3115 if (!_memory_config.memory_map || !_memory_config.memory_unmap) {
3116 _memory_config.memory_map = _rpmalloc_mmap_os;
3117 _memory_config.memory_unmap = _rpmalloc_unmap_os;
3118 }
3119
3120#if PLATFORM_WINDOWS
3121 SYSTEM_INFO system_info;
3122 memset(&system_info, 0, sizeof(system_info));
3123 GetSystemInfo(&system_info);
3124 _memory_map_granularity = system_info.dwAllocationGranularity;
3125#else
3126 _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE);
3127#endif
3128
3129#if RPMALLOC_CONFIGURABLE
3130 _memory_page_size = _memory_config.page_size;
3131#else
3132 _memory_page_size = 0;
3133#endif
3134 _memory_huge_pages = 0;
3135 if (!_memory_page_size) {
3136#if PLATFORM_WINDOWS
3137 _memory_page_size = system_info.dwPageSize;
3138#else
3139 _memory_page_size = (size_t)sysconf(_SC_PAGESIZE);
3140 if (_memory_config.enable_huge_pages) {
3141#if defined(__linux__)
3142 size_t huge_page_size = 0;
3143 FILE *meminfo = fopen("/proc/meminfo", "r");
3144 if (meminfo) {
3145 char line[128];
3146 while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
3147 line[sizeof(line) - 1] = 0;
3148 if (strstr(line, "Hugepagesize:"))
3149 huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
3150 }
3151 fclose(meminfo);
3152 }
3153 if (huge_page_size) {
3154 _memory_huge_pages = 1;
3155 _memory_page_size = huge_page_size;
3156 _memory_map_granularity = huge_page_size;
3157 }
3158#elif defined(__FreeBSD__)
3159 int rc;
3160 size_t sz = sizeof(rc);
3161
3162 if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 &&
3163 rc == 1) {
3164 static size_t defsize = 2 * 1024 * 1024;
3165 int nsize = 0;
3166 size_t sizes[4] = {0};
3167 _memory_huge_pages = 1;
3168 _memory_page_size = defsize;
3169 if ((nsize = getpagesizes(sizes, 4)) >= 2) {
3170 nsize--;
3171 for (size_t csize = sizes[nsize]; nsize >= 0 && csize;
3172 --nsize, csize = sizes[nsize]) {
3173 //! Unlikely, but as a precaution..
3174 rpmalloc_assert(!(csize & (csize - 1)) && !(csize % 1024),
3175 "Invalid page size");
3176 if (defsize < csize) {
3177 _memory_page_size = csize;
3178 break;
3179 }
3180 }
3181 }
3183 }
3184#elif defined(__APPLE__) || defined(__NetBSD__)
3185 _memory_huge_pages = 1;
3186 _memory_page_size = 2 * 1024 * 1024;
3187 _memory_map_granularity = _memory_page_size;
3188#endif
3189 }
3190#endif
3191 } else {
3192 if (_memory_config.enable_huge_pages)
3193 _memory_huge_pages = 1;
3194 }
3195
3196#if PLATFORM_WINDOWS
3197 if (_memory_config.enable_huge_pages) {
3198 HANDLE token = 0;
3199 size_t large_page_minimum = GetLargePageMinimum();
3200 if (large_page_minimum)
3201 OpenProcessToken(GetCurrentProcess(),
3202 TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
3203 if (token) {
3204 LUID luid;
3205 if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
3206 TOKEN_PRIVILEGES token_privileges;
3207 memset(&token_privileges, 0, sizeof(token_privileges));
3208 token_privileges.PrivilegeCount = 1;
3209 token_privileges.Privileges[0].Luid = luid;
3210 token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
3211 if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
3212 if (GetLastError() == ERROR_SUCCESS)
3213 _memory_huge_pages = 1;
3214 }
3215 }
3216 CloseHandle(token);
3217 }
3218 if (_memory_huge_pages) {
3219 if (large_page_minimum > _memory_page_size)
3220 _memory_page_size = large_page_minimum;
3221 if (large_page_minimum > _memory_map_granularity)
3222 _memory_map_granularity = large_page_minimum;
3223 }
3224 }
3225#endif
3226
3227 size_t min_span_size = 256;
3228 size_t max_page_size;
3229#if UINTPTR_MAX > 0xFFFFFFFF
3230 max_page_size = 4096ULL * 1024ULL * 1024ULL;
3231#else
3232 max_page_size = 4 * 1024 * 1024;
3233#endif
3234 if (_memory_page_size < min_span_size)
3235 _memory_page_size = min_span_size;
3236 if (_memory_page_size > max_page_size)
3237 _memory_page_size = max_page_size;
3239 size_t page_size_bit = _memory_page_size;
3240 while (page_size_bit != 1) {
3242 page_size_bit >>= 1;
3243 }
3245
3246#if RPMALLOC_CONFIGURABLE
3247 if (!_memory_config.span_size) {
3251 } else {
3252 size_t span_size = _memory_config.span_size;
3253 if (span_size > (256 * 1024))
3254 span_size = (256 * 1024);
3255 _memory_span_size = 4096;
3257 while (_memory_span_size < span_size) {
3258 _memory_span_size <<= 1;
3260 }
3261 _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
3262 }
3263#endif
3264
3266 (_memory_config.span_map_count ? _memory_config.span_map_count
3276
3279 _memory_config.span_map_count = _memory_span_map_count;
3280 _memory_config.enable_huge_pages = _memory_huge_pages;
3281
3282#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \
3283 defined(__TINYC__)
3284 if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc))
3285 return -1;
3286#endif
3287#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
3288 fls_key = FlsAlloc(&_rpmalloc_thread_destructor);
3289#endif
3290
3291 // Setup all small and medium size classes
3292 size_t iclass = 0;
3293 _memory_size_class[iclass].block_size = SMALL_GRANULARITY;
3294 _rpmalloc_adjust_size_class(iclass);
3295 for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) {
3296 size_t size = iclass * SMALL_GRANULARITY;
3297 _memory_size_class[iclass].block_size = (uint32_t)size;
3298 _rpmalloc_adjust_size_class(iclass);
3299 }
3300 // At least two blocks per span, then fall back to large allocations
3304 for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) {
3305 size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY);
3306 if (size > _memory_medium_size_limit) {
3309 break;
3310 }
3311 _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size;
3312 _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass);
3313 }
3314
3316#if RPMALLOC_FIRST_CLASS_HEAPS
3317 _memory_first_class_orphan_heaps = 0;
3318#endif
3319#if ENABLE_STATISTICS
3320 atomic_store32(&_memory_active_heaps, 0);
3321 atomic_store32(&_mapped_pages, 0);
3322 _mapped_pages_peak = 0;
3323 atomic_store32(&_master_spans, 0);
3324 atomic_store32(&_mapped_total, 0);
3325 atomic_store32(&_unmapped_total, 0);
3326 atomic_store32(&_mapped_pages_os, 0);
3327 atomic_store32(&_huge_pages_current, 0);
3328 _huge_pages_peak = 0;
3329#endif
3330 memset(_memory_heaps, 0, sizeof(_memory_heaps));
3332
3334
3335 // Initialize this thread
3336 rpmalloc_thread_initialize();
3337 return 0;
3338}
3339
3340//! Finalize the allocator
3341 void rpmalloc_finalize(void) {
3342 rpmalloc_thread_finalize(1);
3343 // rpmalloc_dump_statistics(stdout);
3344
3351 }
3353
3354 // Free all thread caches and fully free spans
3355 for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
3356 heap_t *heap = _memory_heaps[list_idx];
3357 while (heap) {
3358 heap_t *next_heap = heap->next_heap;
3359 heap->finalize = 1;
3360 _rpmalloc_heap_global_finalize(heap);
3361 heap = next_heap;
3362 }
3363 }
3364
3365#if ENABLE_GLOBAL_CACHE
3366 // Free global caches
3367 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
3368 _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]);
3369#endif
3370
3371#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
3372 pthread_key_delete(_memory_thread_heap);
3373#endif
3374#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
3375 FlsFree(fls_key);
3376 fls_key = 0;
3377#endif
3378#if ENABLE_STATISTICS
3379 // If you hit these asserts you probably have memory leaks (perhaps global
3380 // scope data doing dynamic allocations) or double frees in your code
3381 rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected");
3382 rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0,
3383 "Memory leak detected");
3384#endif
3385
3387}
3388
3389//! Initialize thread, assign heap
3390extern inline void rpmalloc_thread_initialize(void) {
3391 if (!get_thread_heap_raw()) {
3392 heap_t *heap = _rpmalloc_heap_allocate(0);
3393 if (heap) {
3394 _rpmalloc_stat_inc(&_memory_active_heaps);
3395 set_thread_heap(heap);
3396#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
3397 FlsSetValue(fls_key, heap);
3398#endif
3399 }
3400 }
3401}
3402
3403//! Finalize thread, orphan heap
3404void rpmalloc_thread_finalize(int release_caches) {
3405 heap_t *heap = get_thread_heap_raw();
3406 if (heap)
3407 _rpmalloc_heap_release_raw(heap, release_caches);
3408 set_thread_heap(0);
3409#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
3410 FlsSetValue(fls_key, 0);
3411#endif
3412}
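// Usage sketch (illustrative, compiled out): threads that do not go through
// an overriding layer should bracket their allocator use with the explicit
// per-thread calls; the heap and its caches are returned when the thread
// finalizes. The worker function and its argument are example names.
#if 0
#include "rpmalloc.h"
#include <pthread.h>

static void *example_worker(void *arg) {
  (void)arg;
  rpmalloc_thread_initialize(); // acquire (or adopt an orphaned) heap
  void *scratch = rpmalloc(4096);
  rpfree(scratch);
  rpmalloc_thread_finalize(1);  // release caches (to the global cache when enabled)
  return 0;
}
#endif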
3413
3414 int rpmalloc_is_thread_initialized(void) {
3415 return (get_thread_heap_raw() != 0) ? 1 : 0;
3416}
3417
3418 const rpmalloc_config_t *rpmalloc_config(void) { return &_memory_config; }
3419
3420// Extern interface
3421
3422extern inline RPMALLOC_ALLOCATOR void *rpmalloc(size_t size) {
3423#if ENABLE_VALIDATE_ARGS
3424 if (size >= MAX_ALLOC_SIZE) {
3425 errno = EINVAL;
3426 return 0;
3427 }
3428#endif
3429 heap_t *heap = get_thread_heap();
3430 return _rpmalloc_allocate(heap, size);
3431}
3432
3433extern inline void rpfree(void *ptr) { _rpmalloc_deallocate(ptr); }
3434
3435extern inline RPMALLOC_ALLOCATOR void *rpcalloc(size_t num, size_t size) {
3436 size_t total;
3437#if ENABLE_VALIDATE_ARGS
3438#if PLATFORM_WINDOWS
3439 int err = SizeTMult(num, size, &total);
3440 if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
3441 errno = EINVAL;
3442 return 0;
3443 }
3444#else
3445 int err = __builtin_umull_overflow(num, size, &total);
3446 if (err || (total >= MAX_ALLOC_SIZE)) {
3447 errno = EINVAL;
3448 return 0;
3449 }
3450#endif
3451#else
3452 total = num * size;
3453#endif
3454 heap_t *heap = get_thread_heap();
3455 void *block = _rpmalloc_allocate(heap, total);
3456 if (block)
3457 memset(block, 0, total);
3458 return block;
3459}
3460
3461extern inline RPMALLOC_ALLOCATOR void *rprealloc(void *ptr, size_t size) {
3462#if ENABLE_VALIDATE_ARGS
3463 if (size >= MAX_ALLOC_SIZE) {
3464 errno = EINVAL;
3465 return ptr;
3466 }
3467#endif
3468 heap_t *heap = get_thread_heap();
3469 return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
3470}
3471
3472extern RPMALLOC_ALLOCATOR void *rpaligned_realloc(void *ptr, size_t alignment,
3473 size_t size, size_t oldsize,
3474 unsigned int flags) {
3475#if ENABLE_VALIDATE_ARGS
3476 if ((size + alignment < size) || (alignment > _memory_page_size)) {
3477 errno = EINVAL;
3478 return 0;
3479 }
3480#endif
3481 heap_t *heap = get_thread_heap();
3482 return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize,
3483 flags);
3484}
3485
3486extern RPMALLOC_ALLOCATOR void *rpaligned_alloc(size_t alignment, size_t size) {
3487 heap_t *heap = get_thread_heap();
3488 return _rpmalloc_aligned_allocate(heap, alignment, size);
3489}
3490
3491extern inline RPMALLOC_ALLOCATOR void *
3492rpaligned_calloc(size_t alignment, size_t num, size_t size) {
3493 size_t total;
3494#if ENABLE_VALIDATE_ARGS
3495#if PLATFORM_WINDOWS
3496 int err = SizeTMult(num, size, &total);
3497 if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
3498 errno = EINVAL;
3499 return 0;
3500 }
3501#else
3502 int err = __builtin_umull_overflow(num, size, &total);
3503 if (err || (total >= MAX_ALLOC_SIZE)) {
3504 errno = EINVAL;
3505 return 0;
3506 }
3507#endif
3508#else
3509 total = num * size;
3510#endif
3511 void *block = rpaligned_alloc(alignment, total);
3512 if (block)
3513 memset(block, 0, total);
3514 return block;
3515}
3516
3517extern inline RPMALLOC_ALLOCATOR void *rpmemalign(size_t alignment,
3518 size_t size) {
3519 return rpaligned_alloc(alignment, size);
3520}
3521
3522extern inline int rpposix_memalign(void **memptr, size_t alignment,
3523 size_t size) {
3524 if (memptr)
3525 *memptr = rpaligned_alloc(alignment, size);
3526 else
3527 return EINVAL;
3528 return *memptr ? 0 : ENOMEM;
3529}
3530
3531extern inline size_t rpmalloc_usable_size(void *ptr) {
3532 return (ptr ? _rpmalloc_usable_size(ptr) : 0);
3533}
3534
3535extern inline void rpmalloc_thread_collect(void) {}
3536
3537 void rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats) {
3538 memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
3539 heap_t *heap = get_thread_heap_raw();
3540 if (!heap)
3541 return;
3542
3543 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
3544 size_class_t *size_class = _memory_size_class + iclass;
3545 span_t *span = heap->size_class[iclass].partial_span;
3546 while (span) {
3547 size_t free_count = span->list_size;
3548 size_t block_count = size_class->block_count;
3549 if (span->free_list_limit < block_count)
3550 block_count = span->free_list_limit;
3551 free_count += (block_count - span->used_count);
3552 stats->sizecache += free_count * size_class->block_size;
3553 span = span->next;
3554 }
3555 }
3556
3557#if ENABLE_THREAD_CACHE
3558 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3559 span_cache_t *span_cache;
3560 if (!iclass)
3561 span_cache = &heap->span_cache;
3562 else
3563 span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
3564 stats->spancache += span_cache->count * (iclass + 1) * _memory_span_size;
3565 }
3566#endif
3567
3568 span_t *deferred = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
3569 while (deferred) {
3570 if (deferred->size_class != SIZE_CLASS_HUGE)
3571 stats->spancache += (size_t)deferred->span_count * _memory_span_size;
3572 deferred = (span_t *)deferred->free_list;
3573 }
3574
3575#if ENABLE_STATISTICS
3576 stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global);
3577 stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread);
3578
3579 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3580 stats->span_use[iclass].current =
3581 (size_t)atomic_load32(&heap->span_use[iclass].current);
3582 stats->span_use[iclass].peak =
3583 (size_t)atomic_load32(&heap->span_use[iclass].high);
3584 stats->span_use[iclass].to_global =
3585 (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global);
3586 stats->span_use[iclass].from_global =
3587 (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global);
3588 stats->span_use[iclass].to_cache =
3589 (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache);
3590 stats->span_use[iclass].from_cache =
3591 (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache);
3592 stats->span_use[iclass].to_reserved =
3593 (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved);
3594 stats->span_use[iclass].from_reserved =
3595 (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved);
3596 stats->span_use[iclass].map_calls =
3597 (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls);
3598 }
3599 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
3600 stats->size_use[iclass].alloc_current =
3601 (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
3602 stats->size_use[iclass].alloc_peak =
3603 (size_t)heap->size_class_use[iclass].alloc_peak;
3604 stats->size_use[iclass].alloc_total =
3605 (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total);
3606 stats->size_use[iclass].free_total =
3607 (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
3608 stats->size_use[iclass].spans_to_cache =
3609 (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache);
3610 stats->size_use[iclass].spans_from_cache =
3611 (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache);
3612 stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(
3613 &heap->size_class_use[iclass].spans_from_reserved);
3614 stats->size_use[iclass].map_calls =
3615 (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls);
3616 }
3617#endif
3618}
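A sketch of reading the per-thread statistics gathered above (helper name illustrative), assuming the calling thread has been set up with rpmalloc (for example via rpmalloc_initialize() or rpmalloc_thread_initialize()). The sizecache and spancache fields are always filled in, while the span_use/size_use counters are only populated when the allocator is built with ENABLE_STATISTICS.

  #include <stdio.h>
  #include "rpmalloc.h"

  static void thread_stats_example(void) {
    rpmalloc_thread_statistics_t stats;
    rpmalloc_thread_statistics(&stats);
    /* Bytes currently cached in this thread's size-class and span caches. */
    printf("size cache: %zu bytes, span cache: %zu bytes\n",
           stats.sizecache, stats.spancache);
  }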
3619
3620void rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats) {
3621 memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
3622#if ENABLE_STATISTICS
3623 stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
3624 stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
3625 stats->mapped_total =
3626 (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
3627 stats->unmapped_total =
3628 (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
3629 stats->huge_alloc =
3630 (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
3631 stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
3632#endif
3633#if ENABLE_GLOBAL_CACHE
3634 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3635 global_cache_t *cache = &_memory_span_cache[iclass];
3636 while (!atomic_cas32_acquire(&cache->lock, 1, 0))
3637 _rpmalloc_spin();
3638 uint32_t count = cache->count;
3639#if ENABLE_UNLIMITED_CACHE
3640 span_t *current_span = cache->overflow;
3641 while (current_span) {
3642 ++count;
3643 current_span = current_span->next;
3644 }
3645#endif
3646 atomic_store32_release(&cache->lock, 0);
3647 stats->cached += count * (iclass + 1) * _memory_span_size;
3648 }
3649#endif
3650}
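A companion sketch for the global counters (helper name illustrative), using only rpmalloc_global_statistics from rpmalloc.h; the mapped/huge figures require a build with ENABLE_STATISTICS and the cached figure a build with ENABLE_GLOBAL_CACHE, otherwise they read back as zero.

  #include <stdio.h>
  #include "rpmalloc.h"

  static void global_stats_example(void) {
    rpmalloc_global_statistics_t stats;
    rpmalloc_global_statistics(&stats);
    printf("mapped: %zu MiB, global cache: %zu MiB\n",
           stats.mapped / (size_t)(1024 * 1024),
           stats.cached / (size_t)(1024 * 1024));
  }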
3651
3652#if ENABLE_STATISTICS
3653
3654static void _memory_heap_dump_statistics(heap_t *heap, void *file) {
3655 fprintf(file, "Heap %d stats:\n", heap->id);
3656 fprintf(file, "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize "
3657 "BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB "
3658 "FromCacheMiB FromReserveMiB MmapCalls\n");
3659 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
3660 if (!atomic_load32(&heap->size_class_use[iclass].alloc_total))
3661 continue;
3662 fprintf(
3663 file,
3664 "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu "
3665 "%9u\n",
3666 (uint32_t)iclass,
3667 atomic_load32(&heap->size_class_use[iclass].alloc_current),
3668 heap->size_class_use[iclass].alloc_peak,
3669 atomic_load32(&heap->size_class_use[iclass].alloc_total),
3670 atomic_load32(&heap->size_class_use[iclass].free_total),
3671 _memory_size_class[iclass].block_size,
3672 _memory_size_class[iclass].block_count,
3673 atomic_load32(&heap->size_class_use[iclass].spans_current),
3674 heap->size_class_use[iclass].spans_peak,
3675 ((size_t)heap->size_class_use[iclass].alloc_peak *
3676 (size_t)_memory_size_class[iclass].block_size) /
3677 (size_t)(1024 * 1024),
3678 ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) *
3679 (size_t)_memory_span_size) /
3680 (size_t)(1024 * 1024),
3681 ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) *
3682 (size_t)_memory_span_size) /
3683 (size_t)(1024 * 1024),
3684 ((size_t)atomic_load32(
3685 &heap->size_class_use[iclass].spans_from_reserved) *
3686 (size_t)_memory_span_size) /
3687 (size_t)(1024 * 1024),
3688 atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
3689 }
3690 fprintf(file, "Spans Current Peak Deferred PeakMiB Cached ToCacheMiB "
3691 "FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB "
3692 "FromGlobalMiB MmapCalls\n");
3693 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3694 if (!atomic_load32(&heap->span_use[iclass].high) &&
3695 !atomic_load32(&heap->span_use[iclass].spans_map_calls))
3696 continue;
3697 fprintf(
3698 file,
3699 "%4u: %8d %8u %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n",
3700 (uint32_t)(iclass + 1), atomic_load32(&heap->span_use[iclass].current),
3701 atomic_load32(&heap->span_use[iclass].high),
3702 atomic_load32(&heap->span_use[iclass].spans_deferred),
3703 ((size_t)atomic_load32(&heap->span_use[iclass].high) *
3704 (size_t)_memory_span_size * (iclass + 1)) /
3705 (size_t)(1024 * 1024),
3706#if ENABLE_THREAD_CACHE
3707 (unsigned int)(!iclass ? heap->span_cache.count
3708 : heap->span_large_cache[iclass - 1].count),
3709 ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) *
3710 (iclass + 1) * _memory_span_size) /
3711 (size_t)(1024 * 1024),
3712 ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) *
3713 (iclass + 1) * _memory_span_size) /
3714 (size_t)(1024 * 1024),
3715#else
3716 0, (size_t)0, (size_t)0,
3717#endif
3718 ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) *
3719 (iclass + 1) * _memory_span_size) /
3720 (size_t)(1024 * 1024),
3721 ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) *
3722 (iclass + 1) * _memory_span_size) /
3723 (size_t)(1024 * 1024),
3724 ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) *
3725 (size_t)_memory_span_size * (iclass + 1)) /
3726 (size_t)(1024 * 1024),
3727 ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) *
3728 (size_t)_memory_span_size * (iclass + 1)) /
3729 (size_t)(1024 * 1024),
3730 atomic_load32(&heap->span_use[iclass].spans_map_calls));
3731 }
3732 fprintf(file, "Full spans: %zu\n", heap->full_span_count);
3733 fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n");
3734 fprintf(
3735 file, "%17zu %17zu\n",
3736 (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024),
3737 (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024));
3738}
3739
3740#endif
3741
3742void rpmalloc_dump_statistics(void *file) {
3743#if ENABLE_STATISTICS
3744 for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
3745 heap_t *heap = _memory_heaps[list_idx];
3746 while (heap) {
3747 int need_dump = 0;
3748 for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT);
3749 ++iclass) {
3750 if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) {
3751 rpmalloc_assert(
3752 !atomic_load32(&heap->size_class_use[iclass].free_total),
3753 "Heap statistics counter mismatch");
3754 rpmalloc_assert(
3755 !atomic_load32(&heap->size_class_use[iclass].spans_map_calls),
3756 "Heap statistics counter mismatch");
3757 continue;
3758 }
3759 need_dump = 1;
3760 }
3761 for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT);
3762 ++iclass) {
3763 if (!atomic_load32(&heap->span_use[iclass].high) &&
3764 !atomic_load32(&heap->span_use[iclass].spans_map_calls))
3765 continue;
3766 need_dump = 1;
3767 }
3768 if (need_dump)
3769 _memory_heap_dump_statistics(heap, file);
3770 heap = heap->next_heap;
3771 }
3772 }
3773 fprintf(file, "Global stats:\n");
3774 size_t huge_current =
3775 (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
3776 size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
3777 fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
3778 fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024),
3779 huge_peak / (size_t)(1024 * 1024));
3780
3781#if ENABLE_GLOBAL_CACHE
3782 fprintf(file, "GlobalCacheMiB\n");
3783 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3784 global_cache_t *cache = _memory_span_cache + iclass;
3785 size_t global_cache = (size_t)cache->count * iclass * _memory_span_size;
3786
3787 size_t global_overflow_cache = 0;
3788 span_t *span = cache->overflow;
3789 while (span) {
3790 global_overflow_cache += iclass * _memory_span_size;
3791 span = span->next;
3792 }
3793 if (global_cache || global_overflow_cache || cache->insert_count ||
3794 cache->extract_count)
3795 fprintf(file,
3796 "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n",
3797 iclass + 1, global_cache / (size_t)(1024 * 1024),
3798 global_overflow_cache / (size_t)(1024 * 1024),
3799 cache->insert_count, cache->extract_count);
3800 }
3801#endif
3802
3803 size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
3804 size_t mapped_os =
3805 (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
3806 size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
3807 size_t mapped_total =
3808 (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
3809 size_t unmapped_total =
3810 (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
3811 fprintf(
3812 file,
3813 "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n");
3814 fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n",
3815 mapped / (size_t)(1024 * 1024), mapped_os / (size_t)(1024 * 1024),
3816 mapped_peak / (size_t)(1024 * 1024),
3817 mapped_total / (size_t)(1024 * 1024),
3818 unmapped_total / (size_t)(1024 * 1024));
3819
3820 fprintf(file, "\n");
3821#if 0
3822 int64_t allocated = atomic_load64(&_allocation_counter);
3823 int64_t deallocated = atomic_load64(&_deallocation_counter);
3824 fprintf(file, "Allocation count: %lli\n", allocated);
3825 fprintf(file, "Deallocation count: %lli\n", deallocated);
3826 fprintf(file, "Current allocations: %lli\n", (allocated - deallocated));
3827 fprintf(file, "Master spans: %d\n", atomic_load32(&_master_spans));
3828 fprintf(file, "Dangling master spans: %d\n", atomic_load32(&_unmapped_master_spans));
3829#endif
3830#endif
3831 (void)sizeof(file);
3832}
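The dump routine takes a void pointer that must actually be a FILE*; a minimal sketch (helper name illustrative), assuming a build with ENABLE_STATISTICS, since without it the function compiles to a no-op:

  #include <stdio.h>
  #include "rpmalloc.h"

  static void dump_stats_example(void) {
    /* Emit the per-heap and global tables printed above to stderr. */
    rpmalloc_dump_statistics(stderr);
  }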
3833
3834#if RPMALLOC_FIRST_CLASS_HEAPS
3835
3836extern inline rpmalloc_heap_t *rpmalloc_heap_acquire(void) {
3837 // Must be a pristine heap from newly mapped memory pages, or else memory
3838 // blocks could already be allocated from the heap which would (wrongly) be
3839 // released when heap is cleared with rpmalloc_heap_free_all(). Also heaps
3840 // guaranteed to be pristine from the dedicated orphan list can be used.
3841 heap_t *heap = _rpmalloc_heap_allocate(1);
3842 rpmalloc_assume(heap != NULL);
3843 heap->owner_thread = 0;
3844 _rpmalloc_stat_inc(&_memory_active_heaps);
3845 return heap;
3846}
3847
3848extern inline void rpmalloc_heap_release(rpmalloc_heap_t *heap) {
3849 if (heap)
3850 _rpmalloc_heap_release(heap, 1, 1);
3851}
3852
3853extern inline RPMALLOC_ALLOCATOR void *
3854rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) {
3855#if ENABLE_VALIDATE_ARGS
3856 if (size >= MAX_ALLOC_SIZE) {
3857 errno = EINVAL;
3858 return 0;
3859 }
3860#endif
3861 return _rpmalloc_allocate(heap, size);
3862}
3863
3864extern inline RPMALLOC_ALLOCATOR void *
3865rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment,
3866 size_t size) {
3867#if ENABLE_VALIDATE_ARGS
3868 if (size >= MAX_ALLOC_SIZE) {
3869 errno = EINVAL;
3870 return 0;
3871 }
3872#endif
3873 return _rpmalloc_aligned_allocate(heap, alignment, size);
3874}
3875
3876extern inline RPMALLOC_ALLOCATOR void *
3877rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) {
3878 return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
3879}
3880
3881extern inline RPMALLOC_ALLOCATOR void *
3882rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment,
3883 size_t num, size_t size) {
3884 size_t total;
3885#if ENABLE_VALIDATE_ARGS
3886#if PLATFORM_WINDOWS
3887 int err = SizeTMult(num, size, &total);
3888 if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
3889 errno = EINVAL;
3890 return 0;
3891 }
3892#else
3893 int err = __builtin_umull_overflow(num, size, &total);
3894 if (err || (total >= MAX_ALLOC_SIZE)) {
3895 errno = EINVAL;
3896 return 0;
3897 }
3898#endif
3899#else
3900 total = num * size;
3901#endif
3902 void *block = _rpmalloc_aligned_allocate(heap, alignment, total);
3903 if (block)
3904 memset(block, 0, total);
3905 return block;
3906}
3907
3908extern inline RPMALLOC_ALLOCATOR void *
3909rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size,
3910 unsigned int flags) {
3911#if ENABLE_VALIDATE_ARGS
3912 if (size >= MAX_ALLOC_SIZE) {
3913 errno = EINVAL;
3914 return ptr;
3915 }
3916#endif
3917 return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
3918}
3919
3920extern inline RPMALLOC_ALLOCATOR void *
3921rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr,
3922 size_t alignment, size_t size,
3923 unsigned int flags) {
3924#if ENABLE_VALIDATE_ARGS
3925 if ((size + alignment < size) || (alignment > _memory_page_size)) {
3926 errno = EINVAL;
3927 return 0;
3928 }
3929#endif
3930 return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
3931}
3932
3933extern inline void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr) {
3934 (void)sizeof(heap);
3935 _rpmalloc_deallocate(ptr);
3936}
3937
3938extern inline void rpmalloc_heap_free_all(rpmalloc_heap_t *heap) {
3939 span_t *span;
3940 span_t *next_span;
3941
3942 _rpmalloc_heap_cache_adopt_deferred(heap, 0);
3943
3944 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
3945 span = heap->size_class[iclass].partial_span;
3946 while (span) {
3947 next_span = span->next;
3948 _rpmalloc_heap_cache_insert(heap, span);
3949 span = next_span;
3950 }
3951 heap->size_class[iclass].partial_span = 0;
3952 span = heap->full_span[iclass];
3953 while (span) {
3954 next_span = span->next;
3955 _rpmalloc_heap_cache_insert(heap, span);
3956 span = next_span;
3957 }
3958
3959 span = heap->size_class[iclass].cache;
3960 if (span)
3961 _rpmalloc_heap_cache_insert(heap, span);
3962 heap->size_class[iclass].cache = 0;
3963 }
3964 memset(heap->size_class, 0, sizeof(heap->size_class));
3965 memset(heap->full_span, 0, sizeof(heap->full_span));
3966
3967 span = heap->large_huge_span;
3968 while (span) {
3969 next_span = span->next;
3970 if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE))
3971 _rpmalloc_deallocate_huge(span);
3972 else
3973 _rpmalloc_heap_cache_insert(heap, span);
3974 span = next_span;
3975 }
3976 heap->large_huge_span = 0;
3977 heap->full_span_count = 0;
3978
3979#if ENABLE_THREAD_CACHE
3980 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3981 span_cache_t *span_cache;
3982 if (!iclass)
3983 span_cache = &heap->span_cache;
3984 else
3985 span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
3986 if (!span_cache->count)
3987 continue;
3988#if ENABLE_GLOBAL_CACHE
3989 _rpmalloc_stat_add64(&heap->thread_to_global,
3990 span_cache->count * (iclass + 1) * _memory_span_size);
3991 _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global,
3992 span_cache->count);
3993 _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1,
3994 span_cache->count);
3995#else
3996 for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
3997 _rpmalloc_span_unmap(span_cache->span[ispan]);
3998#endif
3999 span_cache->count = 0;
4000 }
4001#endif
4002
4003#if ENABLE_STATISTICS
4004 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
4005 atomic_store32(&heap->size_class_use[iclass].alloc_current, 0);
4006 atomic_store32(&heap->size_class_use[iclass].spans_current, 0);
4007 }
4008 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
4009 atomic_store32(&heap->span_use[iclass].current, 0);
4010 }
4011#endif
4012}
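A lifecycle sketch for the first-class heap API in this block (helper name illustrative), assuming a build with RPMALLOC_FIRST_CLASS_HEAPS and a prior rpmalloc_initialize(); all blocks owned by a heap can be released at once with rpmalloc_heap_free_all before the heap itself is returned with rpmalloc_heap_release.

  #include "rpmalloc.h"

  static void heap_lifecycle_example(void) {
    rpmalloc_heap_t *heap = rpmalloc_heap_acquire();
    if (!heap)
      return;
    for (int i = 0; i < 1000; ++i) {
      void *p = rpmalloc_heap_alloc(heap, 256);
      (void)p; /* Individual frees are optional when using free_all below. */
    }
    rpmalloc_heap_free_all(heap); /* Releases every block owned by the heap. */
    rpmalloc_heap_release(heap);  /* Returns the heap to the allocator. */
  }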
4013
4014extern inline void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap) {
4015 heap_t *prev_heap = get_thread_heap_raw();
4016 if (prev_heap != heap) {
4017 set_thread_heap(heap);
4018 if (prev_heap)
4019 rpmalloc_heap_release(prev_heap);
4020 }
4021}
4022
4023extern inline rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr) {
4024 // Grab the span, and then the heap from the span
4025 span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
4026 if (span) {
4027 return span->heap;
4028 }
4029 return 0;
4030}
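A sketch pairing rpmalloc_get_heap_for_ptr with rpmalloc_heap_free (helper name illustrative), assuming the pointer was allocated through the first-class heap API above; the owning heap is recovered from the span the pointer lives in.

  #include "rpmalloc.h"

  static void free_to_owner_example(void *ptr) {
    /* Look up the heap that owns the block, then hand the block back. */
    rpmalloc_heap_t *owner = rpmalloc_get_heap_for_ptr(ptr);
    if (owner)
      rpmalloc_heap_free(owner, ptr);
  }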
4031
4032#endif
4033
4034#if ENABLE_PRELOAD || ENABLE_OVERRIDE
4035
4036#include "malloc.c"
4037
4038#endif
4039
4040void rpmalloc_linker_reference(void) { (void)sizeof(_memory_heaps); }
#define SMALL_GRANULARITY
Preconfigured limits and sizes.
Definition rpmalloc.c:456
static FORCEINLINE void * atomic_exchange_ptr_acquire(atomicptr_t *dst, void *val)
Definition rpmalloc.c:375
#define LARGE_CLASS_COUNT
Number of large block size classes.
Definition rpmalloc.c:472
#define _memory_default_span_size
Global data.
Definition rpmalloc.c:762
static FORCEINLINE int64_t atomic_load64(atomic64_t *val)
Definition rpmalloc.c:360
#define _memory_span_size_shift
Definition rpmalloc.c:788
static void * _rpmalloc_allocate_from_heap_fallback(heap_t *heap, heap_size_class_t *heap_size_class, uint32_t class_idx)
Allocate a small/medium sized memory block from the given heap.
Definition rpmalloc.c:2424
static heap_t * _rpmalloc_heap_allocate(int first_class)
Allocate a new heap, potentially reusing a previously orphaned heap.
Definition rpmalloc.c:2271
#define MEDIUM_GRANULARITY
Granularity of a medium allocation block.
Definition rpmalloc.c:464
static void _rpmalloc_heap_unmap(heap_t *heap)
Definition rpmalloc.c:1878
#define SPAN_FLAG_UNMAPPED_MASTER
Flag indicating an unmapped master span.
Definition rpmalloc.c:538
static heap_t * get_thread_heap_raw(void)
Definition rpmalloc.c:877
void rpfree(void *ptr)
Free the given memory block.
Definition rpmalloc.c:3433
#define rpmalloc_assume(cond)
Definition rpmalloc.c:79
static size_t _memory_global_reserve_count
Global reserved count.
Definition rpmalloc.c:810
static void _rpmalloc_span_unmap(span_t *span)
Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings)
Definition rpmalloc.c:1457
static void _rpmalloc_heap_release(void *heapptr, int first_class, int release_cache)
Definition rpmalloc.c:2289
static atomic32_t _memory_global_lock
Used to restrict access to mapping memory for huge pages.
Definition rpmalloc.c:816
static span_t * _memory_global_reserve_master
Global reserved master.
Definition rpmalloc.c:812
static size_t _memory_map_granularity
Granularity at which memory pages are mapped by OS.
Definition rpmalloc.c:777
static FORCEINLINE int32_t atomic_incr32(atomic32_t *val)
Definition rpmalloc.c:343
static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val, int32_t ref)
Definition rpmalloc.c:352
static void _rpmalloc_adjust_size_class(size_t iclass)
Adjust and optimize the size class properties for the given class.
Definition rpmalloc.c:3070
#define THREAD_SPAN_CACHE_TRANSFER
Number of spans to transfer between thread and global cache.
Definition rpmalloc.c:485
RPMALLOC_ALLOCATOR void * rpmalloc(size_t size)
Allocate a memory block of at least the given size.
Definition rpmalloc.c:3422
static void * _rpmalloc_aligned_reallocate(heap_t *heap, void *ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags)
Definition rpmalloc.c:3017
#define SIZE_CLASS_HUGE
Definition rpmalloc.c:510
static void _rpmalloc_heap_finalize(heap_t *heap)
Definition rpmalloc.c:2355
static span_t * _rpmalloc_span_map_aligned_count(heap_t *heap, size_t span_count)
Map an aligned set of spans, taking configured mapping granularity and the page size into account.
Definition rpmalloc.c:1369
static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val)
Definition rpmalloc.c:357
int rpmalloc_initialize_config(const rpmalloc_config_t *config)
Initialize allocator with given configuration.
Definition rpmalloc.c:3103
#define THREAD_SPAN_LARGE_CACHE_TRANSFER
Number of spans to transfer between thread and global cache for large spans.
Definition rpmalloc.c:490
static void _rpmalloc_heap_global_finalize(heap_t *heap)
Definition rpmalloc.c:1891
#define SPAN_FLAG_SUBSPAN
Flag indicating span is a secondary (sub) span of a split superspan.
Definition rpmalloc.c:534
static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref)
Definition rpmalloc.c:379
static void _rpmalloc_spin(void)
Definition rpmalloc.c:949
static size_class_t _memory_size_class[SIZE_CLASS_COUNT]
Global size classes.
Definition rpmalloc.c:796
static size_t _memory_page_size
Memory page size.
Definition rpmalloc.c:773
static void _rpmalloc_heap_release_raw_fc(void *heapptr)
Definition rpmalloc.c:2351
static void * _rpmalloc_allocate(heap_t *heap, size_t size)
Allocate a block of the given size.
Definition rpmalloc.c:2578
#define TLS_MODEL
Thread local heap and ID.
Definition rpmalloc.c:866
static size_t _memory_span_map_count
Number of spans to map in each map call.
Definition rpmalloc.c:792
static void * _rpmalloc_allocate_small(heap_t *heap, size_t size)
Allocate a small sized memory block from the given heap.
Definition rpmalloc.c:2488
#define _memory_default_span_size_shift
Definition rpmalloc.c:763
static heap_t * _memory_heaps[HEAP_ARRAY_SIZE]
All heaps.
Definition rpmalloc.c:814
#define _memory_span_mask
Definition rpmalloc.c:789
#define SIZE_CLASS_LARGE
Definition rpmalloc.c:509
static span_t * _memory_global_reserve
Global reserved spans.
Definition rpmalloc.c:808
const rpmalloc_config_t * rpmalloc_config(void)
Get allocator configuration.
Definition rpmalloc.c:3418
volatile _Atomic(int32_t)
Atomic access abstraction (since MSVC does not do C11 yet)
Definition rpmalloc.c:333
static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add)
Definition rpmalloc.c:363
static void _rpmalloc_span_double_link_list_pop_head(span_t **head, span_t *span)
Pop head span from double linked list.
Definition rpmalloc.c:1271
#define _rpmalloc_stat_add64(counter, value)
Definition rpmalloc.c:434
static void set_thread_heap(heap_t *heap)
Set the current thread heap.
Definition rpmalloc.c:931
#define _rpmalloc_stat_add(counter, value)
Definition rpmalloc.c:431
static void * _rpmalloc_aligned_allocate(heap_t *heap, size_t alignment, size_t size)
Definition rpmalloc.c:2589
static int _memory_huge_pages
Huge page support.
Definition rpmalloc.c:802
static FORCEINLINE int32_t atomic_decr32(atomic32_t *val)
Definition rpmalloc.c:346
RPMALLOC_ALLOCATOR void * rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags)
Reallocate the given block to at least the given size and alignment,.
Definition rpmalloc.c:3472
#define MAX_THREAD_SPAN_LARGE_CACHE
Number of spans in thread cache for large spans (must be greater than LARGE_CLASS_COUNT / 2)
Definition rpmalloc.c:488
void rpmalloc_set_main_thread(void)
Set main thread ID.
Definition rpmalloc.c:945
#define MEDIUM_CLASS_COUNT
Number of medium block size classes.
Definition rpmalloc.c:468
static void _rpmalloc_inc_span_statistics(heap_t *heap, size_t span_count, uint32_t class_idx)
Definition rpmalloc.c:2079
RPMALLOC_ALLOCATOR void * rpcalloc(size_t num, size_t size)
Definition rpmalloc.c:3435
static void _rpmalloc_deallocate_large(span_t *span)
Deallocate the given large memory block to the current heap.
Definition rpmalloc.c:2817
#define _memory_span_size
Hardwired span size.
Definition rpmalloc.c:787
static void _rpmalloc_heap_initialize(heap_t *heap)
Definition rpmalloc.c:2154
#define _rpmalloc_stat_add_peak(counter, value, peak)
Definition rpmalloc.c:437
static void _rpmalloc_heap_orphan(heap_t *heap, int first_class)
Definition rpmalloc.c:2165
RPMALLOC_ALLOCATOR void * rpmemalign(size_t alignment, size_t size)
Allocate a memory block of at least the given size and alignment.
Definition rpmalloc.c:3517
static void _rpmalloc_deallocate_huge(span_t *)
Global cache.
Definition rpmalloc.c:2882
static void _rpmalloc_span_release_to_cache(heap_t *heap, span_t *span)
Move the span (used for small or medium allocations) to the heap thread cache.
Definition rpmalloc.c:1509
static int _rpmalloc_initialized
Initialized flag.
Definition rpmalloc.c:767
#define SPAN_HEADER_SIZE
Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two)
Definition rpmalloc.c:481
static heap_t * _rpmalloc_heap_allocate_new(void)
Allocate a new heap from newly mapped memory pages.
Definition rpmalloc.c:2179
static void * _rpmalloc_span_initialize_new(heap_t *heap, heap_size_class_t *heap_size_class, span_t *span, uint32_t class_idx)
Initialize an unused span (from cache or mapped) to be new active span, putting the initial free list...
Definition rpmalloc.c:1567
#define SMALL_GRANULARITY_SHIFT
Small granularity shift count.
Definition rpmalloc.c:458
static void _rpmalloc_set_name(void *address, size_t size)
Low level memory map/unmap.
Definition rpmalloc.c:1034
static size_t _rpmalloc_span_align_count(size_t span_count)
Get the aligned number of spans to map in based on wanted count, configured mapping granularity and t...
Definition rpmalloc.c:1344
#define LARGE_SIZE_LIMIT
Maximum size of a large block.
Definition rpmalloc.c:477
static void _rpmalloc_unmap_os(void *address, size_t size, size_t offset, size_t release)
Default implementation to unmap pages from virtual memory.
Definition rpmalloc.c:1177
#define ENABLE_THREAD_CACHE
Enable per-thread cache.
Definition rpmalloc.c:88
static void _rpmalloc_deallocate_direct_small_or_medium(span_t *span, void *block)
Deallocation entry points.
Definition rpmalloc.c:2719
static void _rpmalloc_deallocate_defer_small_or_medium(span_t *span, void *block)
Put the block in the deferred free list of the owning span.
Definition rpmalloc.c:2769
void rpmalloc_thread_collect(void)
Perform deferred deallocations pending for the calling thread heap.
Definition rpmalloc.c:3535
void rpmalloc_thread_finalize(int release_caches)
Finalize thread, orphan heap.
Definition rpmalloc.c:3404
int rpmalloc_is_thread_initialized(void)
Query if allocator is initialized for calling thread.
Definition rpmalloc.c:3414
void rpmalloc_linker_reference(void)
Dummy empty function for forcing linker symbol inclusion.
Definition rpmalloc.c:4040
#define FORCEINLINE
Platform and arch specifics.
Definition rpmalloc.c:172
void rpmalloc_dump_statistics(void *file)
Dump all statistics in human readable format to file (should be a FILE*)
Definition rpmalloc.c:3742
RPMALLOC_ALLOCATOR void * rpaligned_alloc(size_t alignment, size_t size)
Allocate a memory block of at least the given size and alignment.
Definition rpmalloc.c:3486
size_t rpmalloc_usable_size(void *ptr)
Query the usable size of the given memory block (from given pointer to the end of block)
Definition rpmalloc.c:3531
static void _rpmalloc_deallocate_small_or_medium(span_t *span, void *p)
Definition rpmalloc.c:2793
static span_t * _rpmalloc_global_get_reserved_spans(size_t span_count)
Use global reserved spans to fulfill a memory map request (reserve size must be checked by caller)
Definition rpmalloc.c:1234
#define _rpmalloc_stat_sub(counter, value)
Definition rpmalloc.c:440
static void * _rpmalloc_mmap_os(size_t size, size_t *offset)
Default implementation to map new pages to virtual memory.
Definition rpmalloc.c:1086
#define _rpmalloc_memcpy_const(x, y, s)
Definition rpmalloc.c:64
static uintptr_t get_thread_id(void)
Fast thread ID.
Definition rpmalloc.c:899
void rpmalloc_thread_initialize(void)
Initialize thread, assign heap.
Definition rpmalloc.c:3390
static span_t * _rpmalloc_heap_thread_cache_deferred_extract(heap_t *heap, size_t span_count)
Definition rpmalloc.c:2020
static void * _rpmalloc_allocate_large(heap_t *heap, size_t size)
Allocate a large sized memory block from the given heap.
Definition rpmalloc.c:2519
RPMALLOC_ALLOCATOR void * rprealloc(void *ptr, size_t size)
Reallocate the given block to at least the given size.
Definition rpmalloc.c:3461
#define EXPECTED(x)
Definition rpmalloc.c:384
static FORCEINLINE void * atomic_load_ptr(atomicptr_t *src)
Definition rpmalloc.c:366
static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master, span_t *subspan, size_t span_count)
Declare the span to be a subspan and store distance from master span and span count.
Definition rpmalloc.c:1309
static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val)
Definition rpmalloc.c:372
#define MEDIUM_GRANULARITY_SHIFT
Medium granularity shift count.
Definition rpmalloc.c:466
#define INVALID_POINTER
Definition rpmalloc.c:507
static size_t _memory_medium_size_limit
Run-time size limit of medium blocks.
Definition rpmalloc.c:798
#define SPAN_FLAG_MASTER
Flag indicating span is the first (master) span of a split superspan.
Definition rpmalloc.c:532
static void * _rpmalloc_reallocate(heap_t *heap, void *p, size_t size, size_t oldsize, unsigned int flags)
Reallocate the given block to the given size.
Definition rpmalloc.c:2933
int rpmalloc_initialize(void)
Initialize the allocator and setup global data.
Definition rpmalloc.c:3095
static span_t * _rpmalloc_heap_reserved_extract(heap_t *heap, size_t span_count)
Definition rpmalloc.c:2032
static size_t _memory_page_size_shift
Shift to divide by page size.
Definition rpmalloc.c:775
static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val)
Definition rpmalloc.c:340
static void _rpmalloc_heap_release_raw(void *heapptr, int release_cache)
Definition rpmalloc.c:2347
static void _rpmalloc_global_set_reserved_spans(span_t *master, span_t *reserve, size_t reserve_span_count)
Store the given spans as global reserve (must only be called from within new heap allocation,...
Definition rpmalloc.c:1249
#define SIZE_CLASS_COUNT
Total number of small + medium size classes.
Definition rpmalloc.c:470
static int _rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span, span_t **list_head)
Definition rpmalloc.c:1622
#define _rpmalloc_stat_dec(counter)
Definition rpmalloc.c:428
static uintptr_t _rpmalloc_main_thread_id
Main thread ID.
Definition rpmalloc.c:769
#define _rpmalloc_stat_inc_free(heap, class_idx)
Definition rpmalloc.c:446
static span_t * _rpmalloc_heap_thread_cache_extract(heap_t *heap, size_t span_count)
Extract the given number of spans from the different cache levels.
Definition rpmalloc.c:2003
#define pointer_offset(ptr, ofs)
Definition rpmalloc.c:503
#define _rpmalloc_stat_inc(counter)
Statistics related functions (evaluate to nothing when statistics not enabled)
Definition rpmalloc.c:425
#define _rpmalloc_memset_const(x, y, s)
Definition rpmalloc.c:65
static void * _rpmalloc_allocate_huge(heap_t *heap, size_t size)
Allocate a huge block by mapping memory pages directly.
Definition rpmalloc.c:2549
#define _rpmalloc_stat_inc_alloc(heap, class_idx)
Definition rpmalloc.c:443
static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span)
Span control.
Definition rpmalloc.c:1940
static span_t * _rpmalloc_heap_global_cache_extract(heap_t *heap, size_t span_count)
Extract a span from the global cache.
Definition rpmalloc.c:2040
static void _rpmalloc_unmap(void *address, size_t size, size_t offset, size_t release)
Unmap virtual memory.
Definition rpmalloc.c:1072
static void _rpmalloc_span_double_link_list_add(span_t **head, span_t *span)
Span linked list management.
Definition rpmalloc.c:1263
static void * _rpmalloc_allocate_medium(heap_t *heap, size_t size)
Allocate a medium sized memory block from the given heap.
Definition rpmalloc.c:2502
#define HEAP_ARRAY_SIZE
Size of heap hashmap.
Definition rpmalloc.c:84
static void _rpmalloc_deallocate_defer_free_span(heap_t *heap, span_t *span)
Definition rpmalloc.c:2759
static void * free_list_pop(void **list)
Allocation entry points.
Definition rpmalloc.c:2417
void rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats)
Get per-thread statistics.
Definition rpmalloc.c:3537
static heap_t * _rpmalloc_heap_extract_orphan(heap_t **heap_list)
Definition rpmalloc.c:2264
static span_t * _rpmalloc_heap_extract_new_span(heap_t *heap, heap_size_class_t *heap_size_class, size_t span_count, uint32_t class_idx)
Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping...
Definition rpmalloc.c:2098
#define rpmalloc_assert(truth, message)
Definition rpmalloc.c:258
#define SPAN_FLAG_ALIGNED_BLOCKS
Flag indicating span has blocks with increased alignment.
Definition rpmalloc.c:536
static void _rpmalloc_span_initialize(span_t *span, size_t total_span_count, size_t span_count, size_t align_offset)
Setup a newly mapped span.
Definition rpmalloc.c:1356
#define _memory_default_span_mask
Definition rpmalloc.c:764
static size_t _memory_heap_reserve_count
Number of spans to keep reserved in each heap.
Definition rpmalloc.c:794
static size_t _rpmalloc_usable_size(void *p)
Reallocation entry points.
Definition rpmalloc.c:3050
static uint32_t free_list_partial_init(void **list, void **first_block, void *page_start, void *block_start, uint32_t block_count, uint32_t block_size)
Initialize a (partial) free list up to next system memory page, while reserving the first block as al...
Definition rpmalloc.c:1532
static int _rpmalloc_span_is_fully_utilized(span_t *span)
Definition rpmalloc.c:1616
static rpmalloc_config_t _memory_config
Configuration.
Definition rpmalloc.c:771
static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val)
Definition rpmalloc.c:369
static void _rpmalloc_deallocate(void *p)
Deallocate the given block.
Definition rpmalloc.c:2910
static heap_t * _memory_orphan_heaps
Orphaned heaps.
Definition rpmalloc.c:818
static span_t * _rpmalloc_span_map(heap_t *heap, size_t span_count)
Map in memory pages for the given number of spans (or use previously reserved pages)
Definition rpmalloc.c:1418
static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master, span_t *reserve, size_t reserve_span_count)
Store the given spans as reserve in the given heap.
Definition rpmalloc.c:1823
RPMALLOC_ALLOCATOR void * rpaligned_calloc(size_t alignment, size_t num, size_t size)
Definition rpmalloc.c:3492
void rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats)
Get global statistics.
Definition rpmalloc.c:3620
int rpposix_memalign(void **memptr, size_t alignment, size_t size)
Allocate a memory block of at least the given size and alignment.
Definition rpmalloc.c:3522
#define MAX_THREAD_SPAN_CACHE
Number of spans in thread cache.
Definition rpmalloc.c:483
static void _rpmalloc_span_double_link_list_remove(span_t **head, span_t *span)
Remove a span from double linked list.
Definition rpmalloc.c:1279
#define GLOBAL_CACHE_MULTIPLIER
Multiplier for global cache.
Definition rpmalloc.c:133
#define MEDIUM_SIZE_LIMIT
Maximum size of a medium block.
Definition rpmalloc.c:474
#define SMALL_SIZE_LIMIT
Maximum size of a small block.
Definition rpmalloc.c:462
static void _rpmalloc_span_extract_free_list_deferred(span_t *span)
Definition rpmalloc.c:1603
struct span_list_t span_list_t
Span list.
Definition rpmalloc.c:523
static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add)
Definition rpmalloc.c:349
static atomic32_t _memory_heap_id
Heap ID counter.
Definition rpmalloc.c:800
static void _rpmalloc_heap_cache_adopt_deferred(heap_t *heap, span_t **single_span)
Adopt the deferred span cache list, optionally extracting the first single span for immediate re-use.
Definition rpmalloc.c:1833
#define pointer_diff(first, second)
Definition rpmalloc.c:504
struct span_active_t span_active_t
Span active data.
Definition rpmalloc.c:525
#define DEFAULT_SPAN_MAP_COUNT
Default number of spans to map in call to map more virtual memory (default values yield 4MiB here)
Definition rpmalloc.c:129
#define UNEXPECTED(x)
Definition rpmalloc.c:385
static span_t * _rpmalloc_span_map_from_reserve(heap_t *heap, size_t span_count)
Use reserved spans to fulfill a memory map request (reserve size must be checked by caller)
Definition rpmalloc.c:1326
static void * _rpmalloc_mmap(size_t size, size_t *offset)
Map more virtual memory.
Definition rpmalloc.c:1054
static heap_t * get_thread_heap(void)
Get the current thread heap.
Definition rpmalloc.c:886
#define ENABLE_UNLIMITED_CACHE
Enable unlimited global cache (no unmapping until finalization)
Definition rpmalloc.c:120
#define SMALL_CLASS_COUNT
Number of small block size classes.
Definition rpmalloc.c:460
void rpmalloc_finalize(void)
Finalize the allocator.
Definition rpmalloc.c:3341
#define RPMALLOC_ALLOCATOR
Definition rpmalloc.h:46
#define RPMALLOC_GROW_OR_FAIL
Flag to rpaligned_realloc to fail and return null pointer if grow cannot be done in-place,...
Definition rpmalloc.h:73
#define RPMALLOC_NO_PRESERVE
Flag to rpaligned_realloc to not preserve content in reallocation.
Definition rpmalloc.h:68
uint32_t count
Cache count.
Definition rpmalloc.c:742
span_t * overflow
Unlimited cache overflow.
Definition rpmalloc.c:752
atomic32_t lock
Cache lock.
Definition rpmalloc.c:740
span_t * span[GLOBAL_CACHE_MULTIPLIER *MAX_THREAD_SPAN_CACHE]
Cached spans.
Definition rpmalloc.c:750
span_t * cache
Early level cache of fully free spans.
Definition rpmalloc.c:664
void * free_list
Free list of active span.
Definition rpmalloc.c:659
span_t * partial_span
Double linked list of partially used spans with free blocks.
Definition rpmalloc.c:662
int finalize
Finalization state flag.
Definition rpmalloc.c:698
int32_t id
Heap ID.
Definition rpmalloc.c:696
heap_t * master_heap
Master heap owning the memory pages.
Definition rpmalloc.c:700
atomicptr_t span_free_deferred
List of deferred free spans (single linked list)
Definition rpmalloc.c:680
uintptr_t owner_thread
Owning thread ID.
Definition rpmalloc.c:672
atomic32_t child_count
Child count.
Definition rpmalloc.c:690
size_t full_span_count
Number of full spans.
Definition rpmalloc.c:682
heap_size_class_t size_class[SIZE_CLASS_COUNT]
Free lists for each size class.
Definition rpmalloc.c:674
heap_t * next_heap
Next heap in id list.
Definition rpmalloc.c:692
heap_t * next_orphan
Next heap in orphan list.
Definition rpmalloc.c:694
uint32_t spans_reserved
Number of mapped but unused spans.
Definition rpmalloc.c:688
span_t * span_reserve
Mapped but unused spans.
Definition rpmalloc.c:684
span_t * span_reserve_master
Master span for mapped but unused spans.
Definition rpmalloc.c:686
uint32_t block_size
Size of blocks in this class.
Definition rpmalloc.c:730
uint16_t class_idx
Class index this class is merged with.
Definition rpmalloc.c:734
uint16_t block_count
Number of blocks in each chunk.
Definition rpmalloc.c:732
size_t count
Definition rpmalloc.c:646
span_t * span[MAX_THREAD_SPAN_CACHE]
Definition rpmalloc.c:647
span_t * span[MAX_THREAD_SPAN_LARGE_CACHE]
Definition rpmalloc.c:653
uint32_t offset_from_master
Offset from master span for subspans.
Definition rpmalloc.c:631
uint32_t align_offset
Alignment offset.
Definition rpmalloc.c:635
atomicptr_t free_list_deferred
Deferred free list.
Definition rpmalloc.c:619
heap_t * heap
Owning heap.
Definition rpmalloc.c:637
span_t * prev
Previous span.
Definition rpmalloc.c:641
atomic32_t remaining_spans
Remaining span counter, for master spans.
Definition rpmalloc.c:633
uint32_t flags
Flags and counters.
Definition rpmalloc.c:625
uint32_t size_class
Size class.
Definition rpmalloc.c:613
uint32_t block_count
Total block count of size class.
Definition rpmalloc.c:611
void * free_list
Free list.
Definition rpmalloc.c:609
uint32_t block_size
Size of a block.
Definition rpmalloc.c:623
uint32_t used_count
Number of used blocks remaining when in partial state.
Definition rpmalloc.c:617
span_t * next
Next span.
Definition rpmalloc.c:639
uint32_t list_size
Size of deferred free list, or list of spans when part of a cache list.
Definition rpmalloc.c:621
uint32_t span_count
Number of spans.
Definition rpmalloc.c:627
uint32_t free_list_limit
Index of last block initialized in free list.
Definition rpmalloc.c:615
uint32_t total_spans
Total span counter for master spans.
Definition rpmalloc.c:629