diff options
Diffstat (limited to 'externals/jemalloc/include/internal/arena.h')
-rw-r--r-- | externals/jemalloc/include/internal/arena.h | 537 |
1 file changed, 537 insertions, 0 deletions
diff --git a/externals/jemalloc/include/internal/arena.h b/externals/jemalloc/include/internal/arena.h
new file mode 100644
index 00000000000..bb4ce2a54f7
--- /dev/null
+++ b/externals/jemalloc/include/internal/arena.h
@@ -0,0 +1,537 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

/*
 * Subpages are an artificially designated partitioning of pages.  Their only
 * purpose is to support subpage-spaced size classes.
 *
 * There must be at least 4 subpages per page, due to the way size classes are
 * handled.
 */
#define LG_SUBPAGE              8
#define SUBPAGE                 ((size_t)(1U << LG_SUBPAGE))
#define SUBPAGE_MASK            (SUBPAGE - 1)

/* Return the smallest subpage multiple that is >= s. */
#define SUBPAGE_CEILING(s)                                              \
        (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)

#ifdef JEMALLOC_TINY
   /* Smallest size class to support. */
#  define LG_TINY_MIN           LG_SIZEOF_PTR
#endif

/*
 * Maximum size class that is a multiple of the quantum, but not (necessarily)
 * a power of 2.  Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
#define LG_QSPACE_MAX_DEFAULT   7

/*
 * Maximum size class that is a multiple of the cacheline, but not
 * (necessarily) a power of 2.  Above this size, allocations are rounded up to
 * the nearest power of 2.
 */
#define LG_CSPACE_MAX_DEFAULT   9

/*
 * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
 * as small as possible such that this setting is still honored, without
 * violating other constraints.  The goal is to make runs as small as possible
 * without exceeding a per run external fragmentation threshold.
 *
 * We use binary fixed point math for overhead computations, where the binary
 * point is implicitly RUN_BFP bits to the left.
 *
 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
 * honored for some/all object sizes, since there is one bit of header overhead
 * per object (plus a constant).  This constraint is relaxed (ignored) for runs
 * that are so small that the per-region overhead is greater than:
 *
 *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
 */
#define RUN_BFP                 12
/*                              \/   Implicit binary fixed point. */
#define RUN_MAX_OVRHD           0x0000003dU
#define RUN_MAX_OVRHD_RELAX     0x00001800U

/*
 * The minimum ratio of active:dirty pages per arena is computed as:
 *
 *   (nactive >> opt_lg_dirty_mult) >= ndirty
 *
 * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
 * times as many active pages as dirty pages.
 */
#define LG_DIRTY_MULT_DEFAULT   5

typedef struct arena_chunk_map_s arena_chunk_map_t;
typedef struct arena_chunk_s arena_chunk_t;
typedef struct arena_run_s arena_run_t;
typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
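
/*
 * Illustrative sketch (editorial addition, not part of the original header):
 * the LG_DIRTY_MULT ratio above turns into a purge trigger along these lines.
 * The helper name is hypothetical; the real policy lives in the purging code
 * in arena.c.
 */
static inline bool
example_dirty_ratio_exceeded(size_t nactive, size_t ndirty,
    ssize_t lg_dirty_mult)
{
        /*
         * With lg_dirty_mult == 5, an arena with 4096 active pages tolerates
         * at most 4096 >> 5 == 128 dirty pages before purging is warranted.
         * A negative value is assumed here to disable purging entirely.
         */
        if (lg_dirty_mult < 0)
                return (false);
        return (ndirty > (nactive >> lg_dirty_mult));
}
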
#ifdef JEMALLOC_H_STRUCTS

/* Each element of the chunk map corresponds to one page within the chunk. */
struct arena_chunk_map_s {
        union {
                /*
                 * Linkage for run trees.  There are two disjoint uses:
                 *
                 * 1) arena_t's runs_avail_{clean,dirty} trees.
                 * 2) arena_run_t conceptually uses this linkage for in-use
                 *    non-full runs, rather than directly embedding linkage.
                 */
                rb_node(arena_chunk_map_t)      rb_link;
                /*
                 * List of runs currently in purgatory.  arena_chunk_purge()
                 * temporarily allocates runs that contain dirty pages while
                 * purging, so that other threads cannot use the runs while the
                 * purging thread is operating without the arena lock held.
                 */
                ql_elm(arena_chunk_map_t)       ql_link;
        } u;

#ifdef JEMALLOC_PROF
        /* Profile counters, used for large object runs. */
        prof_thr_cnt_t  *prof_cnt;
#endif

        /*
         * Run address (or size) and various flags are stored together.  The
         * bit layout looks like (assuming 32-bit system):
         *
         *   ???????? ???????? ????---- ----dzla
         *
         * ? : Unallocated: Run address for first/last pages, unset for
         *                  internal pages.
         *     Small: Run page offset.
         *     Large: Run size for first page, unset for trailing pages.
         * - : Unused.
         * d : dirty?
         * z : zeroed?
         * l : large?
         * a : allocated?
         *
         * Following are example bit patterns for the three types of runs.
         *
         *   p : run page offset
         *   s : run size
         *   c : size class (used only if prof_promote is true)
         *   x : don't care
         *   - : 0
         *   + : 1
         *   [DZLA] : bit set
         *   [dzla] : bit unset
         *
         *   Unallocated (clean):
         *     ssssssss ssssssss ssss---- ----dz--
         *     xxxxxxxx xxxxxxxx xxxx---- -----Zxx
         *     ssssssss ssssssss ssss---- ----dZ--
         *
         *   Unallocated (dirty):
         *     ssssssss ssssssss ssss---- ----D---
         *     xxxxxxxx xxxxxxxx xxxx---- ----xxxx
         *     ssssssss ssssssss ssss---- ----D---
         *
         *   Small:
         *     pppppppp pppppppp pppp---- ----d--a
         *     pppppppp pppppppp pppp---- -------a
         *     pppppppp pppppppp pppp---- ----d--a
         *
         *   Large:
         *     ssssssss ssssssss ssss++++ ++++D-la
         *     xxxxxxxx xxxxxxxx xxxx---- ----xxxx
         *     -------- -------- -------- ----D-la
         *
         *   Large (sampled, size <= PAGE_SIZE):
         *     ssssssss ssssssss sssscccc ccccD-la
         *
         *   Large (not sampled, size == PAGE_SIZE):
         *     ssssssss ssssssss ssss++++ ++++D-la
         */
        size_t  bits;
#ifdef JEMALLOC_PROF
#define CHUNK_MAP_CLASS_SHIFT   4
#define CHUNK_MAP_CLASS_MASK    ((size_t)0xff0U)
#endif
#define CHUNK_MAP_FLAGS_MASK    ((size_t)0xfU)
#define CHUNK_MAP_DIRTY         ((size_t)0x8U)
#define CHUNK_MAP_ZEROED        ((size_t)0x4U)
#define CHUNK_MAP_LARGE         ((size_t)0x2U)
#define CHUNK_MAP_ALLOCATED     ((size_t)0x1U)
#define CHUNK_MAP_KEY           CHUNK_MAP_ALLOCATED
};
typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
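
/*
 * Illustrative sketch (editorial addition, not part of the original header):
 * for a page belonging to a small run, the high bits of "bits" hold the
 * page's offset from the start of its run, so the run header can be
 * recovered from any interior page.  arena_dalloc(), at the end of this
 * header, performs the same computation.
 */
static inline size_t
example_small_run_first_pageind(size_t pageind, size_t mapbits)
{
        assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
        assert((mapbits & CHUNK_MAP_LARGE) == 0);
        return (pageind - (mapbits >> PAGE_SHIFT));
}
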
/* Arena chunk header. */
struct arena_chunk_s {
        /* Arena that owns the chunk. */
        arena_t         *arena;

        /* Linkage for the arena's chunks_dirty list. */
        ql_elm(arena_chunk_t) link_dirty;

        /*
         * True if the chunk is currently in the chunks_dirty list, due to
         * having at some point contained one or more dirty pages.  Removal
         * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
         */
        bool            dirtied;

        /* Number of dirty pages. */
        size_t          ndirty;

        /* Map of pages within chunk that keeps track of free/large/small. */
        arena_chunk_map_t map[1]; /* Dynamically sized. */
};
typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;

struct arena_run_s {
#ifdef JEMALLOC_DEBUG
        uint32_t        magic;
#  define ARENA_RUN_MAGIC 0x384adf93
#endif

        /* Bin this run is associated with. */
        arena_bin_t     *bin;

        /* Stack of available freed regions, or NULL. */
        void            *avail;

        /* Next region that has never been allocated, or run boundary. */
        void            *next;

        /* Number of free regions in run. */
        unsigned        nfree;
};
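
/*
 * Illustrative sketch (editorial addition, not part of the original header):
 * the trailing map[] array is indexed by page number within the chunk, so a
 * map element can be looked up directly from an allocation's address.  The
 * same steps appear in arena_dalloc() at the end of this header.
 */
static inline arena_chunk_map_t *
example_ptr2mapelm(void *ptr)
{
        arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
        size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;

        return (&chunk->map[pageind]);
}
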
struct arena_bin_s {
        /*
         * All operations on runcur, runs, and stats require that lock be
         * locked.  Run allocation/deallocation are protected by the arena
         * lock, which may be acquired while holding one or more bin locks,
         * but not vice versa.
         */
        malloc_mutex_t  lock;

        /*
         * Current run being used to service allocations of this bin's size
         * class.
         */
        arena_run_t     *runcur;

        /*
         * Tree of non-full runs.  This tree is used when looking for an
         * existing run when runcur is no longer usable.  We choose the
         * non-full run that is lowest in memory; this policy tends to keep
         * objects packed well, and it can also help reduce the number of
         * almost-empty chunks.
         */
        arena_run_tree_t runs;

        /* Size of regions in a run for this bin's size class. */
        size_t          reg_size;

        /* Total size of a run for this bin's size class. */
        size_t          run_size;

        /* Total number of regions in a run for this bin's size class. */
        uint32_t        nregs;

#ifdef JEMALLOC_PROF
        /*
         * Offset of first (prof_cnt_t *) in a run header for this bin's size
         * class, or 0 if (opt_prof == false).
         */
        uint32_t        cnt0_offset;
#endif

        /* Offset of first region in a run for this bin's size class. */
        uint32_t        reg0_offset;

#ifdef JEMALLOC_STATS
        /* Bin statistics. */
        malloc_bin_stats_t stats;
#endif
};

struct arena_s {
#ifdef JEMALLOC_DEBUG
        uint32_t        magic;
#  define ARENA_MAGIC 0x947d3d24
#endif

        /* This arena's index within the arenas array. */
        unsigned        ind;

        /*
         * All non-bin-related operations on this arena require that lock be
         * locked.
         */
        malloc_mutex_t  lock;

#ifdef JEMALLOC_STATS
        arena_stats_t   stats;
#  ifdef JEMALLOC_TCACHE
        /*
         * List of tcaches for extant threads associated with this arena.
         * Stats from these are merged incrementally, and at exit.
         */
        ql_head(tcache_t) tcache_ql;
#  endif
#endif

#ifdef JEMALLOC_PROF
        uint64_t        prof_accumbytes;
#endif

        /* List of dirty-page-containing chunks this arena manages. */
        ql_head(arena_chunk_t) chunks_dirty;

        /*
         * In order to avoid rapid chunk allocation/deallocation when an arena
         * oscillates right on the cusp of needing a new chunk, cache the most
         * recently freed chunk.  The spare is left in the arena's chunk trees
         * until it is deleted.
         *
         * There is one spare chunk per arena, rather than one spare total, in
         * order to avoid interactions between multiple threads that could
         * make a single spare inadequate.
         */
        arena_chunk_t   *spare;

        /* Number of pages in active runs. */
        size_t          nactive;

        /*
         * Current count of pages within unused runs that are potentially
         * dirty, and for which madvise(... MADV_DONTNEED) has not been
         * called.  By tracking this, we can institute a limit on how much
         * dirty unused memory is mapped for each arena.
         */
        size_t          ndirty;

        /*
         * Approximate number of pages being purged.  It is possible for
         * multiple threads to purge dirty pages concurrently, and they use
         * npurgatory to indicate the total number of pages all threads are
         * attempting to purge.
         */
        size_t          npurgatory;

        /*
         * Size/address-ordered trees of this arena's available runs.  The
         * trees are used for first-best-fit run allocation.  The dirty tree
         * contains runs with dirty pages (i.e. very likely to have been
         * touched and therefore have associated physical pages), whereas the
         * clean tree contains runs with pages that either have no associated
         * physical pages, or have pages that the kernel may recycle at any
         * time due to previous madvise(2) calls.  The dirty tree is used in
         * preference to the clean tree for allocations, because using dirty
         * pages reduces the amount of dirty purging necessary to keep the
         * active:dirty page ratio below the purge threshold.
         */
        arena_avail_tree_t      runs_avail_clean;
        arena_avail_tree_t      runs_avail_dirty;

        /*
         * bins is used to store trees of free regions of the following sizes,
         * assuming a 16-byte quantum, 4 KiB page size, and default
         * JEMALLOC_OPTIONS.
         *
         *   bins[i] |   size |
         *   --------+--------+
         *        0  |      2 |
         *        1  |      4 |
         *        2  |      8 |
         *   --------+--------+
         *        3  |     16 |
         *        4  |     32 |
         *        5  |     48 |
         *            :        :
         *        8  |     96 |
         *        9  |    112 |
         *       10  |    128 |
         *   --------+--------+
         *       11  |    192 |
         *       12  |    256 |
         *       13  |    320 |
         *       14  |    384 |
         *       15  |    448 |
         *       16  |    512 |
         *   --------+--------+
         *       17  |    768 |
         *       18  |   1024 |
         *       19  |   1280 |
         *            :        :
         *       27  |   3328 |
         *       28  |   3584 |
         *       29  |   3840 |
         *   --------+--------+
         *       30  |  4 KiB |
         *       31  |  6 KiB |
         *       33  |  8 KiB |
         *            :        :
         *       43  | 28 KiB |
         *       44  | 30 KiB |
         *       45  | 32 KiB |
         *   --------+--------+
         */
        arena_bin_t     bins[1]; /* Dynamically sized. */
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
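
/*
 * Illustrative sketch (editorial addition, not part of the original header):
 * the bin table above reflects three spacing regimes: quantum-spaced classes
 * up to qspace_max, cacheline-spaced classes up to cspace_max, and
 * subpage-spaced classes up to small_maxclass (all declared below).  Rounding
 * a request up to its region size looks roughly like this; the real lookup
 * goes through the small_size2bin table, and CACHELINE is assumed here to be
 * the usual constant from jemalloc_internal.h.
 */
static inline size_t
example_small_region_size(size_t size)
{
        if (size <= qspace_max)
                return ((size + QUANTUM - 1) & ~((size_t)QUANTUM - 1));
        if (size <= cspace_max)
                return ((size + CACHELINE - 1) & ~((size_t)CACHELINE - 1));
        return (SUBPAGE_CEILING(size));
}
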
#ifdef JEMALLOC_H_EXTERNS

extern size_t   opt_lg_qspace_max;
extern size_t   opt_lg_cspace_max;
extern ssize_t  opt_lg_dirty_mult;
extern uint8_t const *small_size2bin;

/* Various bin-related settings. */
#ifdef JEMALLOC_TINY            /* Number of (2^n)-spaced tiny bins. */
#  define ntbins        ((unsigned)(LG_QUANTUM - LG_TINY_MIN))
#else
#  define ntbins        0
#endif
extern unsigned nqbins; /* Number of quantum-spaced bins. */
extern unsigned ncbins; /* Number of cacheline-spaced bins. */
extern unsigned nsbins; /* Number of subpage-spaced bins. */
extern unsigned nbins;
#ifdef JEMALLOC_TINY
#  define tspace_max    ((size_t)(QUANTUM >> 1))
#endif
#define qspace_min      QUANTUM
extern size_t   qspace_max;
extern size_t   cspace_min;
extern size_t   cspace_max;
extern size_t   sspace_min;
extern size_t   sspace_max;
#define small_maxclass  sspace_max

#define nlclasses (chunk_npages - arena_chunk_header_npages)

#ifdef JEMALLOC_TCACHE
void    arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
    size_t binind
#  ifdef JEMALLOC_PROF
    , uint64_t prof_accumbytes
#  endif
    );
#endif
#ifdef JEMALLOC_PROF
void    arena_prof_accum(arena_t *arena, uint64_t accumbytes);
#endif
void    *arena_malloc_small(arena_t *arena, size_t size, bool zero);
void    *arena_malloc_large(arena_t *arena, size_t size, bool zero);
void    *arena_malloc(size_t size, bool zero);
void    *arena_palloc(arena_t *arena, size_t alignment, size_t size,
    size_t alloc_size);
size_t  arena_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
void    arena_prof_promoted(const void *ptr, size_t size);
size_t  arena_salloc_demote(const void *ptr);
prof_thr_cnt_t  *arena_prof_cnt_get(const void *ptr);
void    arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
#endif
void    arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    arena_chunk_map_t *mapelm);
void    arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#ifdef JEMALLOC_STATS
void    arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
    arena_stats_t *astats, malloc_bin_stats_t *bstats,
    malloc_large_stats_t *lstats);
#endif
void    *arena_ralloc(void *ptr, size_t size, size_t oldsize);
bool    arena_new(arena_t *arena, unsigned ind);
bool    arena_boot(void);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
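
/*
 * Illustrative sketch (editorial addition, not part of the original header):
 * small_maxclass, declared above, is the boundary between the two allocation
 * entry points.  A dispatch over it would look roughly like this; the
 * authoritative version is arena_malloc() in arena.c, and the tcache fast
 * path sits in front of it when JEMALLOC_TCACHE is enabled.
 */
static inline void *
example_arena_malloc_dispatch(arena_t *arena, size_t size, bool zero)
{
        if (size <= small_maxclass)
                return (arena_malloc_small(arena, size, zero));
        return (arena_malloc_large(arena, size, zero));
}
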
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
void    arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
JEMALLOC_INLINE void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
        size_t pageind;
        arena_chunk_map_t *mapelm;

        assert(arena != NULL);
        assert(arena->magic == ARENA_MAGIC);
        assert(chunk->arena == arena);
        assert(ptr != NULL);
        assert(CHUNK_ADDR2BASE(ptr) != ptr);

        pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
        mapelm = &chunk->map[pageind];
        assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
        if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
                /* Small allocation. */
#ifdef JEMALLOC_TCACHE
                tcache_t *tcache;

                if ((tcache = tcache_get()) != NULL)
                        tcache_dalloc_small(tcache, ptr);
                else {
#endif
                        arena_run_t *run;
                        arena_bin_t *bin;

                        run = (arena_run_t *)((uintptr_t)chunk +
                            (uintptr_t)((pageind - (mapelm->bits >>
                            PAGE_SHIFT)) << PAGE_SHIFT));
                        assert(run->magic == ARENA_RUN_MAGIC);
                        assert(((uintptr_t)ptr - ((uintptr_t)run +
                            (uintptr_t)run->bin->reg0_offset)) %
                            run->bin->reg_size == 0);
                        bin = run->bin;
                        malloc_mutex_lock(&bin->lock);
                        arena_dalloc_bin(arena, chunk, ptr, mapelm);
                        malloc_mutex_unlock(&bin->lock);
#ifdef JEMALLOC_TCACHE
                }
#endif
        } else {
#ifdef JEMALLOC_TCACHE
                size_t size = mapelm->bits & ~PAGE_MASK;

                assert(((uintptr_t)ptr & PAGE_MASK) == 0);
                if (size <= tcache_maxclass) {
                        tcache_t *tcache;

                        if ((tcache = tcache_get()) != NULL)
                                tcache_dalloc_large(tcache, ptr, size);
                        else {
                                malloc_mutex_lock(&arena->lock);
                                arena_dalloc_large(arena, chunk, ptr);
                                malloc_mutex_unlock(&arena->lock);
                        }
                } else {
                        malloc_mutex_lock(&arena->lock);
                        arena_dalloc_large(arena, chunk, ptr);
                        malloc_mutex_unlock(&arena->lock);
                }
#else
                assert(((uintptr_t)ptr & PAGE_MASK) == 0);
                malloc_mutex_lock(&arena->lock);
                arena_dalloc_large(arena, chunk, ptr);
                malloc_mutex_unlock(&arena->lock);
#endif
        }
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
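
For context, arena_dalloc() above is normally reached from an internal wrapper that first distinguishes arena-backed pointers from huge allocations by checking whether the pointer is chunk-aligned. A minimal sketch of that dispatch follows; it assumes the CHUNK_ADDR2BASE() macro used in the header and a single-argument huge_dalloc() counterpart, so treat it as illustrative rather than the exact wrapper in this revision.

    static inline void
    example_dalloc_dispatch(void *ptr)
    {
            arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);

            if (chunk != ptr) {
                    /* Pointer lies inside a chunk: small or large arena object. */
                    arena_dalloc(chunk->arena, chunk, ptr);
            } else {
                    /* Chunk-aligned pointer: managed by the huge-object code. */
                    huge_dalloc(ptr);
            }
    }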