aboutsummaryrefslogtreecommitdiff
path: root/externals/jemalloc/include/internal/arena.h
diff options
context:
space:
mode:
Diffstat (limited to 'externals/jemalloc/include/internal/arena.h')
-rw-r--r--externals/jemalloc/include/internal/arena.h537
1 files changed, 537 insertions, 0 deletions
diff --git a/externals/jemalloc/include/internal/arena.h b/externals/jemalloc/include/internal/arena.h
new file mode 100644
index 00000000000..bb4ce2a54f7
--- /dev/null
+++ b/externals/jemalloc/include/internal/arena.h
@@ -0,0 +1,537 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/*
+ * Subpages are an artificially designated partitioning of pages. Their only
+ * purpose is to support subpage-spaced size classes.
+ *
+ * There must be at least 4 subpages per page, due to the way size classes are
+ * handled.
+ */
+#define LG_SUBPAGE 8
+#define SUBPAGE ((size_t)(1U << LG_SUBPAGE))
+#define SUBPAGE_MASK (SUBPAGE - 1)
+
+/* Return the smallest subpage multiple that is >= s. */
+#define SUBPAGE_CEILING(s) \
+ (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
+
+#ifdef JEMALLOC_TINY
+ /* Smallest size class to support. */
+# define LG_TINY_MIN LG_SIZEOF_PTR
+#endif
+
+/*
+ * Maximum size class that is a multiple of the quantum, but not (necessarily)
+ * a power of 2. Above this size, allocations are rounded up to the nearest
+ * power of 2.
+ */
+#define LG_QSPACE_MAX_DEFAULT 7
+
+/*
+ * Maximum size class that is a multiple of the cacheline, but not (necessarily)
+ * a power of 2. Above this size, allocations are rounded up to the nearest
+ * power of 2.
+ */
+#define LG_CSPACE_MAX_DEFAULT 9
+
+/*
+ * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
+ * as small as possible such that this setting is still honored, without
+ * violating other constraints. The goal is to make runs as small as possible
+ * without exceeding a per run external fragmentation threshold.
+ *
+ * We use binary fixed point math for overhead computations, where the binary
+ * point is implicitly RUN_BFP bits to the left.
+ *
+ * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
+ * honored for some/all object sizes, since there is one bit of header overhead
+ * per object (plus a constant). This constraint is relaxed (ignored) for runs
+ * that are so small that the per-region overhead is greater than:
+ *
+ * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
+ */
+#define RUN_BFP 12
+/* \/ Implicit binary fixed point. */
+#define RUN_MAX_OVRHD 0x0000003dU
+#define RUN_MAX_OVRHD_RELAX 0x00001800U
+
+/*
+ * The minimum ratio of active:dirty pages per arena is computed as:
+ *
+ * (nactive >> opt_lg_dirty_mult) >= ndirty
+ *
+ * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
+ * times as many active pages as dirty pages.
+ */
+#define LG_DIRTY_MULT_DEFAULT 5
+
+typedef struct arena_chunk_map_s arena_chunk_map_t;
+typedef struct arena_chunk_s arena_chunk_t;
+typedef struct arena_run_s arena_run_t;
+typedef struct arena_bin_s arena_bin_t;
+typedef struct arena_s arena_t;
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+/* Each element of the chunk map corresponds to one page within the chunk. */
+struct arena_chunk_map_s {
+ union {
+ /*
+ * Linkage for run trees. There are two disjoint uses:
+ *
+ * 1) arena_t's runs_avail_{clean,dirty} trees.
+ * 2) arena_run_t conceptually uses this linkage for in-use
+ * non-full runs, rather than directly embedding linkage.
+ */
+ rb_node(arena_chunk_map_t) rb_link;
+ /*
+ * List of runs currently in purgatory. arena_chunk_purge()
+ * temporarily allocates runs that contain dirty pages while
+ * purging, so that other threads cannot use the runs while the
+ * purging thread is operating without the arena lock held.
+ */
+ ql_elm(arena_chunk_map_t) ql_link;
+ } u;
+
+#ifdef JEMALLOC_PROF
+ /* Profile counters, used for large object runs. */
+ prof_thr_cnt_t *prof_cnt;
+#endif
+
+ /*
+ * Run address (or size) and various flags are stored together. The bit
+ * layout looks like (assuming 32-bit system):
+ *
+ * ???????? ???????? ????---- ----dzla
+ *
+ * ? : Unallocated: Run address for first/last pages, unset for internal
+ * pages.
+ * Small: Run page offset.
+ * Large: Run size for first page, unset for trailing pages.
+ * - : Unused.
+ * d : dirty?
+ * z : zeroed?
+ * l : large?
+ * a : allocated?
+ *
+ * Following are example bit patterns for the three types of runs.
+ *
+ * p : run page offset
+ * s : run size
+ * c : size class (used only if prof_promote is true)
+ * x : don't care
+ * - : 0
+ * + : 1
+ * [DZLA] : bit set
+ * [dzla] : bit unset
+ *
+ * Unallocated (clean):
+ * ssssssss ssssssss ssss---- ----dz--
+ * xxxxxxxx xxxxxxxx xxxx---- -----Zxx
+ * ssssssss ssssssss ssss---- ----dZ--
+ *
+ * Unallocated (dirty):
+ * ssssssss ssssssss ssss---- ----D---
+ * xxxxxxxx xxxxxxxx xxxx---- ----xxxx
+ * ssssssss ssssssss ssss---- ----D---
+ *
+ * Small:
+ * pppppppp pppppppp pppp---- ----d--a
+ * pppppppp pppppppp pppp---- -------a
+ * pppppppp pppppppp pppp---- ----d--a
+ *
+ * Large:
+ * ssssssss ssssssss ssss++++ ++++D-la
+ * xxxxxxxx xxxxxxxx xxxx---- ----xxxx
+ * -------- -------- -------- ----D-la
+ *
+ * Large (sampled, size <= PAGE_SIZE):
+ * ssssssss ssssssss sssscccc ccccD-la
+ *
+ * Large (not sampled, size == PAGE_SIZE):
+ * ssssssss ssssssss ssss++++ ++++D-la
+ */
+ size_t bits;
+#ifdef JEMALLOC_PROF
+#define CHUNK_MAP_CLASS_SHIFT 4
+#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U)
+#endif
+#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU)
+#define CHUNK_MAP_DIRTY ((size_t)0x8U)
+#define CHUNK_MAP_ZEROED ((size_t)0x4U)
+#define CHUNK_MAP_LARGE ((size_t)0x2U)
+#define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
+#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED
+};
+typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
+typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
+
+/* Arena chunk header. */
+struct arena_chunk_s {
+ /* Arena that owns the chunk. */
+ arena_t *arena;
+
+ /* Linkage for the arena's chunks_dirty list. */
+ ql_elm(arena_chunk_t) link_dirty;
+
+ /*
+ * True if the chunk is currently in the chunks_dirty list, due to
+ * having at some point contained one or more dirty pages. Removal
+ * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
+ */
+ bool dirtied;
+
+ /* Number of dirty pages. */
+ size_t ndirty;
+
+ /* Map of pages within chunk that keeps track of free/large/small. */
+ arena_chunk_map_t map[1]; /* Dynamically sized. */
+};
+typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
+
+struct arena_run_s {
+#ifdef JEMALLOC_DEBUG
+ uint32_t magic;
+# define ARENA_RUN_MAGIC 0x384adf93
+#endif
+
+ /* Bin this run is associated with. */
+ arena_bin_t *bin;
+
+ /* Stack of available freed regions, or NULL. */
+ void *avail;
+
+ /* Next region that has never been allocated, or run boundary. */
+ void *next;
+
+ /* Number of free regions in run. */
+ unsigned nfree;
+};
+
+struct arena_bin_s {
+ /*
+ * All operations on runcur, runs, and stats require that lock be
+ * locked. Run allocation/deallocation are protected by the arena lock,
+ * which may be acquired while holding one or more bin locks, but not
+ * vise versa.
+ */
+ malloc_mutex_t lock;
+
+ /*
+ * Current run being used to service allocations of this bin's size
+ * class.
+ */
+ arena_run_t *runcur;
+
+ /*
+ * Tree of non-full runs. This tree is used when looking for an
+ * existing run when runcur is no longer usable. We choose the
+ * non-full run that is lowest in memory; this policy tends to keep
+ * objects packed well, and it can also help reduce the number of
+ * almost-empty chunks.
+ */
+ arena_run_tree_t runs;
+
+ /* Size of regions in a run for this bin's size class. */
+ size_t reg_size;
+
+ /* Total size of a run for this bin's size class. */
+ size_t run_size;
+
+ /* Total number of regions in a run for this bin's size class. */
+ uint32_t nregs;
+
+#ifdef JEMALLOC_PROF
+ /*
+ * Offset of first (prof_cnt_t *) in a run header for this bin's size
+ * class, or 0 if (opt_prof == false).
+ */
+ uint32_t cnt0_offset;
+#endif
+
+ /* Offset of first region in a run for this bin's size class. */
+ uint32_t reg0_offset;
+
+#ifdef JEMALLOC_STATS
+ /* Bin statistics. */
+ malloc_bin_stats_t stats;
+#endif
+};
+
+struct arena_s {
+#ifdef JEMALLOC_DEBUG
+ uint32_t magic;
+# define ARENA_MAGIC 0x947d3d24
+#endif
+
+ /* This arena's index within the arenas array. */
+ unsigned ind;
+
+ /*
+ * All non-bin-related operations on this arena require that lock be
+ * locked.
+ */
+ malloc_mutex_t lock;
+
+#ifdef JEMALLOC_STATS
+ arena_stats_t stats;
+# ifdef JEMALLOC_TCACHE
+ /*
+ * List of tcaches for extant threads associated with this arena.
+ * Stats from these are merged incrementally, and at exit.
+ */
+ ql_head(tcache_t) tcache_ql;
+# endif
+#endif
+
+#ifdef JEMALLOC_PROF
+ uint64_t prof_accumbytes;
+#endif
+
+ /* List of dirty-page-containing chunks this arena manages. */
+ ql_head(arena_chunk_t) chunks_dirty;
+
+ /*
+ * In order to avoid rapid chunk allocation/deallocation when an arena
+ * oscillates right on the cusp of needing a new chunk, cache the most
+ * recently freed chunk. The spare is left in the arena's chunk trees
+ * until it is deleted.
+ *
+ * There is one spare chunk per arena, rather than one spare total, in
+ * order to avoid interactions between multiple threads that could make
+ * a single spare inadequate.
+ */
+ arena_chunk_t *spare;
+
+ /* Number of pages in active runs. */
+ size_t nactive;
+
+ /*
+ * Current count of pages within unused runs that are potentially
+ * dirty, and for which madvise(... MADV_DONTNEED) has not been called.
+ * By tracking this, we can institute a limit on how much dirty unused
+ * memory is mapped for each arena.
+ */
+ size_t ndirty;
+
+ /*
+ * Approximate number of pages being purged. It is possible for
+ * multiple threads to purge dirty pages concurrently, and they use
+ * npurgatory to indicate the total number of pages all threads are
+ * attempting to purge.
+ */
+ size_t npurgatory;
+
+ /*
+ * Size/address-ordered trees of this arena's available runs. The trees
+ * are used for first-best-fit run allocation. The dirty tree contains
+ * runs with dirty pages (i.e. very likely to have been touched and
+ * therefore have associated physical pages), whereas the clean tree
+ * contains runs with pages that either have no associated physical
+ * pages, or have pages that the kernel may recycle at any time due to
+ * previous madvise(2) calls. The dirty tree is used in preference to
+ * the clean tree for allocations, because using dirty pages reduces
+ * the amount of dirty purging necessary to keep the active:dirty page
+ * ratio below the purge threshold.
+ */
+ arena_avail_tree_t runs_avail_clean;
+ arena_avail_tree_t runs_avail_dirty;
+
+ /*
+ * bins is used to store trees of free regions of the following sizes,
+ * assuming a 16-byte quantum, 4 KiB page size, and default
+ * JEMALLOC_OPTIONS.
+ *
+ * bins[i] | size |
+ * --------+--------+
+ * 0 | 2 |
+ * 1 | 4 |
+ * 2 | 8 |
+ * --------+--------+
+ * 3 | 16 |
+ * 4 | 32 |
+ * 5 | 48 |
+ * : :
+ * 8 | 96 |
+ * 9 | 112 |
+ * 10 | 128 |
+ * --------+--------+
+ * 11 | 192 |
+ * 12 | 256 |
+ * 13 | 320 |
+ * 14 | 384 |
+ * 15 | 448 |
+ * 16 | 512 |
+ * --------+--------+
+ * 17 | 768 |
+ * 18 | 1024 |
+ * 19 | 1280 |
+ * : :
+ * 27 | 3328 |
+ * 28 | 3584 |
+ * 29 | 3840 |
+ * --------+--------+
+ * 30 | 4 KiB |
+ * 31 | 6 KiB |
+ * 33 | 8 KiB |
+ * : :
+ * 43 | 28 KiB |
+ * 44 | 30 KiB |
+ * 45 | 32 KiB |
+ * --------+--------+
+ */
+ arena_bin_t bins[1]; /* Dynamically sized. */
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern size_t opt_lg_qspace_max;
+extern size_t opt_lg_cspace_max;
+extern ssize_t opt_lg_dirty_mult;
+extern uint8_t const *small_size2bin;
+
+/* Various bin-related settings. */
+#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */
+# define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN))
+#else
+# define ntbins 0
+#endif
+extern unsigned nqbins; /* Number of quantum-spaced bins. */
+extern unsigned ncbins; /* Number of cacheline-spaced bins. */
+extern unsigned nsbins; /* Number of subpage-spaced bins. */
+extern unsigned nbins;
+#ifdef JEMALLOC_TINY
+# define tspace_max ((size_t)(QUANTUM >> 1))
+#endif
+#define qspace_min QUANTUM
+extern size_t qspace_max;
+extern size_t cspace_min;
+extern size_t cspace_max;
+extern size_t sspace_min;
+extern size_t sspace_max;
+#define small_maxclass sspace_max
+
+#define nlclasses (chunk_npages - arena_chunk_header_npages)
+
+#ifdef JEMALLOC_TCACHE
+void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
+ size_t binind
+# ifdef JEMALLOC_PROF
+ , uint64_t prof_accumbytes
+# endif
+ );
+#endif
+#ifdef JEMALLOC_PROF
+void arena_prof_accum(arena_t *arena, uint64_t accumbytes);
+#endif
+void *arena_malloc_small(arena_t *arena, size_t size, bool zero);
+void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
+void *arena_malloc(size_t size, bool zero);
+void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
+ size_t alloc_size);
+size_t arena_salloc(const void *ptr);
+#ifdef JEMALLOC_PROF
+void arena_prof_promoted(const void *ptr, size_t size);
+size_t arena_salloc_demote(const void *ptr);
+prof_thr_cnt_t *arena_prof_cnt_get(const void *ptr);
+void arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
+#endif
+void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+ arena_chunk_map_t *mapelm);
+void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
+#ifdef JEMALLOC_STATS
+void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
+ arena_stats_t *astats, malloc_bin_stats_t *bstats,
+ malloc_large_stats_t *lstats);
+#endif
+void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
+bool arena_new(arena_t *arena, unsigned ind);
+bool arena_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
+JEMALLOC_INLINE void
+arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
+{
+ size_t pageind;
+ arena_chunk_map_t *mapelm;
+
+ assert(arena != NULL);
+ assert(arena->magic == ARENA_MAGIC);
+ assert(chunk->arena == arena);
+ assert(ptr != NULL);
+ assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+ pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+ mapelm = &chunk->map[pageind];
+ assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
+ if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
+ /* Small allocation. */
+#ifdef JEMALLOC_TCACHE
+ tcache_t *tcache;
+
+ if ((tcache = tcache_get()) != NULL)
+ tcache_dalloc_small(tcache, ptr);
+ else {
+#endif
+ arena_run_t *run;
+ arena_bin_t *bin;
+
+ run = (arena_run_t *)((uintptr_t)chunk +
+ (uintptr_t)((pageind - (mapelm->bits >>
+ PAGE_SHIFT)) << PAGE_SHIFT));
+ assert(run->magic == ARENA_RUN_MAGIC);
+ assert(((uintptr_t)ptr - ((uintptr_t)run +
+ (uintptr_t)run->bin->reg0_offset)) %
+ run->bin->reg_size == 0);
+ bin = run->bin;
+ malloc_mutex_lock(&bin->lock);
+ arena_dalloc_bin(arena, chunk, ptr, mapelm);
+ malloc_mutex_unlock(&bin->lock);
+#ifdef JEMALLOC_TCACHE
+ }
+#endif
+ } else {
+#ifdef JEMALLOC_TCACHE
+ size_t size = mapelm->bits & ~PAGE_MASK;
+
+ assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+ if (size <= tcache_maxclass) {
+ tcache_t *tcache;
+
+ if ((tcache = tcache_get()) != NULL)
+ tcache_dalloc_large(tcache, ptr, size);
+ else {
+ malloc_mutex_lock(&arena->lock);
+ arena_dalloc_large(arena, chunk, ptr);
+ malloc_mutex_unlock(&arena->lock);
+ }
+ } else {
+ malloc_mutex_lock(&arena->lock);
+ arena_dalloc_large(arena, chunk, ptr);
+ malloc_mutex_unlock(&arena->lock);
+ }
+#else
+ assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+ malloc_mutex_lock(&arena->lock);
+ arena_dalloc_large(arena, chunk, ptr);
+ malloc_mutex_unlock(&arena->lock);
+#endif
+ }
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/