Diffstat (limited to 'dep/include/jemalloc')
26 files changed, 4559 insertions, 0 deletions
diff --git a/dep/include/jemalloc/internal/arena.h b/dep/include/jemalloc/internal/arena.h new file mode 100644 index 00000000000..bb4ce2a54f7 --- /dev/null +++ b/dep/include/jemalloc/internal/arena.h @@ -0,0 +1,537 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * Subpages are an artificially designated partitioning of pages. Their only + * purpose is to support subpage-spaced size classes. + * + * There must be at least 4 subpages per page, due to the way size classes are + * handled. + */ +#define LG_SUBPAGE 8 +#define SUBPAGE ((size_t)(1U << LG_SUBPAGE)) +#define SUBPAGE_MASK (SUBPAGE - 1) + +/* Return the smallest subpage multiple that is >= s. */ +#define SUBPAGE_CEILING(s) \ + (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK) + +#ifdef JEMALLOC_TINY + /* Smallest size class to support. */ +# define LG_TINY_MIN LG_SIZEOF_PTR +#endif + +/* + * Maximum size class that is a multiple of the quantum, but not (necessarily) + * a power of 2. Above this size, allocations are rounded up to the nearest + * power of 2. + */ +#define LG_QSPACE_MAX_DEFAULT 7 + +/* + * Maximum size class that is a multiple of the cacheline, but not (necessarily) + * a power of 2. Above this size, allocations are rounded up to the nearest + * power of 2. + */ +#define LG_CSPACE_MAX_DEFAULT 9 + +/* + * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized + * as small as possible such that this setting is still honored, without + * violating other constraints. The goal is to make runs as small as possible + * without exceeding a per run external fragmentation threshold. + * + * We use binary fixed point math for overhead computations, where the binary + * point is implicitly RUN_BFP bits to the left. + * + * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be + * honored for some/all object sizes, since there is one bit of header overhead + * per object (plus a constant). This constraint is relaxed (ignored) for runs + * that are so small that the per-region overhead is greater than: + * + * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)) + */ +#define RUN_BFP 12 +/* \/ Implicit binary fixed point. */ +#define RUN_MAX_OVRHD 0x0000003dU +#define RUN_MAX_OVRHD_RELAX 0x00001800U + +/* + * The minimum ratio of active:dirty pages per arena is computed as: + * + * (nactive >> opt_lg_dirty_mult) >= ndirty + * + * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32 + * times as many active pages as dirty pages. + */ +#define LG_DIRTY_MULT_DEFAULT 5 + +typedef struct arena_chunk_map_s arena_chunk_map_t; +typedef struct arena_chunk_s arena_chunk_t; +typedef struct arena_run_s arena_run_t; +typedef struct arena_bin_s arena_bin_t; +typedef struct arena_s arena_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Each element of the chunk map corresponds to one page within the chunk. */ +struct arena_chunk_map_s { + union { + /* + * Linkage for run trees. There are two disjoint uses: + * + * 1) arena_t's runs_avail_{clean,dirty} trees. + * 2) arena_run_t conceptually uses this linkage for in-use + * non-full runs, rather than directly embedding linkage. + */ + rb_node(arena_chunk_map_t) rb_link; + /* + * List of runs currently in purgatory. 
arena_chunk_purge() + * temporarily allocates runs that contain dirty pages while + * purging, so that other threads cannot use the runs while the + * purging thread is operating without the arena lock held. + */ + ql_elm(arena_chunk_map_t) ql_link; + } u; + +#ifdef JEMALLOC_PROF + /* Profile counters, used for large object runs. */ + prof_thr_cnt_t *prof_cnt; +#endif + + /* + * Run address (or size) and various flags are stored together. The bit + * layout looks like (assuming 32-bit system): + * + * ???????? ???????? ????---- ----dzla + * + * ? : Unallocated: Run address for first/last pages, unset for internal + * pages. + * Small: Run page offset. + * Large: Run size for first page, unset for trailing pages. + * - : Unused. + * d : dirty? + * z : zeroed? + * l : large? + * a : allocated? + * + * Following are example bit patterns for the three types of runs. + * + * p : run page offset + * s : run size + * c : size class (used only if prof_promote is true) + * x : don't care + * - : 0 + * + : 1 + * [DZLA] : bit set + * [dzla] : bit unset + * + * Unallocated (clean): + * ssssssss ssssssss ssss---- ----dz-- + * xxxxxxxx xxxxxxxx xxxx---- -----Zxx + * ssssssss ssssssss ssss---- ----dZ-- + * + * Unallocated (dirty): + * ssssssss ssssssss ssss---- ----D--- + * xxxxxxxx xxxxxxxx xxxx---- ----xxxx + * ssssssss ssssssss ssss---- ----D--- + * + * Small: + * pppppppp pppppppp pppp---- ----d--a + * pppppppp pppppppp pppp---- -------a + * pppppppp pppppppp pppp---- ----d--a + * + * Large: + * ssssssss ssssssss ssss++++ ++++D-la + * xxxxxxxx xxxxxxxx xxxx---- ----xxxx + * -------- -------- -------- ----D-la + * + * Large (sampled, size <= PAGE_SIZE): + * ssssssss ssssssss sssscccc ccccD-la + * + * Large (not sampled, size == PAGE_SIZE): + * ssssssss ssssssss ssss++++ ++++D-la + */ + size_t bits; +#ifdef JEMALLOC_PROF +#define CHUNK_MAP_CLASS_SHIFT 4 +#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U) +#endif +#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) +#define CHUNK_MAP_DIRTY ((size_t)0x8U) +#define CHUNK_MAP_ZEROED ((size_t)0x4U) +#define CHUNK_MAP_LARGE ((size_t)0x2U) +#define CHUNK_MAP_ALLOCATED ((size_t)0x1U) +#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED +}; +typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t; +typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; + +/* Arena chunk header. */ +struct arena_chunk_s { + /* Arena that owns the chunk. */ + arena_t *arena; + + /* Linkage for the arena's chunks_dirty list. */ + ql_elm(arena_chunk_t) link_dirty; + + /* + * True if the chunk is currently in the chunks_dirty list, due to + * having at some point contained one or more dirty pages. Removal + * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible. + */ + bool dirtied; + + /* Number of dirty pages. */ + size_t ndirty; + + /* Map of pages within chunk that keeps track of free/large/small. */ + arena_chunk_map_t map[1]; /* Dynamically sized. */ +}; +typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; + +struct arena_run_s { +#ifdef JEMALLOC_DEBUG + uint32_t magic; +# define ARENA_RUN_MAGIC 0x384adf93 +#endif + + /* Bin this run is associated with. */ + arena_bin_t *bin; + + /* Stack of available freed regions, or NULL. */ + void *avail; + + /* Next region that has never been allocated, or run boundary. */ + void *next; + + /* Number of free regions in run. */ + unsigned nfree; +}; + +struct arena_bin_s { + /* + * All operations on runcur, runs, and stats require that lock be + * locked. 
Run allocation/deallocation are protected by the arena lock, + * which may be acquired while holding one or more bin locks, but not + * vise versa. + */ + malloc_mutex_t lock; + + /* + * Current run being used to service allocations of this bin's size + * class. + */ + arena_run_t *runcur; + + /* + * Tree of non-full runs. This tree is used when looking for an + * existing run when runcur is no longer usable. We choose the + * non-full run that is lowest in memory; this policy tends to keep + * objects packed well, and it can also help reduce the number of + * almost-empty chunks. + */ + arena_run_tree_t runs; + + /* Size of regions in a run for this bin's size class. */ + size_t reg_size; + + /* Total size of a run for this bin's size class. */ + size_t run_size; + + /* Total number of regions in a run for this bin's size class. */ + uint32_t nregs; + +#ifdef JEMALLOC_PROF + /* + * Offset of first (prof_cnt_t *) in a run header for this bin's size + * class, or 0 if (opt_prof == false). + */ + uint32_t cnt0_offset; +#endif + + /* Offset of first region in a run for this bin's size class. */ + uint32_t reg0_offset; + +#ifdef JEMALLOC_STATS + /* Bin statistics. */ + malloc_bin_stats_t stats; +#endif +}; + +struct arena_s { +#ifdef JEMALLOC_DEBUG + uint32_t magic; +# define ARENA_MAGIC 0x947d3d24 +#endif + + /* This arena's index within the arenas array. */ + unsigned ind; + + /* + * All non-bin-related operations on this arena require that lock be + * locked. + */ + malloc_mutex_t lock; + +#ifdef JEMALLOC_STATS + arena_stats_t stats; +# ifdef JEMALLOC_TCACHE + /* + * List of tcaches for extant threads associated with this arena. + * Stats from these are merged incrementally, and at exit. + */ + ql_head(tcache_t) tcache_ql; +# endif +#endif + +#ifdef JEMALLOC_PROF + uint64_t prof_accumbytes; +#endif + + /* List of dirty-page-containing chunks this arena manages. */ + ql_head(arena_chunk_t) chunks_dirty; + + /* + * In order to avoid rapid chunk allocation/deallocation when an arena + * oscillates right on the cusp of needing a new chunk, cache the most + * recently freed chunk. The spare is left in the arena's chunk trees + * until it is deleted. + * + * There is one spare chunk per arena, rather than one spare total, in + * order to avoid interactions between multiple threads that could make + * a single spare inadequate. + */ + arena_chunk_t *spare; + + /* Number of pages in active runs. */ + size_t nactive; + + /* + * Current count of pages within unused runs that are potentially + * dirty, and for which madvise(... MADV_DONTNEED) has not been called. + * By tracking this, we can institute a limit on how much dirty unused + * memory is mapped for each arena. + */ + size_t ndirty; + + /* + * Approximate number of pages being purged. It is possible for + * multiple threads to purge dirty pages concurrently, and they use + * npurgatory to indicate the total number of pages all threads are + * attempting to purge. + */ + size_t npurgatory; + + /* + * Size/address-ordered trees of this arena's available runs. The trees + * are used for first-best-fit run allocation. The dirty tree contains + * runs with dirty pages (i.e. very likely to have been touched and + * therefore have associated physical pages), whereas the clean tree + * contains runs with pages that either have no associated physical + * pages, or have pages that the kernel may recycle at any time due to + * previous madvise(2) calls. 
The dirty tree is used in preference to + * the clean tree for allocations, because using dirty pages reduces + * the amount of dirty purging necessary to keep the active:dirty page + * ratio below the purge threshold. + */ + arena_avail_tree_t runs_avail_clean; + arena_avail_tree_t runs_avail_dirty; + + /* + * bins is used to store trees of free regions of the following sizes, + * assuming a 16-byte quantum, 4 KiB page size, and default + * JEMALLOC_OPTIONS. + * + * bins[i] | size | + * --------+--------+ + * 0 | 2 | + * 1 | 4 | + * 2 | 8 | + * --------+--------+ + * 3 | 16 | + * 4 | 32 | + * 5 | 48 | + * : : + * 8 | 96 | + * 9 | 112 | + * 10 | 128 | + * --------+--------+ + * 11 | 192 | + * 12 | 256 | + * 13 | 320 | + * 14 | 384 | + * 15 | 448 | + * 16 | 512 | + * --------+--------+ + * 17 | 768 | + * 18 | 1024 | + * 19 | 1280 | + * : : + * 27 | 3328 | + * 28 | 3584 | + * 29 | 3840 | + * --------+--------+ + * 30 | 4 KiB | + * 31 | 6 KiB | + * 33 | 8 KiB | + * : : + * 43 | 28 KiB | + * 44 | 30 KiB | + * 45 | 32 KiB | + * --------+--------+ + */ + arena_bin_t bins[1]; /* Dynamically sized. */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern size_t opt_lg_qspace_max; +extern size_t opt_lg_cspace_max; +extern ssize_t opt_lg_dirty_mult; +extern uint8_t const *small_size2bin; + +/* Various bin-related settings. */ +#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */ +# define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN)) +#else +# define ntbins 0 +#endif +extern unsigned nqbins; /* Number of quantum-spaced bins. */ +extern unsigned ncbins; /* Number of cacheline-spaced bins. */ +extern unsigned nsbins; /* Number of subpage-spaced bins. 
*/ +extern unsigned nbins; +#ifdef JEMALLOC_TINY +# define tspace_max ((size_t)(QUANTUM >> 1)) +#endif +#define qspace_min QUANTUM +extern size_t qspace_max; +extern size_t cspace_min; +extern size_t cspace_max; +extern size_t sspace_min; +extern size_t sspace_max; +#define small_maxclass sspace_max + +#define nlclasses (chunk_npages - arena_chunk_header_npages) + +#ifdef JEMALLOC_TCACHE +void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, + size_t binind +# ifdef JEMALLOC_PROF + , uint64_t prof_accumbytes +# endif + ); +#endif +#ifdef JEMALLOC_PROF +void arena_prof_accum(arena_t *arena, uint64_t accumbytes); +#endif +void *arena_malloc_small(arena_t *arena, size_t size, bool zero); +void *arena_malloc_large(arena_t *arena, size_t size, bool zero); +void *arena_malloc(size_t size, bool zero); +void *arena_palloc(arena_t *arena, size_t alignment, size_t size, + size_t alloc_size); +size_t arena_salloc(const void *ptr); +#ifdef JEMALLOC_PROF +void arena_prof_promoted(const void *ptr, size_t size); +size_t arena_salloc_demote(const void *ptr); +prof_thr_cnt_t *arena_prof_cnt_get(const void *ptr); +void arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt); +#endif +void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_t *mapelm); +void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); +#ifdef JEMALLOC_STATS +void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats); +#endif +void *arena_ralloc(void *ptr, size_t size, size_t oldsize); +bool arena_new(arena_t *arena, unsigned ind); +bool arena_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +JEMALLOC_INLINE void +arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) +{ + size_t pageind; + arena_chunk_map_t *mapelm; + + assert(arena != NULL); + assert(arena->magic == ARENA_MAGIC); + assert(chunk->arena == arena); + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); + mapelm = &chunk->map[pageind]; + assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { + /* Small allocation. 
*/ +#ifdef JEMALLOC_TCACHE + tcache_t *tcache; + + if ((tcache = tcache_get()) != NULL) + tcache_dalloc_small(tcache, ptr); + else { +#endif + arena_run_t *run; + arena_bin_t *bin; + + run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapelm->bits >> + PAGE_SHIFT)) << PAGE_SHIFT)); + assert(run->magic == ARENA_RUN_MAGIC); + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)run->bin->reg0_offset)) % + run->bin->reg_size == 0); + bin = run->bin; + malloc_mutex_lock(&bin->lock); + arena_dalloc_bin(arena, chunk, ptr, mapelm); + malloc_mutex_unlock(&bin->lock); +#ifdef JEMALLOC_TCACHE + } +#endif + } else { +#ifdef JEMALLOC_TCACHE + size_t size = mapelm->bits & ~PAGE_MASK; + + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + if (size <= tcache_maxclass) { + tcache_t *tcache; + + if ((tcache = tcache_get()) != NULL) + tcache_dalloc_large(tcache, ptr, size); + else { + malloc_mutex_lock(&arena->lock); + arena_dalloc_large(arena, chunk, ptr); + malloc_mutex_unlock(&arena->lock); + } + } else { + malloc_mutex_lock(&arena->lock); + arena_dalloc_large(arena, chunk, ptr); + malloc_mutex_unlock(&arena->lock); + } +#else + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + malloc_mutex_lock(&arena->lock); + arena_dalloc_large(arena, chunk, ptr); + malloc_mutex_unlock(&arena->lock); +#endif + } +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/dep/include/jemalloc/internal/base.h b/dep/include/jemalloc/internal/base.h new file mode 100644 index 00000000000..e353f309bd2 --- /dev/null +++ b/dep/include/jemalloc/internal/base.h @@ -0,0 +1,24 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern malloc_mutex_t base_mtx; + +void *base_alloc(size_t size); +extent_node_t *base_node_alloc(void); +void base_node_dealloc(extent_node_t *node); +bool base_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/dep/include/jemalloc/internal/chunk.h b/dep/include/jemalloc/internal/chunk.h new file mode 100644 index 00000000000..1f6abf782f1 --- /dev/null +++ b/dep/include/jemalloc/internal/chunk.h @@ -0,0 +1,61 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * Size and alignment of memory chunks that are allocated by the OS's virtual + * memory system. + */ +#define LG_CHUNK_DEFAULT 22 + +/* Return the chunk address for allocation address a. */ +#define CHUNK_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~chunksize_mask)) + +/* Return the chunk offset of address a. */ +#define CHUNK_ADDR2OFFSET(a) \ + ((size_t)((uintptr_t)(a) & chunksize_mask)) + +/* Return the smallest chunk multiple that is >= s. 
*/ +#define CHUNK_CEILING(s) \ + (((s) + chunksize_mask) & ~chunksize_mask) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern size_t opt_lg_chunk; +#ifdef JEMALLOC_SWAP +extern bool opt_overcommit; +#endif + +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) +/* Protects stats_chunks; currently not used for any other purpose. */ +extern malloc_mutex_t chunks_mtx; +/* Chunk statistics. */ +extern chunk_stats_t stats_chunks; +#endif + +extern size_t chunksize; +extern size_t chunksize_mask; /* (chunksize - 1). */ +extern size_t chunk_npages; +extern size_t arena_chunk_header_npages; +extern size_t arena_maxclass; /* Max size class for arenas. */ + +void *chunk_alloc(size_t size, bool *zero); +void chunk_dealloc(void *chunk, size_t size); +bool chunk_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + +#include "jemalloc/internal/chunk_swap.h" +#include "jemalloc/internal/chunk_dss.h" +#include "jemalloc/internal/chunk_mmap.h" diff --git a/dep/include/jemalloc/internal/chunk_dss.h b/dep/include/jemalloc/internal/chunk_dss.h new file mode 100644 index 00000000000..6be4ad1f212 --- /dev/null +++ b/dep/include/jemalloc/internal/chunk_dss.h @@ -0,0 +1,29 @@ +#ifdef JEMALLOC_DSS +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +/* + * Protects sbrk() calls. This avoids malloc races among threads, though it + * does not protect against races with threads that call sbrk() directly. 
+ */ +extern malloc_mutex_t dss_mtx; + +void *chunk_alloc_dss(size_t size, bool *zero); +bool chunk_dealloc_dss(void *chunk, size_t size); +bool chunk_dss_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +#endif /* JEMALLOC_DSS */ diff --git a/dep/include/jemalloc/internal/chunk_mmap.h b/dep/include/jemalloc/internal/chunk_mmap.h new file mode 100644 index 00000000000..8fb90b77c9b --- /dev/null +++ b/dep/include/jemalloc/internal/chunk_mmap.h @@ -0,0 +1,20 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *chunk_alloc_mmap(size_t size); +void chunk_dealloc_mmap(void *chunk, size_t size); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/dep/include/jemalloc/internal/chunk_swap.h b/dep/include/jemalloc/internal/chunk_swap.h new file mode 100644 index 00000000000..d50cb197449 --- /dev/null +++ b/dep/include/jemalloc/internal/chunk_swap.h @@ -0,0 +1,33 @@ +#ifdef JEMALLOC_SWAP +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern malloc_mutex_t swap_mtx; +extern bool swap_enabled; +extern bool swap_prezeroed; +extern size_t swap_nfds; +extern int *swap_fds; +#ifdef JEMALLOC_STATS +extern size_t swap_avail; +#endif + +void *chunk_alloc_swap(size_t size, bool *zero); +bool chunk_dealloc_swap(void *chunk, size_t size); +bool chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed); +bool chunk_swap_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +#endif /* JEMALLOC_SWAP */ diff --git a/dep/include/jemalloc/internal/ckh.h b/dep/include/jemalloc/internal/ckh.h new file mode 100644 index 00000000000..c39ea5c75ef --- /dev/null +++ b/dep/include/jemalloc/internal/ckh.h @@ -0,0 +1,95 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ckh_s ckh_t; +typedef struct ckhc_s ckhc_t; + +/* Typedefs to allow easy function pointer passing. */ +typedef void ckh_hash_t (const void *, unsigned, size_t *, size_t *); +typedef bool ckh_keycomp_t (const void *, const void *); + +/* Maintain counters used to get an idea of performance. */ +/* #define CKH_COUNT */ +/* Print counter values in ckh_delete() (requires CKH_COUNT). 
*/ +/* #define CKH_VERBOSE */ + +/* + * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit + * one bucket per L1 cache line. + */ +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Hash table cell. */ +struct ckhc_s { + const void *key; + const void *data; +}; + +struct ckh_s { +#ifdef JEMALLOC_DEBUG +#define CKH_MAGIG 0x3af2489d + uint32_t magic; +#endif + +#ifdef CKH_COUNT + /* Counters used to get an idea of performance. */ + uint64_t ngrows; + uint64_t nshrinks; + uint64_t nshrinkfails; + uint64_t ninserts; + uint64_t nrelocs; +#endif + + /* Used for pseudo-random number generation. */ +#define CKH_A 12345 +#define CKH_C 12347 + uint32_t prn_state; + + /* Total number of items. */ + size_t count; + + /* + * Minimum and current number of hash table buckets. There are + * 2^LG_CKH_BUCKET_CELLS cells per bucket. + */ + unsigned lg_minbuckets; + unsigned lg_curbuckets; + + /* Hash and comparison functions. */ + ckh_hash_t *hash; + ckh_keycomp_t *keycomp; + + /* Hash table with 2^lg_curbuckets buckets. */ + ckhc_t *tab; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, + ckh_keycomp_t *keycomp); +void ckh_delete(ckh_t *ckh); +size_t ckh_count(ckh_t *ckh); +bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); +bool ckh_insert(ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key, + void **data); +bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); +void ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, + size_t *hash2); +bool ckh_string_keycomp(const void *k1, const void *k2); +void ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, + size_t *hash2); +bool ckh_pointer_keycomp(const void *k1, const void *k2); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/dep/include/jemalloc/internal/ctl.h b/dep/include/jemalloc/internal/ctl.h new file mode 100644 index 00000000000..7bbf21e0e85 --- /dev/null +++ b/dep/include/jemalloc/internal/ctl.h @@ -0,0 +1,117 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ctl_node_s ctl_node_t; +typedef struct ctl_arena_stats_s ctl_arena_stats_t; +typedef struct ctl_stats_s ctl_stats_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct ctl_node_s { + bool named; + union { + struct { + const char *name; + /* If (nchildren == 0), this is a terminal node. */ + unsigned nchildren; + const ctl_node_t *children; + } named; + struct { + const ctl_node_t *(*index)(const size_t *, size_t, + size_t); + } indexed; + } u; + int (*ctl)(const size_t *, size_t, void *, size_t *, void *, + size_t); +}; + +struct ctl_arena_stats_s { + bool initialized; + size_t pactive; + size_t pdirty; +#ifdef JEMALLOC_STATS + arena_stats_t astats; + + /* Aggregate stats for small size classes, based on bin stats. 
*/ + size_t allocated_small; + uint64_t nmalloc_small; + uint64_t ndalloc_small; + uint64_t nrequests_small; + + malloc_bin_stats_t *bstats; /* nbins elements. */ + malloc_large_stats_t *lstats; /* nlclasses elements. */ +#endif +}; + +struct ctl_stats_s { +#ifdef JEMALLOC_STATS + size_t allocated; + size_t active; + size_t mapped; + struct { + size_t current; /* stats_chunks.curchunks */ + uint64_t total; /* stats_chunks.nchunks */ + size_t high; /* stats_chunks.highchunks */ + } chunks; + struct { + size_t allocated; /* huge_allocated */ + uint64_t nmalloc; /* huge_nmalloc */ + uint64_t ndalloc; /* huge_ndalloc */ + } huge; +#endif + ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ +#ifdef JEMALLOC_SWAP + size_t swap_avail; +#endif +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen); +int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp); + +int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen); +bool ctl_boot(void); + +#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ + if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen) \ + != 0) { \ + malloc_write("<jemalloc>: Invalid xmallctl(\""); \ + malloc_write(name); \ + malloc_write("\", ...) call\n"); \ + abort(); \ + } \ +} while (0) + +#define xmallctlnametomib(name, mibp, miblenp) do { \ + if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) { \ + malloc_write( \ + "<jemalloc>: Invalid xmallctlnametomib(\""); \ + malloc_write(name); \ + malloc_write("\", ...) call\n"); \ + abort(); \ + } \ +} while (0) + +#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ + if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp, \ + newlen) != 0) { \ + malloc_write( \ + "<jemalloc>: Invalid xmallctlbymib() call\n"); \ + abort(); \ + } \ +} while (0) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/dep/include/jemalloc/internal/extent.h b/dep/include/jemalloc/internal/extent.h new file mode 100644 index 00000000000..33a4e9a3852 --- /dev/null +++ b/dep/include/jemalloc/internal/extent.h @@ -0,0 +1,49 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct extent_node_s extent_node_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Tree of extents. */ +struct extent_node_s { +#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) + /* Linkage for the size/address-ordered tree. */ + rb_node(extent_node_t) link_szad; +#endif + + /* Linkage for the address-ordered tree. */ + rb_node(extent_node_t) link_ad; + +#ifdef JEMALLOC_PROF + /* Profile counters, used for huge objects. */ + prof_thr_cnt_t *prof_cnt; +#endif + + /* Pointer to the extent that this tree node is responsible for. */ + void *addr; + + /* Total region size. 
*/ + size_t size; +}; +typedef rb_tree(extent_node_t) extent_tree_t; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) +rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) +#endif + +rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/dep/include/jemalloc/internal/hash.h b/dep/include/jemalloc/internal/hash.h new file mode 100644 index 00000000000..d12cdb8359f --- /dev/null +++ b/dep/include/jemalloc/internal/hash.h @@ -0,0 +1,70 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +uint64_t hash(const void *key, size_t len, uint64_t seed); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(HASH_C_)) +/* + * The following hash function is based on MurmurHash64A(), placed into the + * public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for + * details. + */ +JEMALLOC_INLINE uint64_t +hash(const void *key, size_t len, uint64_t seed) +{ + const uint64_t m = 0xc6a4a7935bd1e995; + const int r = 47; + uint64_t h = seed ^ (len * m); + const uint64_t *data = (const uint64_t *)key; + const uint64_t *end = data + (len/8); + const unsigned char *data2; + + assert(((uintptr_t)key & 0x7) == 0); + + while(data != end) { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + data2 = (const unsigned char *)data; + switch(len & 7) { + case 7: h ^= ((uint64_t)(data2[6])) << 48; + case 6: h ^= ((uint64_t)(data2[5])) << 40; + case 5: h ^= ((uint64_t)(data2[4])) << 32; + case 4: h ^= ((uint64_t)(data2[3])) << 24; + case 3: h ^= ((uint64_t)(data2[2])) << 16; + case 2: h ^= ((uint64_t)(data2[1])) << 8; + case 1: h ^= ((uint64_t)(data2[0])); + h *= m; + } + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/dep/include/jemalloc/internal/huge.h b/dep/include/jemalloc/internal/huge.h new file mode 100644 index 00000000000..3cf32f7506d --- /dev/null +++ b/dep/include/jemalloc/internal/huge.h @@ -0,0 +1,38 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#ifdef JEMALLOC_STATS +/* Huge allocation statistics. 
*/ +extern uint64_t huge_nmalloc; +extern uint64_t huge_ndalloc; +extern size_t huge_allocated; +#endif + +/* Protects chunk-related data structures. */ +extern malloc_mutex_t huge_mtx; + +void *huge_malloc(size_t size, bool zero); +void *huge_palloc(size_t alignment, size_t size); +void *huge_ralloc(void *ptr, size_t size, size_t oldsize); +void huge_dalloc(void *ptr); +size_t huge_salloc(const void *ptr); +#ifdef JEMALLOC_PROF +prof_thr_cnt_t *huge_prof_cnt_get(const void *ptr); +void huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt); +#endif +bool huge_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/dep/include/jemalloc/internal/jemalloc_internal.h b/dep/include/jemalloc/internal/jemalloc_internal.h new file mode 100644 index 00000000000..109510d2962 --- /dev/null +++ b/dep/include/jemalloc/internal/jemalloc_internal.h @@ -0,0 +1,561 @@ +#include <sys/mman.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/sysctl.h> +#include <sys/uio.h> + +#include <errno.h> +#include <limits.h> +#ifndef SIZE_T_MAX +# define SIZE_T_MAX SIZE_MAX +#endif +#include <pthread.h> +#include <sched.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <string.h> +#include <strings.h> +#include <unistd.h> +#include <fcntl.h> +#include <pthread.h> + +#define JEMALLOC_MANGLE +#include "../jemalloc.h" + +#ifdef JEMALLOC_LAZY_LOCK +#include <dlfcn.h> +#endif + +#define RB_COMPACT +#include "jemalloc/internal/rb.h" +#include "jemalloc/internal/qr.h" +#include "jemalloc/internal/ql.h" + +extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); + +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. + */ +#ifdef JEMALLOC_DEBUG +# define assert(e) do { \ + if (!(e)) { \ + char line_buf[UMAX2S_BUFSIZE]; \ + malloc_write("<jemalloc>: "); \ + malloc_write(__FILE__); \ + malloc_write(":"); \ + malloc_write(umax2s(__LINE__, 10, line_buf)); \ + malloc_write(": Failed assertion: "); \ + malloc_write("\""); \ + malloc_write(#e); \ + malloc_write("\"\n"); \ + abort(); \ + } \ +} while (0) +#else +#define assert(e) +#endif + +/* + * jemalloc can conceptually be broken into components (arena, tcache, etc.), + * but there are circular dependencies that cannot be broken without + * substantial performance degradation. In order to reduce the effect on + * visual code flow, read the header files in multiple passes, with one of the + * following cpp variables defined during each pass: + * + * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data + * types. + * JEMALLOC_H_STRUCTS : Data structures. + * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. + * JEMALLOC_H_INLINES : Inline functions. + */ +/******************************************************************************/ +#define JEMALLOC_H_TYPES + +#define ZU(z) ((size_t)z) + +#ifndef __DECONST +# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) +#endif + +#ifdef JEMALLOC_DEBUG + /* Disable inlining to make debugging easier. 
*/ +# define JEMALLOC_INLINE +# define inline +#else +# define JEMALLOC_ENABLE_INLINE +# define JEMALLOC_INLINE static inline +#endif + +/* Size of stack-allocated buffer passed to strerror_r(). */ +#define STRERROR_BUF 64 + +/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */ +#ifdef __i386__ +# define LG_QUANTUM 4 +#endif +#ifdef __ia64__ +# define LG_QUANTUM 4 +#endif +#ifdef __alpha__ +# define LG_QUANTUM 4 +#endif +#ifdef __sparc64__ +# define LG_QUANTUM 4 +#endif +#if (defined(__amd64__) || defined(__x86_64__)) +# define LG_QUANTUM 4 +#endif +#ifdef __arm__ +# define LG_QUANTUM 3 +#endif +#ifdef __mips__ +# define LG_QUANTUM 3 +#endif +#ifdef __powerpc__ +# define LG_QUANTUM 4 +#endif +#ifdef __s390x__ +# define LG_QUANTUM 4 +#endif + +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) + +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) + +#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) + +/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */ +#if (!defined(PIC) && !defined(NO_TLS)) +# define NO_TLS +#endif + +/* + * Maximum size of L1 cache line. This is used to avoid cache line aliasing. + * In addition, this controls the spacing of cacheline-spaced size classes. + */ +#define LG_CACHELINE 6 +#define CACHELINE ((size_t)(1U << LG_CACHELINE)) +#define CACHELINE_MASK (CACHELINE - 1) + +/* Return the smallest cacheline multiple that is >= s. */ +#define CACHELINE_CEILING(s) \ + (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) + +/* + * Page size. STATIC_PAGE_SHIFT is determined by the configure script. If + * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where + * compile-time values are required for the purposes of defining data + * structures. + */ +#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) +#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) + +#ifdef DYNAMIC_PAGE_SHIFT +# define PAGE_SHIFT lg_pagesize +# define PAGE_SIZE pagesize +# define PAGE_MASK pagesize_mask +#else +# define PAGE_SHIFT STATIC_PAGE_SHIFT +# define PAGE_SIZE STATIC_PAGE_SIZE +# define PAGE_MASK STATIC_PAGE_MASK +#endif + +/* Return the smallest pagesize multiple that is >= s. 
*/ +#define PAGE_CEILING(s) \ + (((s) + PAGE_MASK) & ~PAGE_MASK) + +#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_TYPES +/******************************************************************************/ +#define JEMALLOC_H_STRUCTS + +#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_STRUCTS +/******************************************************************************/ +#define JEMALLOC_H_EXTERNS + +extern bool opt_abort; +#ifdef JEMALLOC_FILL +extern bool opt_junk; +#endif +#ifdef JEMALLOC_SYSV +extern bool opt_sysv; +#endif +#ifdef JEMALLOC_XMALLOC +extern bool opt_xmalloc; +#endif +#ifdef JEMALLOC_FILL +extern bool opt_zero; +#endif + +#ifdef DYNAMIC_PAGE_SHIFT +extern size_t pagesize; +extern size_t pagesize_mask; +extern size_t lg_pagesize; +#endif + +/* Number of CPUs. */ +extern unsigned ncpus; + +extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ +#ifndef NO_TLS +/* + * Map of pthread_self() --> arenas[???], used for selecting an arena to use + * for allocations. + */ +extern __thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec")); +#endif +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. 
+ */ +extern arena_t **arenas; +extern unsigned narenas; + +arena_t *arenas_extend(unsigned ind); +#ifndef NO_TLS +arena_t *choose_arena_hard(void); +#endif + +#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_EXTERNS +/******************************************************************************/ +#define JEMALLOC_H_INLINES + +#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" + +#ifndef JEMALLOC_ENABLE_INLINE +void malloc_write(const char *s); +arena_t *choose_arena(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +/* + * Wrapper around malloc_message() that avoids the need for + * JEMALLOC_P(malloc_message)(...) throughout the code. + */ +JEMALLOC_INLINE void +malloc_write(const char *s) +{ + + JEMALLOC_P(malloc_message)(NULL, s); +} + +/* + * Choose an arena based on a per-thread value (fast-path code, calls slow-path + * code if necessary). + */ +JEMALLOC_INLINE arena_t * +choose_arena(void) +{ + arena_t *ret; + + /* + * We can only use TLS if this is a PIC library, since for the static + * library version, libc's malloc is used by TLS allocation, which + * introduces a bootstrapping issue. + */ +#ifndef NO_TLS + ret = arenas_map; + if (ret == NULL) { + ret = choose_arena_hard(); + assert(ret != NULL); + } +#else + if (isthreaded && narenas > 1) { + unsigned long ind; + + /* + * Hash pthread_self() to one of the arenas. There is a prime + * number of arenas, so this has a reasonable chance of + * working. Even so, the hashing can be easily thwarted by + * inconvenient pthread_self() values. Without specific + * knowledge of how pthread_self() calculates values, we can't + * easily do much better than this. + */ + ind = (unsigned long) pthread_self() % narenas; + + /* + * Optimistially assume that arenas[ind] has been initialized. + * At worst, we find out that some other thread has already + * done so, after acquiring the lock in preparation. Note that + * this lazy locking also has the effect of lazily forcing + * cache coherency; without the lock acquisition, there's no + * guarantee that modification of arenas[ind] by another thread + * would be seen on this CPU for an arbitrary amount of time. + * + * In general, this approach to modifying a synchronized value + * isn't a good idea, but in this case we only ever modify the + * value once, so things work out well. + */ + ret = arenas[ind]; + if (ret == NULL) { + /* + * Avoid races with another thread that may have already + * initialized arenas[ind]. 
+ */ + malloc_mutex_lock(&arenas_lock); + if (arenas[ind] == NULL) + ret = arenas_extend((unsigned)ind); + else + ret = arenas[ind]; + malloc_mutex_unlock(&arenas_lock); + } + } else + ret = arenas[0]; +#endif + + assert(ret != NULL); + return (ret); +} +#endif + +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/prof.h" + +#ifndef JEMALLOC_ENABLE_INLINE +void *imalloc(size_t size); +void *icalloc(size_t size); +void *ipalloc(size_t alignment, size_t size); +size_t isalloc(const void *ptr); +void *iralloc(void *ptr, size_t size); +void idalloc(void *ptr); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE void * +imalloc(size_t size) +{ + + assert(size != 0); + + if (size <= arena_maxclass) + return (arena_malloc(size, false)); + else + return (huge_malloc(size, false)); +} + +JEMALLOC_INLINE void * +icalloc(size_t size) +{ + + if (size <= arena_maxclass) + return (arena_malloc(size, true)); + else + return (huge_malloc(size, true)); +} + +JEMALLOC_INLINE void * +ipalloc(size_t alignment, size_t size) +{ + void *ret; + size_t ceil_size; + + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each small + * size class, every object is aligned at the smallest power of two + * that is non-zero in the base two representation of the size. For + * example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + * + * Depending on runtime settings, it is possible that arena_malloc() + * will further round up to a power of two, but that never causes + * correctness issues. + */ + ceil_size = (size + (alignment - 1)) & (-alignment); + /* + * (ceil_size < size) protects against the combination of maximal + * alignment and size greater than maximal alignment. + */ + if (ceil_size < size) { + /* size_t overflow. */ + return (NULL); + } + + if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE + && ceil_size <= arena_maxclass)) + ret = arena_malloc(ceil_size, false); + else { + size_t run_size; + + /* + * We can't achieve subpage alignment, so round up alignment + * permanently; it makes later calculations simpler. + */ + alignment = PAGE_CEILING(alignment); + ceil_size = PAGE_CEILING(size); + /* + * (ceil_size < size) protects against very large sizes within + * PAGE_SIZE of SIZE_T_MAX. + * + * (ceil_size + alignment < ceil_size) protects against the + * combination of maximal alignment and ceil_size large enough + * to cause overflow. This is similar to the first overflow + * check above, but it needs to be repeated due to the new + * ceil_size value, which may now be *equal* to maximal + * alignment, whereas before we only detected overflow if the + * original size was *greater* than maximal alignment. + */ + if (ceil_size < size || ceil_size + alignment < ceil_size) { + /* size_t overflow. */ + return (NULL); + } + + /* + * Calculate the size of the over-size run that arena_palloc() + * would need to allocate in order to guarantee the alignment. + */ + if (ceil_size >= alignment) + run_size = ceil_size + alignment - PAGE_SIZE; + else { + /* + * It is possible that (alignment << 1) will cause + * overflow, but it doesn't matter because we also + * subtract PAGE_SIZE, which in the case of overflow + * leaves us with a very large run_size. 
That causes + * the first conditional below to fail, which means + * that the bogus run_size value never gets used for + * anything important. + */ + run_size = (alignment << 1) - PAGE_SIZE; + } + + if (run_size <= arena_maxclass) { + ret = arena_palloc(choose_arena(), alignment, ceil_size, + run_size); + } else if (alignment <= chunksize) + ret = huge_malloc(ceil_size, false); + else + ret = huge_palloc(alignment, ceil_size); + } + + assert(((uintptr_t)ret & (alignment - 1)) == 0); + return (ret); +} + +JEMALLOC_INLINE size_t +isalloc(const void *ptr) +{ + size_t ret; + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + assert(chunk->arena->magic == ARENA_MAGIC); + +#ifdef JEMALLOC_PROF + ret = arena_salloc_demote(ptr); +#else + ret = arena_salloc(ptr); +#endif + } else + ret = huge_salloc(ptr); + + return (ret); +} + +JEMALLOC_INLINE void * +iralloc(void *ptr, size_t size) +{ + size_t oldsize; + + assert(ptr != NULL); + assert(size != 0); + + oldsize = isalloc(ptr); + + if (size <= arena_maxclass) + return (arena_ralloc(ptr, size, oldsize)); + else + return (huge_ralloc(ptr, size, oldsize)); +} + +JEMALLOC_INLINE void +idalloc(void *ptr) +{ + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + arena_dalloc(chunk->arena, chunk, ptr); + else + huge_dalloc(ptr); +} +#endif + +#undef JEMALLOC_H_INLINES +/******************************************************************************/ diff --git a/dep/include/jemalloc/internal/jemalloc_internal.h.in b/dep/include/jemalloc/internal/jemalloc_internal.h.in new file mode 100644 index 00000000000..2c3f32f126d --- /dev/null +++ b/dep/include/jemalloc/internal/jemalloc_internal.h.in @@ -0,0 +1,561 @@ +#include <sys/mman.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/sysctl.h> +#include <sys/uio.h> + +#include <errno.h> +#include <limits.h> +#ifndef SIZE_T_MAX +# define SIZE_T_MAX SIZE_MAX +#endif +#include <pthread.h> +#include <sched.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <string.h> +#include <strings.h> +#include <unistd.h> +#include <fcntl.h> +#include <pthread.h> + +#define JEMALLOC_MANGLE +#include "../jemalloc@install_suffix@.h" + +#ifdef JEMALLOC_LAZY_LOCK +#include <dlfcn.h> +#endif + +#define RB_COMPACT +#include "jemalloc/internal/rb.h" +#include "jemalloc/internal/qr.h" +#include "jemalloc/internal/ql.h" + +extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); + +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. + */ +#ifdef JEMALLOC_DEBUG +# define assert(e) do { \ + if (!(e)) { \ + char line_buf[UMAX2S_BUFSIZE]; \ + malloc_write("<jemalloc>: "); \ + malloc_write(__FILE__); \ + malloc_write(":"); \ + malloc_write(umax2s(__LINE__, 10, line_buf)); \ + malloc_write(": Failed assertion: "); \ + malloc_write("\""); \ + malloc_write(#e); \ + malloc_write("\"\n"); \ + abort(); \ + } \ +} while (0) +#else +#define assert(e) +#endif + +/* + * jemalloc can conceptually be broken into components (arena, tcache, etc.), + * but there are circular dependencies that cannot be broken without + * substantial performance degradation. 
In order to reduce the effect on + * visual code flow, read the header files in multiple passes, with one of the + * following cpp variables defined during each pass: + * + * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data + * types. + * JEMALLOC_H_STRUCTS : Data structures. + * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. + * JEMALLOC_H_INLINES : Inline functions. + */ +/******************************************************************************/ +#define JEMALLOC_H_TYPES + +#define ZU(z) ((size_t)z) + +#ifndef __DECONST +# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) +#endif + +#ifdef JEMALLOC_DEBUG + /* Disable inlining to make debugging easier. */ +# define JEMALLOC_INLINE +# define inline +#else +# define JEMALLOC_ENABLE_INLINE +# define JEMALLOC_INLINE static inline +#endif + +/* Size of stack-allocated buffer passed to strerror_r(). */ +#define STRERROR_BUF 64 + +/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */ +#ifdef __i386__ +# define LG_QUANTUM 4 +#endif +#ifdef __ia64__ +# define LG_QUANTUM 4 +#endif +#ifdef __alpha__ +# define LG_QUANTUM 4 +#endif +#ifdef __sparc64__ +# define LG_QUANTUM 4 +#endif +#if (defined(__amd64__) || defined(__x86_64__)) +# define LG_QUANTUM 4 +#endif +#ifdef __arm__ +# define LG_QUANTUM 3 +#endif +#ifdef __mips__ +# define LG_QUANTUM 3 +#endif +#ifdef __powerpc__ +# define LG_QUANTUM 4 +#endif +#ifdef __s390x__ +# define LG_QUANTUM 4 +#endif + +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) + +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) + +#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) + +/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */ +#if (!defined(PIC) && !defined(NO_TLS)) +# define NO_TLS +#endif + +/* + * Maximum size of L1 cache line. This is used to avoid cache line aliasing. + * In addition, this controls the spacing of cacheline-spaced size classes. + */ +#define LG_CACHELINE 6 +#define CACHELINE ((size_t)(1U << LG_CACHELINE)) +#define CACHELINE_MASK (CACHELINE - 1) + +/* Return the smallest cacheline multiple that is >= s. */ +#define CACHELINE_CEILING(s) \ + (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) + +/* + * Page size. STATIC_PAGE_SHIFT is determined by the configure script. If + * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where + * compile-time values are required for the purposes of defining data + * structures. + */ +#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) +#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) + +#ifdef DYNAMIC_PAGE_SHIFT +# define PAGE_SHIFT lg_pagesize +# define PAGE_SIZE pagesize +# define PAGE_MASK pagesize_mask +#else +# define PAGE_SHIFT STATIC_PAGE_SHIFT +# define PAGE_SIZE STATIC_PAGE_SIZE +# define PAGE_MASK STATIC_PAGE_MASK +#endif + +/* Return the smallest pagesize multiple that is >= s. 
*/ +#define PAGE_CEILING(s) \ + (((s) + PAGE_MASK) & ~PAGE_MASK) + +#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_TYPES +/******************************************************************************/ +#define JEMALLOC_H_STRUCTS + +#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_STRUCTS +/******************************************************************************/ +#define JEMALLOC_H_EXTERNS + +extern bool opt_abort; +#ifdef JEMALLOC_FILL +extern bool opt_junk; +#endif +#ifdef JEMALLOC_SYSV +extern bool opt_sysv; +#endif +#ifdef JEMALLOC_XMALLOC +extern bool opt_xmalloc; +#endif +#ifdef JEMALLOC_FILL +extern bool opt_zero; +#endif + +#ifdef DYNAMIC_PAGE_SHIFT +extern size_t pagesize; +extern size_t pagesize_mask; +extern size_t lg_pagesize; +#endif + +/* Number of CPUs. */ +extern unsigned ncpus; + +extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ +#ifndef NO_TLS +/* + * Map of pthread_self() --> arenas[???], used for selecting an arena to use + * for allocations. + */ +extern __thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec")); +#endif +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. 
+ */ +extern arena_t **arenas; +extern unsigned narenas; + +arena_t *arenas_extend(unsigned ind); +#ifndef NO_TLS +arena_t *choose_arena_hard(void); +#endif + +#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_EXTERNS +/******************************************************************************/ +#define JEMALLOC_H_INLINES + +#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" + +#ifndef JEMALLOC_ENABLE_INLINE +void malloc_write(const char *s); +arena_t *choose_arena(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +/* + * Wrapper around malloc_message() that avoids the need for + * JEMALLOC_P(malloc_message)(...) throughout the code. + */ +JEMALLOC_INLINE void +malloc_write(const char *s) +{ + + JEMALLOC_P(malloc_message)(NULL, s); +} + +/* + * Choose an arena based on a per-thread value (fast-path code, calls slow-path + * code if necessary). + */ +JEMALLOC_INLINE arena_t * +choose_arena(void) +{ + arena_t *ret; + + /* + * We can only use TLS if this is a PIC library, since for the static + * library version, libc's malloc is used by TLS allocation, which + * introduces a bootstrapping issue. + */ +#ifndef NO_TLS + ret = arenas_map; + if (ret == NULL) { + ret = choose_arena_hard(); + assert(ret != NULL); + } +#else + if (isthreaded && narenas > 1) { + unsigned long ind; + + /* + * Hash pthread_self() to one of the arenas. There is a prime + * number of arenas, so this has a reasonable chance of + * working. Even so, the hashing can be easily thwarted by + * inconvenient pthread_self() values. Without specific + * knowledge of how pthread_self() calculates values, we can't + * easily do much better than this. + */ + ind = (unsigned long) pthread_self() % narenas; + + /* + * Optimistially assume that arenas[ind] has been initialized. + * At worst, we find out that some other thread has already + * done so, after acquiring the lock in preparation. Note that + * this lazy locking also has the effect of lazily forcing + * cache coherency; without the lock acquisition, there's no + * guarantee that modification of arenas[ind] by another thread + * would be seen on this CPU for an arbitrary amount of time. + * + * In general, this approach to modifying a synchronized value + * isn't a good idea, but in this case we only ever modify the + * value once, so things work out well. + */ + ret = arenas[ind]; + if (ret == NULL) { + /* + * Avoid races with another thread that may have already + * initialized arenas[ind]. 
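+ * This is the usual check-lock-recheck idiom: only a thread that
+ * still observes NULL after acquiring arenas_lock calls
+ * arenas_extend().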
+ */ + malloc_mutex_lock(&arenas_lock); + if (arenas[ind] == NULL) + ret = arenas_extend((unsigned)ind); + else + ret = arenas[ind]; + malloc_mutex_unlock(&arenas_lock); + } + } else + ret = arenas[0]; +#endif + + assert(ret != NULL); + return (ret); +} +#endif + +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/prof.h" + +#ifndef JEMALLOC_ENABLE_INLINE +void *imalloc(size_t size); +void *icalloc(size_t size); +void *ipalloc(size_t alignment, size_t size); +size_t isalloc(const void *ptr); +void *iralloc(void *ptr, size_t size); +void idalloc(void *ptr); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE void * +imalloc(size_t size) +{ + + assert(size != 0); + + if (size <= arena_maxclass) + return (arena_malloc(size, false)); + else + return (huge_malloc(size, false)); +} + +JEMALLOC_INLINE void * +icalloc(size_t size) +{ + + if (size <= arena_maxclass) + return (arena_malloc(size, true)); + else + return (huge_malloc(size, true)); +} + +JEMALLOC_INLINE void * +ipalloc(size_t alignment, size_t size) +{ + void *ret; + size_t ceil_size; + + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each small + * size class, every object is aligned at the smallest power of two + * that is non-zero in the base two representation of the size. For + * example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + * + * Depending on runtime settings, it is possible that arena_malloc() + * will further round up to a power of two, but that never causes + * correctness issues. + */ + ceil_size = (size + (alignment - 1)) & (-alignment); + /* + * (ceil_size < size) protects against the combination of maximal + * alignment and size greater than maximal alignment. + */ + if (ceil_size < size) { + /* size_t overflow. */ + return (NULL); + } + + if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE + && ceil_size <= arena_maxclass)) + ret = arena_malloc(ceil_size, false); + else { + size_t run_size; + + /* + * We can't achieve subpage alignment, so round up alignment + * permanently; it makes later calculations simpler. + */ + alignment = PAGE_CEILING(alignment); + ceil_size = PAGE_CEILING(size); + /* + * (ceil_size < size) protects against very large sizes within + * PAGE_SIZE of SIZE_T_MAX. + * + * (ceil_size + alignment < ceil_size) protects against the + * combination of maximal alignment and ceil_size large enough + * to cause overflow. This is similar to the first overflow + * check above, but it needs to be repeated due to the new + * ceil_size value, which may now be *equal* to maximal + * alignment, whereas before we only detected overflow if the + * original size was *greater* than maximal alignment. + */ + if (ceil_size < size || ceil_size + alignment < ceil_size) { + /* size_t overflow. */ + return (NULL); + } + + /* + * Calculate the size of the over-size run that arena_palloc() + * would need to allocate in order to guarantee the alignment. + */ + if (ceil_size >= alignment) + run_size = ceil_size + alignment - PAGE_SIZE; + else { + /* + * It is possible that (alignment << 1) will cause + * overflow, but it doesn't matter because we also + * subtract PAGE_SIZE, which in the case of overflow + * leaves us with a very large run_size. 
That causes + * the first conditional below to fail, which means + * that the bogus run_size value never gets used for + * anything important. + */ + run_size = (alignment << 1) - PAGE_SIZE; + } + + if (run_size <= arena_maxclass) { + ret = arena_palloc(choose_arena(), alignment, ceil_size, + run_size); + } else if (alignment <= chunksize) + ret = huge_malloc(ceil_size, false); + else + ret = huge_palloc(alignment, ceil_size); + } + + assert(((uintptr_t)ret & (alignment - 1)) == 0); + return (ret); +} + +JEMALLOC_INLINE size_t +isalloc(const void *ptr) +{ + size_t ret; + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + assert(chunk->arena->magic == ARENA_MAGIC); + +#ifdef JEMALLOC_PROF + ret = arena_salloc_demote(ptr); +#else + ret = arena_salloc(ptr); +#endif + } else + ret = huge_salloc(ptr); + + return (ret); +} + +JEMALLOC_INLINE void * +iralloc(void *ptr, size_t size) +{ + size_t oldsize; + + assert(ptr != NULL); + assert(size != 0); + + oldsize = isalloc(ptr); + + if (size <= arena_maxclass) + return (arena_ralloc(ptr, size, oldsize)); + else + return (huge_ralloc(ptr, size, oldsize)); +} + +JEMALLOC_INLINE void +idalloc(void *ptr) +{ + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + arena_dalloc(chunk->arena, chunk, ptr); + else + huge_dalloc(ptr); +} +#endif + +#undef JEMALLOC_H_INLINES +/******************************************************************************/ diff --git a/dep/include/jemalloc/internal/mb.h b/dep/include/jemalloc/internal/mb.h new file mode 100644 index 00000000000..1707aa91d68 --- /dev/null +++ b/dep/include/jemalloc/internal/mb.h @@ -0,0 +1,108 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void mb_write(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(MB_C_)) +#ifdef __i386__ +/* + * According to the Intel Architecture Software Developer's Manual, current + * processors execute instructions in order from the perspective of other + * processors in a multiprocessor system, but 1) Intel reserves the right to + * change that, and 2) the compiler's optimizer could re-order instructions if + * there weren't some form of barrier. Therefore, even if running on an + * architecture that does not need memory barriers (everything through at least + * i686), an "optimizer barrier" is necessary. + */ +JEMALLOC_INLINE void +mb_write(void) +{ + +# if 0 + /* This is a true memory barrier. */ + asm volatile ("pusha;" + "xor %%eax,%%eax;" + "cpuid;" + "popa;" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +#else + /* + * This is hopefully enough to keep the compiler from reordering + * instructions around this one. + */ + asm volatile ("nop;" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +#endif +} +#elif (defined(__amd64_) || defined(__x86_64__)) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("sfence" + : /* Outputs. 
*/ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__powerpc__) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("eieio" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__sparc64__) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("membar #StoreStore" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#else +/* + * This is much slower than a simple memory barrier, but the semantics of mutex + * unlock make this work. + */ +JEMALLOC_INLINE void +mb_write(void) +{ + malloc_mutex_t mtx; + + malloc_mutex_init(&mtx); + malloc_mutex_lock(&mtx); + malloc_mutex_unlock(&mtx); +} +#endif +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/dep/include/jemalloc/internal/mutex.h b/dep/include/jemalloc/internal/mutex.h new file mode 100644 index 00000000000..108bfa8abfd --- /dev/null +++ b/dep/include/jemalloc/internal/mutex.h @@ -0,0 +1,61 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef pthread_mutex_t malloc_mutex_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#ifdef JEMALLOC_LAZY_LOCK +extern bool isthreaded; +#else +# define isthreaded true +#endif + +bool malloc_mutex_init(malloc_mutex_t *mutex); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void malloc_mutex_lock(malloc_mutex_t *mutex); +bool malloc_mutex_trylock(malloc_mutex_t *mutex); +void malloc_mutex_unlock(malloc_mutex_t *mutex); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE void +malloc_mutex_lock(malloc_mutex_t *mutex) +{ + + if (isthreaded) + pthread_mutex_lock(mutex); +} + +JEMALLOC_INLINE bool +malloc_mutex_trylock(malloc_mutex_t *mutex) +{ + + if (isthreaded) + return (pthread_mutex_trylock(mutex) != 0); + else + return (false); +} + +JEMALLOC_INLINE void +malloc_mutex_unlock(malloc_mutex_t *mutex) +{ + + if (isthreaded) + pthread_mutex_unlock(mutex); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/dep/include/jemalloc/internal/prn.h b/dep/include/jemalloc/internal/prn.h new file mode 100644 index 00000000000..0709d708012 --- /dev/null +++ b/dep/include/jemalloc/internal/prn.h @@ -0,0 +1,60 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * Simple linear congruential pseudo-random number generator: + * + * prn(y) = (a*x + c) % m + * + * where the following constants ensure maximal period: + * + * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. + * c == Odd number (relatively prime to 2^n). + * m == 2^32 + * + * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. + * + * This choice of m has the disadvantage that the quality of the bits is + * proportional to bit position. For example. the lowest bit has a cycle of 2, + * the next has a cycle of 4, etc. For this reason, we prefer to use the upper + * bits. 
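+ *
+ * A typical invocation looks like the following (the a/c constants here are
+ * purely illustrative; any values satisfying the constraints above work):
+ *
+ *   static uint32_t prn_state = 42;
+ *   uint32_t r;
+ *
+ *   prn32(r, 10, prn_state, 1103515245U, 12345U);
+ *
+ * After the call, r lies in [0, 1024) and prn_state holds the updated seed.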
+ * + * Macro parameters: + * uint32_t r : Result. + * unsigned lg_range : (0..32], number of least significant bits to return. + * uint32_t state : Seed value. + * const uint32_t a, c : See above discussion. + */ +#define prn32(r, lg_range, state, a, c) do { \ + assert(lg_range > 0); \ + assert(lg_range <= 32); \ + \ + r = (state * (a)) + (c); \ + state = r; \ + r >>= (32 - lg_range); \ +} while (false) + +/* Same as prn32(), but 64 bits of pseudo-randomness, using uint64_t. */ +#define prn64(r, lg_range, state, a, c) do { \ + assert(lg_range > 0); \ + assert(lg_range <= 64); \ + \ + r = (state * (a)) + (c); \ + state = r; \ + r >>= (64 - lg_range); \ +} while (false) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/dep/include/jemalloc/internal/prof.h b/dep/include/jemalloc/internal/prof.h new file mode 100644 index 00000000000..6e71552d85e --- /dev/null +++ b/dep/include/jemalloc/internal/prof.h @@ -0,0 +1,171 @@ +#ifdef JEMALLOC_PROF +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_thr_cnt_s prof_thr_cnt_t; +typedef struct prof_ctx_s prof_ctx_t; +typedef struct prof_s prof_t; + +/* Option defaults. */ +#define LG_PROF_BT_MAX_DEFAULT 2 +#define LG_PROF_SAMPLE_DEFAULT 0 +#define LG_PROF_INTERVAL_DEFAULT 30 + +/* + * Hard limit on stack backtrace depth. Note that the version of + * prof_backtrace() that is based on __builtin_return_address() necessarily has + * a hard-coded number of backtrace frame handlers, so increasing + * LG_PROF_BT_MAX requires changing prof_backtrace(). + */ +#define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */ +#define PROF_BT_MAX (1U << LG_PROF_BT_MAX) + +/* Initial hash table size. */ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. */ +#define PROF_DUMP_BUF_SIZE 65536 + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct prof_bt_s { + /* Backtrace, stored as len program counters. */ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + prof_bt_t *bt; + unsigned nignore; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_cnt_s { + /* + * Profiling counters. An allocation/deallocation pair can operate on + * different prof_thr_cnt_t objects that are linked into the same + * prof_ctx_t sets_ql, so it is possible for the cur* counters to go + * negative. In principle it is possible for the *bytes counters to + * overflow/underflow, but a general solution would require some form + * of 128-bit counter solution; this implementation doesn't bother to + * solve that problem. + */ + int64_t curobjs; + int64_t curbytes; + uint64_t accumobjs; + uint64_t accumbytes; +}; + +struct prof_thr_cnt_s { + /* Linkage into prof_ctx_t's sets_ql. 
*/ + ql_elm(prof_thr_cnt_t) link; + + /* + * Associated context. If a thread frees an object that it did not + * allocate, it is possible that the context is not cached in the + * thread's hash table, in which case it must be able to look up the + * context, insert a new prof_thr_cnt_t into the thread's hash table, + * and link it into the prof_ctx_t's sets_ql. + */ + prof_ctx_t *ctx; + + /* + * Threads use memory barriers to update the counters. Since there is + * only ever one writer, the only challenge is for the reader to get a + * consistent read of the counters. + * + * The writer uses this series of operations: + * + * 1) Increment epoch to an odd number. + * 2) Update counters. + * 3) Increment epoch to an even number. + * + * The reader must assure 1) that the epoch is even while it reads the + * counters, and 2) that the epoch doesn't change between the time it + * starts and finishes reading the counters. + */ + unsigned epoch; + + /* Profiling counters. */ + prof_cnt_t cnts; +}; + +struct prof_ctx_s { + /* Protects cnt_merged and sets_ql. */ + malloc_mutex_t lock; + + /* Temporary storage for aggregation during dump. */ + prof_cnt_t cnt_dump; + + /* When threads exit, they merge their stats into cnt_merged. */ + prof_cnt_t cnt_merged; + + /* + * List of profile counters, one for each thread that has allocated in + * this context. + */ + ql_head(prof_thr_cnt_t) cnts_ql; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_prof; +/* + * Even if opt_prof is true, sampling can be temporarily disabled by setting + * opt_prof_active to false. No locking is used when updating opt_prof_active, + * so there are no guarantees regarding how long it will take for all threads + * to notice state changes. + */ +extern bool opt_prof_active; +extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */ +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_udump; /* High-water memory dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ + +/* + * Profile dump interval, measured in bytes allocated. Each arena triggers a + * profile dump when it reaches this threshold. The effect is that the + * interval between profile dumps averages prof_interval, though the actual + * interval between dumps will tend to be sporadic, and the interval will be a + * maximum of approximately (prof_interval * narenas). + */ +extern uint64_t prof_interval; + +/* + * If true, promote small sampled objects to large objects, since small run + * headers do not have embedded profile context pointers. 
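+ * Promotion only affects the sampled subset of allocations, so the space it
+ * wastes is bounded by the sampling rate controlled via opt_lg_prof_sample
+ * above.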
+ */ +extern bool prof_promote; + +bool prof_init(prof_t *prof, bool master); +void prof_destroy(prof_t *prof); + +prof_thr_cnt_t *prof_alloc_prep(size_t size); +prof_thr_cnt_t *prof_cnt_get(const void *ptr); +void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt); +void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, + size_t old_size, prof_thr_cnt_t *old_cnt); +void prof_free(const void *ptr); +void prof_idump(void); +bool prof_mdump(const char *filename); +void prof_udump(void); +void prof_boot0(void); +bool prof_boot1(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +#endif /* JEMALLOC_PROF */ diff --git a/dep/include/jemalloc/internal/ql.h b/dep/include/jemalloc/internal/ql.h new file mode 100644 index 00000000000..a9ed2393f0c --- /dev/null +++ b/dep/include/jemalloc/internal/ql.h @@ -0,0 +1,83 @@ +/* + * List definitions. + */ +#define ql_head(a_type) \ +struct { \ + a_type *qlh_first; \ +} + +#define ql_head_initializer(a_head) {NULL} + +#define ql_elm(a_type) qr(a_type) + +/* List functions. */ +#define ql_new(a_head) do { \ + (a_head)->qlh_first = NULL; \ +} while (0) + +#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) + +#define ql_first(a_head) ((a_head)->qlh_first) + +#define ql_last(a_head, a_field) \ + ((ql_first(a_head) != NULL) \ + ? qr_prev(ql_first(a_head), a_field) : NULL) + +#define ql_next(a_head, a_elm, a_field) \ + ((ql_last(a_head, a_field) != (a_elm)) \ + ? qr_next((a_elm), a_field) : NULL) + +#define ql_prev(a_head, a_elm, a_field) \ + ((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \ + : NULL) + +#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ + qr_before_insert((a_qlelm), (a_elm), a_field); \ + if (ql_first(a_head) == (a_qlelm)) { \ + ql_first(a_head) = (a_elm); \ + } \ +} while (0) + +#define ql_after_insert(a_qlelm, a_elm, a_field) \ + qr_after_insert((a_qlelm), (a_elm), a_field) + +#define ql_head_insert(a_head, a_elm, a_field) do { \ + if (ql_first(a_head) != NULL) { \ + qr_before_insert(ql_first(a_head), (a_elm), a_field); \ + } \ + ql_first(a_head) = (a_elm); \ +} while (0) + +#define ql_tail_insert(a_head, a_elm, a_field) do { \ + if (ql_first(a_head) != NULL) { \ + qr_before_insert(ql_first(a_head), (a_elm), a_field); \ + } \ + ql_first(a_head) = qr_next((a_elm), a_field); \ +} while (0) + +#define ql_remove(a_head, a_elm, a_field) do { \ + if (ql_first(a_head) == (a_elm)) { \ + ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ + } \ + if (ql_first(a_head) != (a_elm)) { \ + qr_remove((a_elm), a_field); \ + } else { \ + ql_first(a_head) = NULL; \ + } \ +} while (0) + +#define ql_head_remove(a_head, a_type, a_field) do { \ + a_type *t = ql_first(a_head); \ + ql_remove((a_head), t, a_field); \ +} while (0) + +#define ql_tail_remove(a_head, a_type, a_field) do { \ + a_type *t = ql_last(a_head, a_field); \ + ql_remove((a_head), t, a_field); \ +} while (0) + +#define ql_foreach(a_var, a_head, a_field) \ + qr_foreach((a_var), ql_first(a_head), a_field) + +#define ql_reverse_foreach(a_var, a_head, a_field) \ + qr_reverse_foreach((a_var), ql_first(a_head), a_field) diff --git a/dep/include/jemalloc/internal/qr.h b/dep/include/jemalloc/internal/qr.h new file mode 100644 index 00000000000..fe22352fedd --- /dev/null +++ b/dep/include/jemalloc/internal/qr.h @@ -0,0 +1,67 @@ +/* Ring 
definitions. */ +#define qr(a_type) \ +struct { \ + a_type *qre_next; \ + a_type *qre_prev; \ +} + +/* Ring functions. */ +#define qr_new(a_qr, a_field) do { \ + (a_qr)->a_field.qre_next = (a_qr); \ + (a_qr)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) + +#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) + +#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ + (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ + (a_qr)->a_field.qre_next = (a_qrelm); \ + (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ + (a_qrelm)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_after_insert(a_qrelm, a_qr, a_field) \ + do \ + { \ + (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ + (a_qr)->a_field.qre_prev = (a_qrelm); \ + (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ + (a_qrelm)->a_field.qre_next = (a_qr); \ + } while (0) + +#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ + void *t; \ + (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ + t = (a_qr_a)->a_field.qre_prev; \ + (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ + (a_qr_b)->a_field.qre_prev = t; \ +} while (0) + +/* qr_meld() and qr_split() are functionally equivalent, so there's no need to + * have two copies of the code. */ +#define qr_split(a_qr_a, a_qr_b, a_field) \ + qr_meld((a_qr_a), (a_qr_b), a_field) + +#define qr_remove(a_qr, a_field) do { \ + (a_qr)->a_field.qre_prev->a_field.qre_next \ + = (a_qr)->a_field.qre_next; \ + (a_qr)->a_field.qre_next->a_field.qre_prev \ + = (a_qr)->a_field.qre_prev; \ + (a_qr)->a_field.qre_next = (a_qr); \ + (a_qr)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_foreach(var, a_qr, a_field) \ + for ((var) = (a_qr); \ + (var) != NULL; \ + (var) = (((var)->a_field.qre_next != (a_qr)) \ + ? (var)->a_field.qre_next : NULL)) + +#define qr_reverse_foreach(var, a_qr, a_field) \ + for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ + (var) != NULL; \ + (var) = (((var) != (a_qr)) \ + ? (var)->a_field.qre_prev : NULL)) diff --git a/dep/include/jemalloc/internal/rb.h b/dep/include/jemalloc/internal/rb.h new file mode 100644 index 00000000000..ee9b009d235 --- /dev/null +++ b/dep/include/jemalloc/internal/rb.h @@ -0,0 +1,973 @@ +/*- + ******************************************************************************* + * + * cpp macro implementation of left-leaning 2-3 red-black trees. Parent + * pointers are not used, and color bits are stored in the least significant + * bit of right-child pointers (if RB_COMPACT is defined), thus making node + * linkage as compact as is possible for red-black trees. + * + * Usage: + * + * #include <stdint.h> + * #include <stdbool.h> + * #define NDEBUG // (Optional, see assert(3).) + * #include <assert.h> + * #define RB_COMPACT // (Optional, embed color bits in right-child pointers.) + * #include <rb.h> + * ... + * + ******************************************************************************* + */ + +#ifndef RB_H_ +#define RB_H_ + +#if 0 +__FBSDID("$FreeBSD: head/lib/libc/stdlib/rb.h 204493 2010-02-28 22:57:13Z jasone $"); +#endif + +#ifdef RB_COMPACT +/* Node structure. */ +#define rb_node(a_type) \ +struct { \ + a_type *rbn_left; \ + a_type *rbn_right_red; \ +} +#else +#define rb_node(a_type) \ +struct { \ + a_type *rbn_left; \ + a_type *rbn_right; \ + bool rbn_red; \ +} +#endif + +/* Root structure. 
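+ * The embedded rbt_nil node acts as the sentinel for empty children and for an
+ * empty tree; the generated functions compare against &rbt_nil internally and
+ * only translate it to NULL at the public interface.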
*/ +#define rb_tree(a_type) \ +struct { \ + a_type *rbt_root; \ + a_type rbt_nil; \ +} + +/* Left accessors. */ +#define rbtn_left_get(a_type, a_field, a_node) \ + ((a_node)->a_field.rbn_left) +#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \ + (a_node)->a_field.rbn_left = a_left; \ +} while (0) + +#ifdef RB_COMPACT +/* Right accessors. */ +#define rbtn_right_get(a_type, a_field, a_node) \ + ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \ + & ((ssize_t)-2))) +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \ + | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \ +} while (0) + +/* Color accessors. */ +#define rbtn_red_get(a_type, a_field, a_node) \ + ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \ + & ((size_t)1))) +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \ + (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \ + | ((ssize_t)a_red)); \ +} while (0) +#define rbtn_red_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \ + (a_node)->a_field.rbn_right_red) | ((size_t)1)); \ +} while (0) +#define rbtn_black_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ + (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ +} while (0) +#else +/* Right accessors. */ +#define rbtn_right_get(a_type, a_field, a_node) \ + ((a_node)->a_field.rbn_right) +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ + (a_node)->a_field.rbn_right = a_right; \ +} while (0) + +/* Color accessors. */ +#define rbtn_red_get(a_type, a_field, a_node) \ + ((a_node)->a_field.rbn_red) +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ + (a_node)->a_field.rbn_red = (a_red); \ +} while (0) +#define rbtn_red_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_red = true; \ +} while (0) +#define rbtn_black_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_red = false; \ +} while (0) +#endif + +/* Node initializer. */ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ + rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_red_set(a_type, a_field, (a_node)); \ +} while (0) + +/* Tree initializer. */ +#define rb_new(a_type, a_field, a_rbt) do { \ + (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \ + rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \ + rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \ +} while (0) + +/* Internal utility macros. 
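+ * rbtn_first()/rbtn_last() descend to the leftmost/rightmost node of the given
+ * subtree, and the rotate macros perform single left/right rotations, leaving
+ * the new subtree root in r_node.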
*/ +#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ + (r_node) = (a_root); \ + if ((r_node) != &(a_rbt)->rbt_nil) { \ + for (; \ + rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\ + (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ + } \ + } \ +} while (0) + +#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ + (r_node) = (a_root); \ + if ((r_node) != &(a_rbt)->rbt_nil) { \ + for (; rbtn_right_get(a_type, a_field, (r_node)) != \ + &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \ + (r_node))) { \ + } \ + } \ +} while (0) + +#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ + (r_node) = rbtn_right_get(a_type, a_field, (a_node)); \ + rbtn_right_set(a_type, a_field, (a_node), \ + rbtn_left_get(a_type, a_field, (r_node))); \ + rbtn_left_set(a_type, a_field, (r_node), (a_node)); \ +} while (0) + +#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ + (r_node) = rbtn_left_get(a_type, a_field, (a_node)); \ + rbtn_left_set(a_type, a_field, (a_node), \ + rbtn_right_get(a_type, a_field, (r_node))); \ + rbtn_right_set(a_type, a_field, (r_node), (a_node)); \ +} while (0) + +/* + * The rb_proto() macro generates function prototypes that correspond to the + * functions generated by an equivalently parameterized call to rb_gen(). + */ + +#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ +a_attr void \ +a_prefix##new(a_rbt_type *rbtree); \ +a_attr a_type * \ +a_prefix##first(a_rbt_type *rbtree); \ +a_attr a_type * \ +a_prefix##last(a_rbt_type *rbtree); \ +a_attr a_type * \ +a_prefix##next(a_rbt_type *rbtree, a_type *node); \ +a_attr a_type * \ +a_prefix##prev(a_rbt_type *rbtree, a_type *node); \ +a_attr a_type * \ +a_prefix##search(a_rbt_type *rbtree, a_type *key); \ +a_attr a_type * \ +a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \ +a_attr a_type * \ +a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \ +a_attr void \ +a_prefix##insert(a_rbt_type *rbtree, a_type *node); \ +a_attr void \ +a_prefix##remove(a_rbt_type *rbtree, a_type *node); \ +a_attr a_type * \ +a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ + a_rbt_type *, a_type *, void *), void *arg); \ +a_attr a_type * \ +a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); + +/* + * The rb_gen() macro generates a type-specific red-black tree implementation, + * based on the above cpp macros. + * + * Arguments: + * + * a_attr : Function attribute for generated functions (ex: static). + * a_prefix : Prefix for generated functions (ex: ex_). + * a_rb_type : Type for red-black tree data structure (ex: ex_t). + * a_type : Type for red-black tree node data structure (ex: ex_node_t). + * a_field : Name of red-black tree node linkage (ex: ex_link). + * a_cmp : Node comparison function name, with the following prototype: + * int (a_cmp *)(a_type *a_node, a_type *a_other); + * ^^^^^^ + * or a_key + * Interpretation of comparision function return values: + * -1 : a_node < a_other + * 0 : a_node == a_other + * 1 : a_node > a_other + * In all cases, the a_node or a_key macro argument is the first + * argument to the comparison function, which makes it possible + * to write comparison functions that treat the first argument + * specially. 
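+ *               For instance, assuming ex_node_t (see the setup below) also
+ *               carried an integer key field, a suitable comparison function
+ *               would be:
+ *
+ *                 static int
+ *                 ex_cmp(ex_node_t *a_node, ex_node_t *a_other)
+ *                 {
+ *                     return ((a_node->key > a_other->key)
+ *                         - (a_node->key < a_other->key));
+ *                 }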
+ * + * Assuming the following setup: + * + * typedef struct ex_node_s ex_node_t; + * struct ex_node_s { + * rb_node(ex_node_t) ex_link; + * }; + * typedef rb_tree(ex_node_t) ex_t; + * rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp) + * + * The following API is generated: + * + * static void + * ex_new(ex_t *extree); + * Description: Initialize a red-black tree structure. + * Args: + * extree: Pointer to an uninitialized red-black tree object. + * + * static ex_node_t * + * ex_first(ex_t *extree); + * static ex_node_t * + * ex_last(ex_t *extree); + * Description: Get the first/last node in extree. + * Args: + * extree: Pointer to an initialized red-black tree object. + * Ret: First/last node in extree, or NULL if extree is empty. + * + * static ex_node_t * + * ex_next(ex_t *extree, ex_node_t *node); + * static ex_node_t * + * ex_prev(ex_t *extree, ex_node_t *node); + * Description: Get node's successor/predecessor. + * Args: + * extree: Pointer to an initialized red-black tree object. + * node : A node in extree. + * Ret: node's successor/predecessor in extree, or NULL if node is + * last/first. + * + * static ex_node_t * + * ex_search(ex_t *extree, ex_node_t *key); + * Description: Search for node that matches key. + * Args: + * extree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in extree that matches key, or NULL if no match. + * + * static ex_node_t * + * ex_nsearch(ex_t *extree, ex_node_t *key); + * static ex_node_t * + * ex_psearch(ex_t *extree, ex_node_t *key); + * Description: Search for node that matches key. If no match is found, + * return what would be key's successor/predecessor, were + * key in extree. + * Args: + * extree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in extree that matches key, or if no match, hypothetical + * node's successor/predecessor (NULL if no successor/predecessor). + * + * static void + * ex_insert(ex_t *extree, ex_node_t *node); + * Description: Insert node into extree. + * Args: + * extree: Pointer to an initialized red-black tree object. + * node : Node to be inserted into extree. + * + * static void + * ex_remove(ex_t *extree, ex_node_t *node); + * Description: Remove node from extree. + * Args: + * extree: Pointer to an initialized red-black tree object. + * node : Node in extree to be removed. + * + * static ex_node_t * + * ex_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, + * ex_node_t *, void *), void *arg); + * static ex_node_t * + * ex_reverse_iter(ex_t *extree, ex_node_t *start, ex_node *(*cb)(ex_t *, + * ex_node_t *, void *), void *arg); + * Description: Iterate forward/backward over extree, starting at node. + * If extree is modified, iteration must be immediately + * terminated by the callback function that causes the + * modification. + * Args: + * extree: Pointer to an initialized red-black tree object. + * start : Node at which to start iteration, or NULL to start at + * first/last node. + * cb : Callback function, which is called for each node during + * iteration. Under normal circumstances the callback function + * should return NULL, which causes iteration to continue. If a + * callback function returns non-NULL, iteration is immediately + * terminated and the non-NULL return value is returned by the + * iterator. This is useful for re-starting iteration after + * modifying extree. + * arg : Opaque pointer passed to cb(). 
+ * Ret: NULL if iteration completed, or the non-NULL callback return value + * that caused termination of the iteration. + */ +#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ +a_attr void \ +a_prefix##new(a_rbt_type *rbtree) { \ + rb_new(a_type, a_field, rbtree); \ +} \ +a_attr a_type * \ +a_prefix##first(a_rbt_type *rbtree) { \ + a_type *ret; \ + rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##last(a_rbt_type *rbtree) { \ + a_type *ret; \ + rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ + a_type *ret; \ + if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ + a_field, node), ret); \ + } else { \ + a_type *tnode = rbtree->rbt_root; \ + assert(tnode != &rbtree->rbt_nil); \ + ret = &rbtree->rbt_nil; \ + while (true) { \ + int cmp = (a_cmp)(node, tnode); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + break; \ + } \ + assert(tnode != &rbtree->rbt_nil); \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ + a_type *ret; \ + if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ + a_field, node), ret); \ + } else { \ + a_type *tnode = rbtree->rbt_root; \ + assert(tnode != &rbtree->rbt_nil); \ + ret = &rbtree->rbt_nil; \ + while (true) { \ + int cmp = (a_cmp)(node, tnode); \ + if (cmp < 0) { \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + ret = tnode; \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + break; \ + } \ + assert(tnode != &rbtree->rbt_nil); \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ + a_type *ret; \ + int cmp; \ + ret = rbtree->rbt_root; \ + while (ret != &rbtree->rbt_nil \ + && (cmp = (a_cmp)(key, ret)) != 0) { \ + if (cmp < 0) { \ + ret = rbtn_left_get(a_type, a_field, ret); \ + } else { \ + ret = rbtn_right_get(a_type, a_field, ret); \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ + a_type *ret; \ + a_type *tnode = rbtree->rbt_root; \ + ret = &rbtree->rbt_nil; \ + while (tnode != &rbtree->rbt_nil) { \ + int cmp = (a_cmp)(key, tnode); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + ret = tnode; \ + break; \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ + a_type *ret; \ + a_type *tnode = rbtree->rbt_root; \ + ret = &rbtree->rbt_nil; \ + while (tnode != &rbtree->rbt_nil) { \ + int cmp = (a_cmp)(key, tnode); \ + if (cmp < 0) { \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + ret = tnode; \ + tnode = 
rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + ret = tnode; \ + break; \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr void \ +a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ + struct { \ + a_type *node; \ + int cmp; \ + } path[sizeof(void *) << 4], *pathp; \ + rbt_node_new(a_type, a_field, rbtree, node); \ + /* Wind. */ \ + path->node = rbtree->rbt_root; \ + for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + int cmp = pathp->cmp = a_cmp(node, pathp->node); \ + assert(cmp != 0); \ + if (cmp < 0) { \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } else { \ + pathp[1].node = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + } \ + } \ + pathp->node = node; \ + /* Unwind. */ \ + for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ + a_type *cnode = pathp->node; \ + if (pathp->cmp < 0) { \ + a_type *left = pathp[1].node; \ + rbtn_left_set(a_type, a_field, cnode, left); \ + if (rbtn_red_get(a_type, a_field, left)) { \ + a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ + if (rbtn_red_get(a_type, a_field, leftleft)) { \ + /* Fix up 4-node. */ \ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, leftleft); \ + rbtn_rotate_right(a_type, a_field, cnode, tnode); \ + cnode = tnode; \ + } \ + } else { \ + return; \ + } \ + } else { \ + a_type *right = pathp[1].node; \ + rbtn_right_set(a_type, a_field, cnode, right); \ + if (rbtn_red_get(a_type, a_field, right)) { \ + a_type *left = rbtn_left_get(a_type, a_field, cnode); \ + if (rbtn_red_get(a_type, a_field, left)) { \ + /* Split 4-node. */ \ + rbtn_black_set(a_type, a_field, left); \ + rbtn_black_set(a_type, a_field, right); \ + rbtn_red_set(a_type, a_field, cnode); \ + } else { \ + /* Lean left. */ \ + a_type *tnode; \ + bool tred = rbtn_red_get(a_type, a_field, cnode); \ + rbtn_rotate_left(a_type, a_field, cnode, tnode); \ + rbtn_color_set(a_type, a_field, tnode, tred); \ + rbtn_red_set(a_type, a_field, cnode); \ + cnode = tnode; \ + } \ + } else { \ + return; \ + } \ + } \ + pathp->node = cnode; \ + } \ + /* Set root, and make it black. */ \ + rbtree->rbt_root = path->node; \ + rbtn_black_set(a_type, a_field, rbtree->rbt_root); \ +} \ +a_attr void \ +a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ + struct { \ + a_type *node; \ + int cmp; \ + } *pathp, *nodep, path[sizeof(void *) << 4]; \ + /* Wind. */ \ + nodep = NULL; /* Silence compiler warning. */ \ + path->node = rbtree->rbt_root; \ + for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + int cmp = pathp->cmp = a_cmp(node, pathp->node); \ + if (cmp < 0) { \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } else { \ + pathp[1].node = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + if (cmp == 0) { \ + /* Find node's successor, in preparation for swap. */ \ + pathp->cmp = 1; \ + nodep = pathp; \ + for (pathp++; pathp->node != &rbtree->rbt_nil; \ + pathp++) { \ + pathp->cmp = -1; \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } \ + break; \ + } \ + } \ + } \ + assert(nodep->node == node); \ + pathp--; \ + if (pathp->node != node) { \ + /* Swap node with its successor. 
*/ \ + bool tred = rbtn_red_get(a_type, a_field, pathp->node); \ + rbtn_color_set(a_type, a_field, pathp->node, \ + rbtn_red_get(a_type, a_field, node)); \ + rbtn_left_set(a_type, a_field, pathp->node, \ + rbtn_left_get(a_type, a_field, node)); \ + /* If node's successor is its right child, the following code */\ + /* will do the wrong thing for the right child pointer. */\ + /* However, it doesn't matter, because the pointer will be */\ + /* properly set when the successor is pruned. */\ + rbtn_right_set(a_type, a_field, pathp->node, \ + rbtn_right_get(a_type, a_field, node)); \ + rbtn_color_set(a_type, a_field, node, tred); \ + /* The pruned leaf node's child pointers are never accessed */\ + /* again, so don't bother setting them to nil. */\ + nodep->node = pathp->node; \ + pathp->node = node; \ + if (nodep == path) { \ + rbtree->rbt_root = nodep->node; \ + } else { \ + if (nodep[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, nodep[-1].node, \ + nodep->node); \ + } else { \ + rbtn_right_set(a_type, a_field, nodep[-1].node, \ + nodep->node); \ + } \ + } \ + } else { \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + if (left != &rbtree->rbt_nil) { \ + /* node has no successor, but it has a left child. */\ + /* Splice node out, without losing the left child. */\ + assert(rbtn_red_get(a_type, a_field, node) == false); \ + assert(rbtn_red_get(a_type, a_field, left)); \ + rbtn_black_set(a_type, a_field, left); \ + if (pathp == path) { \ + rbtree->rbt_root = left; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + left); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + left); \ + } \ + } \ + return; \ + } else if (pathp == path) { \ + /* The tree only contained one node. */ \ + rbtree->rbt_root = &rbtree->rbt_nil; \ + return; \ + } \ + } \ + if (rbtn_red_get(a_type, a_field, pathp->node)) { \ + /* Prune red node, which requires no fixup. */ \ + assert(pathp[-1].cmp < 0); \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + &rbtree->rbt_nil); \ + return; \ + } \ + /* The node to be pruned is black, so unwind until balance is */\ + /* restored. */\ + pathp->node = &rbtree->rbt_nil; \ + for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ + assert(pathp->cmp != 0); \ + if (pathp->cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp->node, \ + pathp[1].node); \ + assert(rbtn_red_get(a_type, a_field, pathp[1].node) \ + == false); \ + if (rbtn_red_get(a_type, a_field, pathp->node)) { \ + a_type *right = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + a_type *rightleft = rbtn_left_get(a_type, a_field, \ + right); \ + a_type *tnode; \ + if (rbtn_red_get(a_type, a_field, rightleft)) { \ + /* In the following diagrams, ||, //, and \\ */\ + /* indicate the path to the removed node. */\ + /* */\ + /* || */\ + /* pathp(r) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + /* */\ + rbtn_black_set(a_type, a_field, pathp->node); \ + rbtn_rotate_right(a_type, a_field, right, tnode); \ + rbtn_right_set(a_type, a_field, pathp->node, tnode);\ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + } else { \ + /* || */\ + /* pathp(r) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + /* */\ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + } \ + /* Balance restored, but rotation modified subtree */\ + /* root. 
*/\ + assert((uintptr_t)pathp > (uintptr_t)path); \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } \ + return; \ + } else { \ + a_type *right = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + a_type *rightleft = rbtn_left_get(a_type, a_field, \ + right); \ + if (rbtn_red_get(a_type, a_field, rightleft)) { \ + /* || */\ + /* pathp(b) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, rightleft); \ + rbtn_rotate_right(a_type, a_field, right, tnode); \ + rbtn_right_set(a_type, a_field, pathp->node, tnode);\ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + /* Balance restored, but rotation modified */\ + /* subree root, which may actually be the tree */\ + /* root. */\ + if (pathp == path) { \ + /* Set root. */ \ + rbtree->rbt_root = tnode; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } \ + } \ + return; \ + } else { \ + /* || */\ + /* pathp(b) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + a_type *tnode; \ + rbtn_red_set(a_type, a_field, pathp->node); \ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + pathp->node = tnode; \ + } \ + } \ + } else { \ + a_type *left; \ + rbtn_right_set(a_type, a_field, pathp->node, \ + pathp[1].node); \ + left = rbtn_left_get(a_type, a_field, pathp->node); \ + if (rbtn_red_get(a_type, a_field, left)) { \ + a_type *tnode; \ + a_type *leftright = rbtn_right_get(a_type, a_field, \ + left); \ + a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ + leftright); \ + if (rbtn_red_get(a_type, a_field, leftrightleft)) { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (r) (b) */\ + /* \ */\ + /* (b) */\ + /* / */\ + /* (r) */\ + a_type *unode; \ + rbtn_black_set(a_type, a_field, leftrightleft); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + unode); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + rbtn_right_set(a_type, a_field, unode, tnode); \ + rbtn_rotate_left(a_type, a_field, unode, tnode); \ + } else { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (r) (b) */\ + /* \ */\ + /* (b) */\ + /* / */\ + /* (b) */\ + assert(leftright != &rbtree->rbt_nil); \ + rbtn_red_set(a_type, a_field, leftright); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + rbtn_black_set(a_type, a_field, tnode); \ + } \ + /* Balance restored, but rotation modified subtree */\ + /* root, which may actually be the tree root. */\ + if (pathp == path) { \ + /* Set root. */ \ + rbtree->rbt_root = tnode; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } \ + } \ + return; \ + } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ + a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ + if (rbtn_red_get(a_type, a_field, leftleft)) { \ + /* || */\ + /* pathp(r) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, pathp->node); \ + rbtn_red_set(a_type, a_field, left); \ + rbtn_black_set(a_type, a_field, leftleft); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + /* Balance restored, but rotation modified */\ + /* subtree root. 
*/\ + assert((uintptr_t)pathp > (uintptr_t)path); \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } \ + return; \ + } else { \ + /* || */\ + /* pathp(r) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + rbtn_red_set(a_type, a_field, left); \ + rbtn_black_set(a_type, a_field, pathp->node); \ + /* Balance restored. */ \ + return; \ + } \ + } else { \ + a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ + if (rbtn_red_get(a_type, a_field, leftleft)) { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, leftleft); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + /* Balance restored, but rotation modified */\ + /* subtree root, which may actually be the tree */\ + /* root. */\ + if (pathp == path) { \ + /* Set root. */ \ + rbtree->rbt_root = tnode; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } \ + } \ + return; \ + } else { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + rbtn_red_set(a_type, a_field, left); \ + } \ + } \ + } \ + } \ + /* Set root. */ \ + rbtree->rbt_root = path->node; \ + assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \ +} \ +a_attr a_type * \ +a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + if (node == &rbtree->rbt_nil) { \ + return (&rbtree->rbt_nil); \ + } else { \ + a_type *ret; \ + if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ + a_field, node), cb, arg)) != &rbtree->rbt_nil \ + || (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + int cmp = a_cmp(start, node); \ + if (cmp < 0) { \ + a_type *ret; \ + if ((ret = a_prefix##iter_start(rbtree, start, \ + rbtn_left_get(a_type, a_field, node), cb, arg)) != \ + &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg)); \ + } else if (cmp > 0) { \ + return (a_prefix##iter_start(rbtree, start, \ + rbtn_right_get(a_type, a_field, node), cb, arg)); \ + } else { \ + a_type *ret; \ + if ((ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ + a_rbt_type *, a_type *, void *), void *arg) { \ + a_type *ret; \ + if (start != NULL) { \ + ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root, \ + cb, arg); \ + } else { \ + ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + if (node == &rbtree->rbt_nil) { \ + return (&rbtree->rbt_nil); \ + } else { \ 
+ a_type *ret; \ + if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != \ + &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ + a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \ + void *arg) { \ + int cmp = a_cmp(start, node); \ + if (cmp > 0) { \ + a_type *ret; \ + if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != \ + &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } else if (cmp < 0) { \ + return (a_prefix##reverse_iter_start(rbtree, start, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } else { \ + a_type *ret; \ + if ((ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + a_type *ret; \ + if (start != NULL) { \ + ret = a_prefix##reverse_iter_start(rbtree, start, \ + rbtree->rbt_root, cb, arg); \ + } else { \ + ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ + cb, arg); \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ + return (ret); \ +} + +#endif /* RB_H_ */ diff --git a/dep/include/jemalloc/internal/stats.h b/dep/include/jemalloc/internal/stats.h new file mode 100644 index 00000000000..cbf035ff2b9 --- /dev/null +++ b/dep/include/jemalloc/internal/stats.h @@ -0,0 +1,174 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#define UMAX2S_BUFSIZE 65 + +#ifdef JEMALLOC_STATS +typedef struct tcache_bin_stats_s tcache_bin_stats_t; +typedef struct malloc_bin_stats_s malloc_bin_stats_t; +typedef struct malloc_large_stats_s malloc_large_stats_t; +typedef struct arena_stats_s arena_stats_t; +#endif +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) +typedef struct chunk_stats_s chunk_stats_t; +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#ifdef JEMALLOC_STATS + +#ifdef JEMALLOC_TCACHE +struct tcache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. + */ + uint64_t nrequests; +}; +#endif + +struct malloc_bin_stats_s { + /* + * Current number of bytes allocated, including objects currently + * cached by tcache. + */ + size_t allocated; + + /* + * Total number of allocation/deallocation requests served directly by + * the bin. Note that tcache may allocate an object, then recycle it + * many times, resulting many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to the size of this + * bin. This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + +#ifdef JEMALLOC_TCACHE + /* Number of tcache fills from this bin. 
*/ + uint64_t nfills; + + /* Number of tcache flushes to this bin. */ + uint64_t nflushes; +#endif + + /* Total number of runs created for this bin's size class. */ + uint64_t nruns; + + /* + * Total number of runs reused by extracting them from the runs tree for + * this bin's size class. + */ + uint64_t reruns; + + /* High-water mark for this bin. */ + size_t highruns; + + /* Current number of runs in this bin. */ + size_t curruns; +}; + +struct malloc_large_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. Note that tcache may allocate an object, then recycle it + * many times, resulting many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to this size class. + * This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* High-water mark for this size class. */ + size_t highruns; + + /* Current number of runs of this size class. */ + size_t curruns; +}; + +struct arena_stats_s { + /* Number of bytes currently mapped. */ + size_t mapped; + + /* + * Total number of purge sweeps, total number of madvise calls made, + * and total pages purged in order to keep dirty unused memory under + * control. + */ + uint64_t npurge; + uint64_t nmadvise; + uint64_t purged; + + /* Per-size-category statistics. */ + size_t allocated_large; + uint64_t nmalloc_large; + uint64_t ndalloc_large; + uint64_t nrequests_large; + + /* + * One element for each possible size class, including sizes that + * overlap with bin size classes. This is necessary because ipalloc() + * sometimes has to use such large objects in order to assure proper + * alignment. + */ + malloc_large_stats_t *lstats; +}; +#endif /* JEMALLOC_STATS */ + +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) +struct chunk_stats_s { +# ifdef JEMALLOC_STATS + /* Number of chunks that were allocated. */ + uint64_t nchunks; +# endif + + /* High-water mark for number of chunks allocated. */ + size_t highchunks; + + /* + * Current number of chunks allocated. This value isn't maintained for + * any other purpose, so keep track of it in order to be able to set + * highchunks. + */ + size_t curchunks; +}; +#endif /* JEMALLOC_STATS */ + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_stats_print; + +char *umax2s(uintmax_t x, unsigned base, char *s); +#ifdef JEMALLOC_STATS +void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); +void malloc_printf(const char *format, ...) 
+ JEMALLOC_ATTR(format(printf, 1, 2)); +#endif +void stats_print(void (*write)(void *, const char *), void *cbopaque, + const char *opts); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_STATS +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +#endif /* JEMALLOC_STATS */ +/******************************************************************************/ diff --git a/dep/include/jemalloc/internal/tcache.h b/dep/include/jemalloc/internal/tcache.h new file mode 100644 index 00000000000..c76597fafab --- /dev/null +++ b/dep/include/jemalloc/internal/tcache.h @@ -0,0 +1,380 @@ +#ifdef JEMALLOC_TCACHE +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct tcache_bin_s tcache_bin_t; +typedef struct tcache_s tcache_t; + +/* + * Absolute maximum number of cache slots for each small bin in the thread + * cache. This is an additional constraint beyond that imposed as: twice the + * number of regions per run for this size class. + * + * This constant must be an even number. + */ +#define TCACHE_NSLOTS_SMALL_MAX 200 + +/* Number of cache slots for large size classes. */ +#define TCACHE_NSLOTS_LARGE 20 + +/* (1U << opt_lg_tcache_maxclass) is used to compute tcache_maxclass. */ +#define LG_TCACHE_MAXCLASS_DEFAULT 15 + +/* + * (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation + * events between full GC sweeps (-1: disabled). Integer rounding may cause + * the actual number to be slightly higher, since GC is performed + * incrementally. + */ +#define LG_TCACHE_GC_SWEEP_DEFAULT 13 + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct tcache_bin_s { +# ifdef JEMALLOC_STATS + tcache_bin_stats_t tstats; +# endif + unsigned low_water; /* Min # cached since last GC. */ + unsigned high_water; /* Max # cached since last GC. */ + unsigned ncached; /* # of cached objects. */ + unsigned ncached_max; /* Upper limit on ncached. */ + void *avail; /* Chain of available objects. */ +}; + +struct tcache_s { +# ifdef JEMALLOC_STATS + ql_elm(tcache_t) link; /* Used for aggregating stats. */ +# endif +# ifdef JEMALLOC_PROF + uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */ +# endif + arena_t *arena; /* This thread's arena. */ + unsigned ev_cnt; /* Event count since incremental GC. */ + unsigned next_gc_bin; /* Next bin to GC. */ + tcache_bin_t tbins[1]; /* Dynamically sized. */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_tcache; +extern ssize_t opt_lg_tcache_maxclass; +extern ssize_t opt_lg_tcache_gc_sweep; + +/* Map of thread-specific caches. */ +extern __thread tcache_t *tcache_tls + JEMALLOC_ATTR(tls_model("initial-exec")); + +/* + * Number of tcache bins. There are nbins small-object bins, plus 0 or more + * large-object bins. + */ +extern size_t nhbins; + +/* Maximum cached size class. */ +extern size_t tcache_maxclass; + +/* Number of tcache allocation/deallocation events between incremental GCs. 
*/ +extern unsigned tcache_gc_incr; + +void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache_t *tcache +#endif + ); +void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache_t *tcache +#endif + ); +tcache_t *tcache_create(arena_t *arena); +void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, + size_t binind); +void tcache_destroy(tcache_t *tcache); +#ifdef JEMALLOC_STATS +void tcache_stats_merge(tcache_t *tcache, arena_t *arena); +#endif +void tcache_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void tcache_event(tcache_t *tcache); +tcache_t *tcache_get(void); +void *tcache_alloc_easy(tcache_bin_t *tbin); +void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); +void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); +void tcache_dalloc_small(tcache_t *tcache, void *ptr); +void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) +JEMALLOC_INLINE tcache_t * +tcache_get(void) +{ + tcache_t *tcache; + + if ((isthreaded & opt_tcache) == false) + return (NULL); + + tcache = tcache_tls; + if ((uintptr_t)tcache <= (uintptr_t)1) { + if (tcache == NULL) { + tcache = tcache_create(choose_arena()); + if (tcache == NULL) + return (NULL); + } else + return (NULL); + } + + return (tcache); +} + +JEMALLOC_INLINE void +tcache_event(tcache_t *tcache) +{ + + if (tcache_gc_incr == 0) + return; + + tcache->ev_cnt++; + assert(tcache->ev_cnt <= tcache_gc_incr); + if (tcache->ev_cnt == tcache_gc_incr) { + size_t binind = tcache->next_gc_bin; + tcache_bin_t *tbin = &tcache->tbins[binind]; + + if (tbin->low_water > 0) { + /* + * Flush (ceiling) 3/4 of the objects below the low + * water mark. 
+ */ + if (binind < nbins) { + tcache_bin_flush_small(tbin, binind, + tbin->ncached - tbin->low_water + + (tbin->low_water >> 2) +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache +#endif + ); + } else { + tcache_bin_flush_large(tbin, binind, + tbin->ncached - tbin->low_water + + (tbin->low_water >> 2) +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache +#endif + ); + } + } + tbin->low_water = tbin->ncached; + tbin->high_water = tbin->ncached; + + tcache->next_gc_bin++; + if (tcache->next_gc_bin == nhbins) + tcache->next_gc_bin = 0; + tcache->ev_cnt = 0; + } +} + +JEMALLOC_INLINE void * +tcache_alloc_easy(tcache_bin_t *tbin) +{ + void *ret; + + if (tbin->ncached == 0) + return (NULL); + tbin->ncached--; + if (tbin->ncached < tbin->low_water) + tbin->low_water = tbin->ncached; + ret = tbin->avail; + tbin->avail = *(void **)ret; + return (ret); +} + +JEMALLOC_INLINE void * +tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) +{ + void *ret; + size_t binind; + tcache_bin_t *tbin; + + binind = small_size2bin[size]; + assert(binind < nbins); + tbin = &tcache->tbins[binind]; + ret = tcache_alloc_easy(tbin); + if (ret == NULL) { + ret = tcache_alloc_small_hard(tcache, tbin, binind); + if (ret == NULL) + return (NULL); + } + assert(arena_salloc(ret) == tcache->arena->bins[binind].reg_size); + + if (zero == false) { +#ifdef JEMALLOC_FILL + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); +#endif + } else + memset(ret, 0, size); + +#ifdef JEMALLOC_STATS + tbin->tstats.nrequests++; +#endif +#ifdef JEMALLOC_PROF + tcache->prof_accumbytes += tcache->arena->bins[binind].reg_size; +#endif + tcache_event(tcache); + return (ret); +} + +JEMALLOC_INLINE void * +tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) +{ + void *ret; + size_t binind; + tcache_bin_t *tbin; + + size = PAGE_CEILING(size); + assert(size <= tcache_maxclass); + binind = nbins + (size >> PAGE_SHIFT) - 1; + assert(binind < nhbins); + tbin = &tcache->tbins[binind]; + ret = tcache_alloc_easy(tbin); + if (ret == NULL) { + /* + * Only allocate one large object at a time, because it's quite + * expensive to create one and not use it. 
+		 */
+		ret = arena_malloc_large(tcache->arena, size, zero);
+		if (ret == NULL)
+			return (NULL);
+	} else {
+#ifdef JEMALLOC_PROF
+		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
+		size_t pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk)
+		    >> PAGE_SHIFT);
+		chunk->map[pageind].bits |= CHUNK_MAP_CLASS_MASK;
+#endif
+		if (zero == false) {
+#ifdef JEMALLOC_FILL
+			if (opt_junk)
+				memset(ret, 0xa5, size);
+			else if (opt_zero)
+				memset(ret, 0, size);
+#endif
+		} else
+			memset(ret, 0, size);
+
+#ifdef JEMALLOC_STATS
+		tbin->tstats.nrequests++;
+#endif
+#ifdef JEMALLOC_PROF
+		tcache->prof_accumbytes += size;
+#endif
+	}
+
+	tcache_event(tcache);
+	return (ret);
+}
+
+JEMALLOC_INLINE void
+tcache_dalloc_small(tcache_t *tcache, void *ptr)
+{
+	arena_t *arena;
+	arena_chunk_t *chunk;
+	arena_run_t *run;
+	arena_bin_t *bin;
+	tcache_bin_t *tbin;
+	size_t pageind, binind;
+	arena_chunk_map_t *mapelm;
+
+	assert(arena_salloc(ptr) <= small_maxclass);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	arena = chunk->arena;
+	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+	mapelm = &chunk->map[pageind];
+	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
+	    (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
+	assert(run->magic == ARENA_RUN_MAGIC);
+	bin = run->bin;
+	binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
+	    sizeof(arena_bin_t);
+	assert(binind < nbins);
+
+#ifdef JEMALLOC_FILL
+	if (opt_junk)
+		memset(ptr, 0x5a, bin->reg_size);
+#endif
+
+	tbin = &tcache->tbins[binind];
+	if (tbin->ncached == tbin->ncached_max) {
+		tcache_bin_flush_small(tbin, binind, (tbin->ncached_max >> 1)
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+		    , tcache
+#endif
+		    );
+	}
+	assert(tbin->ncached < tbin->ncached_max);
+	*(void **)ptr = tbin->avail;
+	tbin->avail = ptr;
+	tbin->ncached++;
+	if (tbin->ncached > tbin->high_water)
+		tbin->high_water = tbin->ncached;
+
+	tcache_event(tcache);
+}
+
+JEMALLOC_INLINE void
+tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
+{
+	arena_t *arena;
+	arena_chunk_t *chunk;
+	size_t pageind, binind;
+	tcache_bin_t *tbin;
+	arena_chunk_map_t *mapelm;
+
+	assert((size & PAGE_MASK) == 0);
+	assert(arena_salloc(ptr) > small_maxclass);
+	assert(arena_salloc(ptr) <= tcache_maxclass);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	arena = chunk->arena;
+	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+	mapelm = &chunk->map[pageind];
+	binind = nbins + (size >> PAGE_SHIFT) - 1;
+
+#ifdef JEMALLOC_FILL
+	if (opt_junk)
+		memset(ptr, 0x5a, size);
+#endif
+
+	tbin = &tcache->tbins[binind];
+	if (tbin->ncached == tbin->ncached_max) {
+		tcache_bin_flush_large(tbin, binind, (tbin->ncached_max >> 1)
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+		    , tcache
+#endif
+		    );
+	}
+	assert(tbin->ncached < tbin->ncached_max);
+	*(void **)ptr = tbin->avail;
+	tbin->avail = ptr;
+	tbin->ncached++;
+	if (tbin->ncached > tbin->high_water)
+		tbin->high_water = tbin->ncached;
+
+	tcache_event(tcache);
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+#endif /* JEMALLOC_TCACHE */
diff --git a/dep/include/jemalloc/jemalloc.h b/dep/include/jemalloc/jemalloc.h
new file mode 100644
index 00000000000..d9bafbfff55
--- /dev/null
+++ b/dep/include/jemalloc/jemalloc.h
@@ -0,0 +1,42 @@
+#ifndef JEMALLOC_H_
+#define JEMALLOC_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define JEMALLOC_VERSION "1.0.0-0-g5523399"
+#define
JEMALLOC_VERSION_MAJOR 1 +#define JEMALLOC_VERSION_MINOR 0 +#define JEMALLOC_VERSION_BUGFIX 0 +#define JEMALLOC_VERSION_NREV 0 +#define JEMALLOC_VERSION_GID "5523399" + +#include "jemalloc_defs.h" +#ifndef JEMALLOC_P +# define JEMALLOC_P(s) s +#endif + +extern const char *JEMALLOC_P(malloc_options); +extern void (*JEMALLOC_P(malloc_message))(void *, const char *); + +void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc); +void *JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc); +int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) + JEMALLOC_ATTR(nonnull(1)); +void *JEMALLOC_P(realloc)(void *ptr, size_t size); +void JEMALLOC_P(free)(void *ptr); + +size_t JEMALLOC_P(malloc_usable_size)(const void *ptr); +void JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), + void *cbopaque, const char *opts); +int JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, + void *newp, size_t newlen); +int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, + size_t *miblenp); +int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); + +#ifdef __cplusplus +}; +#endif +#endif /* JEMALLOC_H_ */ diff --git a/dep/include/jemalloc/jemalloc.h.in b/dep/include/jemalloc/jemalloc.h.in new file mode 100644 index 00000000000..8ef8183686e --- /dev/null +++ b/dep/include/jemalloc/jemalloc.h.in @@ -0,0 +1,42 @@ +#ifndef JEMALLOC_H_ +#define JEMALLOC_H_ +#ifdef __cplusplus +extern "C" { +#endif + +#define JEMALLOC_VERSION "@jemalloc_version@" +#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ +#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ +#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ +#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ +#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" + +#include "jemalloc_defs@install_suffix@.h" +#ifndef JEMALLOC_P +# define JEMALLOC_P(s) s +#endif + +extern const char *JEMALLOC_P(malloc_options); +extern void (*JEMALLOC_P(malloc_message))(void *, const char *); + +void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc); +void *JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc); +int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) + JEMALLOC_ATTR(nonnull(1)); +void *JEMALLOC_P(realloc)(void *ptr, size_t size); +void JEMALLOC_P(free)(void *ptr); + +size_t JEMALLOC_P(malloc_usable_size)(const void *ptr); +void JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), + void *cbopaque, const char *opts); +int JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, + void *newp, size_t newlen); +int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, + size_t *miblenp); +int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); + +#ifdef __cplusplus +}; +#endif +#endif /* JEMALLOC_H_ */ diff --git a/dep/include/jemalloc/jemalloc_defs.h b/dep/include/jemalloc/jemalloc_defs.h new file mode 100644 index 00000000000..e8acaed3abd --- /dev/null +++ b/dep/include/jemalloc/jemalloc_defs.h @@ -0,0 +1,102 @@ +/* include/jemalloc/jemalloc_defs.h. Generated from jemalloc_defs.h.in by configure. */ +#ifndef JEMALLOC_DEFS_H_ +#define JEMALLOC_DEFS_H_ + +/* + * If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed. + * This makes it possible, with some care, to use multiple allocators + * simultaneously. 
+ * + * In many cases it is more convenient to manually prefix allocator function + * calls than to let macros do it automatically, particularly when using + * multiple allocators simultaneously. Define JEMALLOC_MANGLE before + * #include'ing jemalloc.h in order to cause name mangling that corresponds to + * the API prefixing. + */ +/* #undef JEMALLOC_PREFIX */ +#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE)) +/* #undef JEMALLOC_P */ +#endif + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT __asm__ volatile("pause") + +/* Defined if __attribute__((...)) syntax is supported. */ +#define JEMALLOC_HAVE_ATTR +#ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ATTR(s) __attribute__((s)) +#else +# define JEMALLOC_ATTR(s) +#endif + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +/* #undef JEMALLOC_STATS */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* + * JEMALLOC_TINY enables support for tiny objects, which are smaller than one + * quantum. + */ +#define JEMALLOC_TINY + +/* + * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. + * This makes it possible to allocate/deallocate objects without any locking + * when the cache is in the steady state. + */ +#define JEMALLOC_TCACHE + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage + * segment (DSS). + */ +/* #undef JEMALLOC_DSS */ + +/* JEMALLOC_SWAP enables mmap()ed swap file support. */ +/* #undef JEMALLOC_SWAP */ + +/* Support memory filling (junk/zero). */ +/* #undef JEMALLOC_FILL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support SYSV semantics. */ +/* #undef JEMALLOC_SYSV */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +#define JEMALLOC_LAZY_LOCK + +/* Determine page size at run time if defined. */ +/* #undef DYNAMIC_PAGE_SHIFT */ + +/* One page is 2^STATIC_PAGE_SHIFT bytes. */ +#define STATIC_PAGE_SHIFT 12 + +/* TLS is used to map arenas and magazine caches to threads. */ +/* #undef NO_TLS */ + +/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ +#define LG_SIZEOF_PTR 3 + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +#endif /* JEMALLOC_DEFS_H_ */ diff --git a/dep/include/jemalloc/jemalloc_defs.h.in b/dep/include/jemalloc/jemalloc_defs.h.in new file mode 100644 index 00000000000..8b98d670acc --- /dev/null +++ b/dep/include/jemalloc/jemalloc_defs.h.in @@ -0,0 +1,101 @@ +#ifndef JEMALLOC_DEFS_H_ +#define JEMALLOC_DEFS_H_ + +/* + * If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed. + * This makes it possible, with some care, to use multiple allocators + * simultaneously. + * + * In many cases it is more convenient to manually prefix allocator function + * calls than to let macros do it automatically, particularly when using + * multiple allocators simultaneously. Define JEMALLOC_MANGLE before + * #include'ing jemalloc.h in order to cause name mangling that corresponds to + * the API prefixing. 
+ */ +#undef JEMALLOC_PREFIX +#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE)) +#undef JEMALLOC_P +#endif + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#undef CPU_SPINWAIT + +/* Defined if __attribute__((...)) syntax is supported. */ +#undef JEMALLOC_HAVE_ATTR +#ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ATTR(s) __attribute__((s)) +#else +# define JEMALLOC_ATTR(s) +#endif + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +#undef JEMALLOC_DEBUG + +/* JEMALLOC_STATS enables statistics calculation. */ +#undef JEMALLOC_STATS + +/* JEMALLOC_PROF enables allocation profiling. */ +#undef JEMALLOC_PROF + +/* Use libunwind for profile backtracing if defined. */ +#undef JEMALLOC_PROF_LIBUNWIND + +/* Use libgcc for profile backtracing if defined. */ +#undef JEMALLOC_PROF_LIBGCC + +/* + * JEMALLOC_TINY enables support for tiny objects, which are smaller than one + * quantum. + */ +#undef JEMALLOC_TINY + +/* + * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. + * This makes it possible to allocate/deallocate objects without any locking + * when the cache is in the steady state. + */ +#undef JEMALLOC_TCACHE + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage + * segment (DSS). + */ +#undef JEMALLOC_DSS + +/* JEMALLOC_SWAP enables mmap()ed swap file support. */ +#undef JEMALLOC_SWAP + +/* Support memory filling (junk/zero). */ +#undef JEMALLOC_FILL + +/* Support optional abort() on OOM. */ +#undef JEMALLOC_XMALLOC + +/* Support SYSV semantics. */ +#undef JEMALLOC_SYSV + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +#undef JEMALLOC_LAZY_LOCK + +/* Determine page size at run time if defined. */ +#undef DYNAMIC_PAGE_SHIFT + +/* One page is 2^STATIC_PAGE_SHIFT bytes. */ +#undef STATIC_PAGE_SHIFT + +/* TLS is used to map arenas and magazine caches to threads. */ +#undef NO_TLS + +/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ +#undef LG_SIZEOF_PTR + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#undef LG_SIZEOF_INT + +#endif /* JEMALLOC_DEFS_H_ */ |
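
The iterator wrappers generated above by rb.h follow a simple callback protocol: the callback returns NULL to continue the in-order (or reverse) walk and a non-NULL pointer to stop it, and iter()/reverse_iter() return whichever node stopped the walk, or NULL once every node has been visited (the internal rbt_nil sentinel is translated to NULL before returning). The sketch below is illustration only and is not part of the imported headers; ex_tree_t, ex_node_t and the ex_ prefix are hypothetical stand-ins for the a_rbt_type, a_type and a_prefix arguments handed to the generator macro earlier in rb.h.

/* Count every node in a tree via the generated in-order iterator. */
static ex_node_t *
count_cb(ex_tree_t *tree, ex_node_t *node, void *arg)
{
	unsigned *count = (unsigned *)arg;

	(void)tree;
	(void)node;
	(*count)++;
	return (NULL);	/* NULL: keep walking to the next node. */
}

static unsigned
ex_count(ex_tree_t *tree)
{
	unsigned count = 0;

	/* A NULL start begins the walk at the smallest node in the tree. */
	ex_iter(tree, NULL, count_cb, &count);
	return (count);
}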
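
For reference, the incremental GC in tcache_event() above trims a bin by passing tcache_bin_flush_small()/tcache_bin_flush_large() the number of objects to keep, computed as ncached - low_water + (low_water >> 2); of the low_water objects that sat unused since the last sweep, ceil(3/4) are flushed. A small self-contained check of that arithmetic follows; tcache_gc_keep() is a hypothetical helper written only for this illustration and does not exist in tcache.h.

#include <assert.h>

/* Number of cached objects the GC pass retains for one bin. */
static unsigned
tcache_gc_keep(unsigned ncached, unsigned low_water)
{
	/* Keep everything above low_water, plus floor(low_water / 4) of it. */
	return (ncached - low_water + (low_water >> 2));
}

int
main(void)
{
	/* ncached = 15, low_water = 10: keep 7, flush 8 = ceil(3/4 * 10). */
	assert(tcache_gc_keep(15, 10) == 7);
	/* low_water == 0 skips the flush path entirely, so all 6 remain. */
	assert(tcache_gc_keep(6, 0) == 6);
	return (0);
}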
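
Finally, the public entry points declared in jemalloc.h above, all wrapped in JEMALLOC_P() so they can be renamed when JEMALLOC_PREFIX is configured, can be exercised as in the following sketch. This is not part of the vendored headers: it assumes dep/include is on the include path and that the library was built without JEMALLOC_PREFIX, so JEMALLOC_P(malloc) expands to plain malloc(). Note that the "stats.*" mallctl namespace is only populated when JEMALLOC_STATS is defined, which the bundled jemalloc_defs.h leaves disabled.

#include <stdio.h>
#include "jemalloc/jemalloc.h"

int
main(void)
{
	const char *version;
	size_t sz = sizeof(version);
	void *p;

	/* "version" is always readable through the mallctl namespace. */
	if (JEMALLOC_P(mallctl)("version", &version, &sz, NULL, 0) == 0)
		printf("jemalloc %s\n", version);

	p = JEMALLOC_P(malloc)(100);
	if (p != NULL) {
		/* Usable size may exceed the request (size-class rounding). */
		printf("requested 100, usable %zu\n",
		    JEMALLOC_P(malloc_usable_size)(p));
		JEMALLOC_P(free)(p);
	}

	/* NULL write callback: human-readable stats go to malloc_message. */
	JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL);
	return (0);
}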