aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author	raczman <none@none>	2010-04-20 10:58:41 +0200
committer	raczman <none@none>	2010-04-20 10:58:41 +0200
commit	1532a2f9a2b227e0011a81959403a3b0b05153ba (patch)
tree	293e9bea38c5caee2fcef1c8cc1fcf38b505ece9
parent	b34e2bdb1c988450d9737d7565f9d357edff4dc7 (diff)
Use jemalloc as memory allocator on linux.
In comparison to the standard glibc allocator, jemalloc fragments address space less, and scales linearly in multithreaded environments. Author: Jason Evans, mad props to him. --HG-- branch : trunk
-rw-r--r--dep/include/jemalloc/internal/arena.h537
-rw-r--r--dep/include/jemalloc/internal/base.h24
-rw-r--r--dep/include/jemalloc/internal/chunk.h61
-rw-r--r--dep/include/jemalloc/internal/chunk_dss.h29
-rw-r--r--dep/include/jemalloc/internal/chunk_mmap.h20
-rw-r--r--dep/include/jemalloc/internal/chunk_swap.h33
-rw-r--r--dep/include/jemalloc/internal/ckh.h95
-rw-r--r--dep/include/jemalloc/internal/ctl.h117
-rw-r--r--dep/include/jemalloc/internal/extent.h49
-rw-r--r--dep/include/jemalloc/internal/hash.h70
-rw-r--r--dep/include/jemalloc/internal/huge.h38
-rw-r--r--dep/include/jemalloc/internal/jemalloc_internal.h561
-rw-r--r--dep/include/jemalloc/internal/jemalloc_internal.h.in561
-rw-r--r--dep/include/jemalloc/internal/mb.h108
-rw-r--r--dep/include/jemalloc/internal/mutex.h61
-rw-r--r--dep/include/jemalloc/internal/prn.h60
-rw-r--r--dep/include/jemalloc/internal/prof.h171
-rw-r--r--dep/include/jemalloc/internal/ql.h83
-rw-r--r--dep/include/jemalloc/internal/qr.h67
-rw-r--r--dep/include/jemalloc/internal/rb.h973
-rw-r--r--dep/include/jemalloc/internal/stats.h174
-rw-r--r--dep/include/jemalloc/internal/tcache.h380
-rw-r--r--dep/include/jemalloc/jemalloc.h42
-rw-r--r--dep/include/jemalloc/jemalloc.h.in42
-rw-r--r--dep/include/jemalloc/jemalloc_defs.h102
-rw-r--r--dep/include/jemalloc/jemalloc_defs.h.in101
-rw-r--r--dep/src/CMakeLists.txt1
-rw-r--r--dep/src/jmalloc/CMakeLists.txt27
-rw-r--r--dep/src/jmalloc/arena.c2446
-rw-r--r--dep/src/jmalloc/base.c106
-rw-r--r--dep/src/jmalloc/chunk.c150
-rw-r--r--dep/src/jmalloc/chunk_dss.c268
-rw-r--r--dep/src/jmalloc/chunk_mmap.c201
-rw-r--r--dep/src/jmalloc/chunk_swap.c383
-rw-r--r--dep/src/jmalloc/ckh.c601
-rw-r--r--dep/src/jmalloc/ctl.c1482
-rw-r--r--dep/src/jmalloc/extent.c41
-rw-r--r--dep/src/jmalloc/hash.c2
-rw-r--r--dep/src/jmalloc/huge.c298
-rw-r--r--dep/src/jmalloc/jemalloc.c1349
-rw-r--r--dep/src/jmalloc/mb.c2
-rw-r--r--dep/src/jmalloc/mutex.c70
-rw-r--r--dep/src/jmalloc/prof.c1328
-rw-r--r--dep/src/jmalloc/stats.c717
-rw-r--r--dep/src/jmalloc/tcache.c403
-rw-r--r--src/trinitycore/CMakeLists.txt1
46 files changed, 14435 insertions, 0 deletions
diff --git a/dep/include/jemalloc/internal/arena.h b/dep/include/jemalloc/internal/arena.h
new file mode 100644
index 00000000000..bb4ce2a54f7
--- /dev/null
+++ b/dep/include/jemalloc/internal/arena.h
@@ -0,0 +1,537 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/*
+ * Subpages are an artificially designated partitioning of pages. Their only
+ * purpose is to support subpage-spaced size classes.
+ *
+ * There must be at least 4 subpages per page, due to the way size classes are
+ * handled.
+ */
+#define LG_SUBPAGE 8
+#define SUBPAGE ((size_t)(1U << LG_SUBPAGE))
+#define SUBPAGE_MASK (SUBPAGE - 1)
+
+/* Return the smallest subpage multiple that is >= s. */
+#define SUBPAGE_CEILING(s) \
+ (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
+
+#ifdef JEMALLOC_TINY
+ /* Smallest size class to support. */
+# define LG_TINY_MIN LG_SIZEOF_PTR
+#endif
+
+/*
+ * Maximum size class that is a multiple of the quantum, but not (necessarily)
+ * a power of 2. Above this size, allocations are rounded up to the nearest
+ * power of 2.
+ */
+#define LG_QSPACE_MAX_DEFAULT 7
+
+/*
+ * Maximum size class that is a multiple of the cacheline, but not (necessarily)
+ * a power of 2. Above this size, allocations are rounded up to the nearest
+ * power of 2.
+ */
+#define LG_CSPACE_MAX_DEFAULT 9
+
+/*
+ * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
+ * as small as possible such that this setting is still honored, without
+ * violating other constraints. The goal is to make runs as small as possible
+ * without exceeding a per run external fragmentation threshold.
+ *
+ * We use binary fixed point math for overhead computations, where the binary
+ * point is implicitly RUN_BFP bits to the left.
+ *
+ * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
+ * honored for some/all object sizes, since there is one bit of header overhead
+ * per object (plus a constant). This constraint is relaxed (ignored) for runs
+ * that are so small that the per-region overhead is greater than:
+ *
+ * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
+ */
+#define RUN_BFP 12
+/* \/ Implicit binary fixed point. */
+#define RUN_MAX_OVRHD 0x0000003dU
+#define RUN_MAX_OVRHD_RELAX 0x00001800U
+
+/*
+ * The minimum ratio of active:dirty pages per arena is computed as:
+ *
+ * (nactive >> opt_lg_dirty_mult) >= ndirty
+ *
+ * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
+ * times as many active pages as dirty pages.
+ */
+#define LG_DIRTY_MULT_DEFAULT 5
+
+typedef struct arena_chunk_map_s arena_chunk_map_t;
+typedef struct arena_chunk_s arena_chunk_t;
+typedef struct arena_run_s arena_run_t;
+typedef struct arena_bin_s arena_bin_t;
+typedef struct arena_s arena_t;
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+/* Each element of the chunk map corresponds to one page within the chunk. */
+struct arena_chunk_map_s {
+ union {
+ /*
+ * Linkage for run trees. There are two disjoint uses:
+ *
+ * 1) arena_t's runs_avail_{clean,dirty} trees.
+ * 2) arena_run_t conceptually uses this linkage for in-use
+ * non-full runs, rather than directly embedding linkage.
+ */
+ rb_node(arena_chunk_map_t) rb_link;
+ /*
+ * List of runs currently in purgatory. arena_chunk_purge()
+ * temporarily allocates runs that contain dirty pages while
+ * purging, so that other threads cannot use the runs while the
+ * purging thread is operating without the arena lock held.
+ */
+ ql_elm(arena_chunk_map_t) ql_link;
+ } u;
+
+#ifdef JEMALLOC_PROF
+ /* Profile counters, used for large object runs. */
+ prof_thr_cnt_t *prof_cnt;
+#endif
+
+ /*
+ * Run address (or size) and various flags are stored together. The bit
+ * layout looks like (assuming 32-bit system):
+ *
+ * ???????? ???????? ????---- ----dzla
+ *
+ * ? : Unallocated: Run address for first/last pages, unset for internal
+ * pages.
+ * Small: Run page offset.
+ * Large: Run size for first page, unset for trailing pages.
+ * - : Unused.
+ * d : dirty?
+ * z : zeroed?
+ * l : large?
+ * a : allocated?
+ *
+ * Following are example bit patterns for the three types of runs.
+ *
+ * p : run page offset
+ * s : run size
+ * c : size class (used only if prof_promote is true)
+ * x : don't care
+ * - : 0
+ * + : 1
+ * [DZLA] : bit set
+ * [dzla] : bit unset
+ *
+ * Unallocated (clean):
+ * ssssssss ssssssss ssss---- ----dz--
+ * xxxxxxxx xxxxxxxx xxxx---- -----Zxx
+ * ssssssss ssssssss ssss---- ----dZ--
+ *
+ * Unallocated (dirty):
+ * ssssssss ssssssss ssss---- ----D---
+ * xxxxxxxx xxxxxxxx xxxx---- ----xxxx
+ * ssssssss ssssssss ssss---- ----D---
+ *
+ * Small:
+ * pppppppp pppppppp pppp---- ----d--a
+ * pppppppp pppppppp pppp---- -------a
+ * pppppppp pppppppp pppp---- ----d--a
+ *
+ * Large:
+ * ssssssss ssssssss ssss++++ ++++D-la
+ * xxxxxxxx xxxxxxxx xxxx---- ----xxxx
+ * -------- -------- -------- ----D-la
+ *
+ * Large (sampled, size <= PAGE_SIZE):
+ * ssssssss ssssssss sssscccc ccccD-la
+ *
+ * Large (not sampled, size == PAGE_SIZE):
+ * ssssssss ssssssss ssss++++ ++++D-la
+ */
+ size_t bits;
+#ifdef JEMALLOC_PROF
+#define CHUNK_MAP_CLASS_SHIFT 4
+#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U)
+#endif
+#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU)
+#define CHUNK_MAP_DIRTY ((size_t)0x8U)
+#define CHUNK_MAP_ZEROED ((size_t)0x4U)
+#define CHUNK_MAP_LARGE ((size_t)0x2U)
+#define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
+#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED
+};
+typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
+typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
+
+/* Arena chunk header. */
+struct arena_chunk_s {
+ /* Arena that owns the chunk. */
+ arena_t *arena;
+
+ /* Linkage for the arena's chunks_dirty list. */
+ ql_elm(arena_chunk_t) link_dirty;
+
+ /*
+ * True if the chunk is currently in the chunks_dirty list, due to
+ * having at some point contained one or more dirty pages. Removal
+ * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
+ */
+ bool dirtied;
+
+ /* Number of dirty pages. */
+ size_t ndirty;
+
+ /* Map of pages within chunk that keeps track of free/large/small. */
+ arena_chunk_map_t map[1]; /* Dynamically sized. */
+};
+typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
+
+struct arena_run_s {
+#ifdef JEMALLOC_DEBUG
+ uint32_t magic;
+# define ARENA_RUN_MAGIC 0x384adf93
+#endif
+
+ /* Bin this run is associated with. */
+ arena_bin_t *bin;
+
+ /* Stack of available freed regions, or NULL. */
+ void *avail;
+
+ /* Next region that has never been allocated, or run boundary. */
+ void *next;
+
+ /* Number of free regions in run. */
+ unsigned nfree;
+};
+
+struct arena_bin_s {
+ /*
+ * All operations on runcur, runs, and stats require that lock be
+ * locked. Run allocation/deallocation are protected by the arena lock,
+ * which may be acquired while holding one or more bin locks, but not
+ * vice versa.
+ */
+ malloc_mutex_t lock;
+
+ /*
+ * Current run being used to service allocations of this bin's size
+ * class.
+ */
+ arena_run_t *runcur;
+
+ /*
+ * Tree of non-full runs. This tree is used when looking for an
+ * existing run when runcur is no longer usable. We choose the
+ * non-full run that is lowest in memory; this policy tends to keep
+ * objects packed well, and it can also help reduce the number of
+ * almost-empty chunks.
+ */
+ arena_run_tree_t runs;
+
+ /* Size of regions in a run for this bin's size class. */
+ size_t reg_size;
+
+ /* Total size of a run for this bin's size class. */
+ size_t run_size;
+
+ /* Total number of regions in a run for this bin's size class. */
+ uint32_t nregs;
+
+#ifdef JEMALLOC_PROF
+ /*
+ * Offset of first (prof_cnt_t *) in a run header for this bin's size
+ * class, or 0 if (opt_prof == false).
+ */
+ uint32_t cnt0_offset;
+#endif
+
+ /* Offset of first region in a run for this bin's size class. */
+ uint32_t reg0_offset;
+
+#ifdef JEMALLOC_STATS
+ /* Bin statistics. */
+ malloc_bin_stats_t stats;
+#endif
+};
+
+struct arena_s {
+#ifdef JEMALLOC_DEBUG
+ uint32_t magic;
+# define ARENA_MAGIC 0x947d3d24
+#endif
+
+ /* This arena's index within the arenas array. */
+ unsigned ind;
+
+ /*
+ * All non-bin-related operations on this arena require that lock be
+ * locked.
+ */
+ malloc_mutex_t lock;
+
+#ifdef JEMALLOC_STATS
+ arena_stats_t stats;
+# ifdef JEMALLOC_TCACHE
+ /*
+ * List of tcaches for extant threads associated with this arena.
+ * Stats from these are merged incrementally, and at exit.
+ */
+ ql_head(tcache_t) tcache_ql;
+# endif
+#endif
+
+#ifdef JEMALLOC_PROF
+ uint64_t prof_accumbytes;
+#endif
+
+ /* List of dirty-page-containing chunks this arena manages. */
+ ql_head(arena_chunk_t) chunks_dirty;
+
+ /*
+ * In order to avoid rapid chunk allocation/deallocation when an arena
+ * oscillates right on the cusp of needing a new chunk, cache the most
+ * recently freed chunk. The spare is left in the arena's chunk trees
+ * until it is deleted.
+ *
+ * There is one spare chunk per arena, rather than one spare total, in
+ * order to avoid interactions between multiple threads that could make
+ * a single spare inadequate.
+ */
+ arena_chunk_t *spare;
+
+ /* Number of pages in active runs. */
+ size_t nactive;
+
+ /*
+ * Current count of pages within unused runs that are potentially
+ * dirty, and for which madvise(... MADV_DONTNEED) has not been called.
+ * By tracking this, we can institute a limit on how much dirty unused
+ * memory is mapped for each arena.
+ */
+ size_t ndirty;
+
+ /*
+ * Approximate number of pages being purged. It is possible for
+ * multiple threads to purge dirty pages concurrently, and they use
+ * npurgatory to indicate the total number of pages all threads are
+ * attempting to purge.
+ */
+ size_t npurgatory;
+
+ /*
+ * Size/address-ordered trees of this arena's available runs. The trees
+ * are used for first-best-fit run allocation. The dirty tree contains
+ * runs with dirty pages (i.e. very likely to have been touched and
+ * therefore have associated physical pages), whereas the clean tree
+ * contains runs with pages that either have no associated physical
+ * pages, or have pages that the kernel may recycle at any time due to
+ * previous madvise(2) calls. The dirty tree is used in preference to
+ * the clean tree for allocations, because using dirty pages reduces
+ * the amount of dirty purging necessary to keep the active:dirty page
+ * ratio below the purge threshold.
+ */
+ arena_avail_tree_t runs_avail_clean;
+ arena_avail_tree_t runs_avail_dirty;
+
+ /*
+ * bins is used to store trees of free regions of the following sizes,
+ * assuming a 16-byte quantum, 4 KiB page size, and default
+ * JEMALLOC_OPTIONS.
+ *
+ * bins[i] | size |
+ * --------+--------+
+ * 0 | 2 |
+ * 1 | 4 |
+ * 2 | 8 |
+ * --------+--------+
+ * 3 | 16 |
+ * 4 | 32 |
+ * 5 | 48 |
+ * : :
+ * 8 | 96 |
+ * 9 | 112 |
+ * 10 | 128 |
+ * --------+--------+
+ * 11 | 192 |
+ * 12 | 256 |
+ * 13 | 320 |
+ * 14 | 384 |
+ * 15 | 448 |
+ * 16 | 512 |
+ * --------+--------+
+ * 17 | 768 |
+ * 18 | 1024 |
+ * 19 | 1280 |
+ * : :
+ * 27 | 3328 |
+ * 28 | 3584 |
+ * 29 | 3840 |
+ * --------+--------+
+ * 30 | 4 KiB |
+ * 31 | 6 KiB |
+ * 33 | 8 KiB |
+ * : :
+ * 43 | 28 KiB |
+ * 44 | 30 KiB |
+ * 45 | 32 KiB |
+ * --------+--------+
+ */
+ arena_bin_t bins[1]; /* Dynamically sized. */
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern size_t opt_lg_qspace_max;
+extern size_t opt_lg_cspace_max;
+extern ssize_t opt_lg_dirty_mult;
+extern uint8_t const *small_size2bin;
+
+/* Various bin-related settings. */
+#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */
+# define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN))
+#else
+# define ntbins 0
+#endif
+extern unsigned nqbins; /* Number of quantum-spaced bins. */
+extern unsigned ncbins; /* Number of cacheline-spaced bins. */
+extern unsigned nsbins; /* Number of subpage-spaced bins. */
+extern unsigned nbins;
+#ifdef JEMALLOC_TINY
+# define tspace_max ((size_t)(QUANTUM >> 1))
+#endif
+#define qspace_min QUANTUM
+extern size_t qspace_max;
+extern size_t cspace_min;
+extern size_t cspace_max;
+extern size_t sspace_min;
+extern size_t sspace_max;
+#define small_maxclass sspace_max
+
+#define nlclasses (chunk_npages - arena_chunk_header_npages)
+
+#ifdef JEMALLOC_TCACHE
+void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
+ size_t binind
+# ifdef JEMALLOC_PROF
+ , uint64_t prof_accumbytes
+# endif
+ );
+#endif
+#ifdef JEMALLOC_PROF
+void arena_prof_accum(arena_t *arena, uint64_t accumbytes);
+#endif
+void *arena_malloc_small(arena_t *arena, size_t size, bool zero);
+void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
+void *arena_malloc(size_t size, bool zero);
+void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
+ size_t alloc_size);
+size_t arena_salloc(const void *ptr);
+#ifdef JEMALLOC_PROF
+void arena_prof_promoted(const void *ptr, size_t size);
+size_t arena_salloc_demote(const void *ptr);
+prof_thr_cnt_t *arena_prof_cnt_get(const void *ptr);
+void arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
+#endif
+void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+ arena_chunk_map_t *mapelm);
+void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
+#ifdef JEMALLOC_STATS
+void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
+ arena_stats_t *astats, malloc_bin_stats_t *bstats,
+ malloc_large_stats_t *lstats);
+#endif
+void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
+bool arena_new(arena_t *arena, unsigned ind);
+bool arena_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
+JEMALLOC_INLINE void
+arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
+{
+ size_t pageind;
+ arena_chunk_map_t *mapelm;
+
+ assert(arena != NULL);
+ assert(arena->magic == ARENA_MAGIC);
+ assert(chunk->arena == arena);
+ assert(ptr != NULL);
+ assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+ pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+ mapelm = &chunk->map[pageind];
+ assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
+ if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
+ /* Small allocation. */
+#ifdef JEMALLOC_TCACHE
+ tcache_t *tcache;
+
+ if ((tcache = tcache_get()) != NULL)
+ tcache_dalloc_small(tcache, ptr);
+ else {
+#endif
+ arena_run_t *run;
+ arena_bin_t *bin;
+
+ run = (arena_run_t *)((uintptr_t)chunk +
+ (uintptr_t)((pageind - (mapelm->bits >>
+ PAGE_SHIFT)) << PAGE_SHIFT));
+ assert(run->magic == ARENA_RUN_MAGIC);
+ assert(((uintptr_t)ptr - ((uintptr_t)run +
+ (uintptr_t)run->bin->reg0_offset)) %
+ run->bin->reg_size == 0);
+ bin = run->bin;
+ malloc_mutex_lock(&bin->lock);
+ arena_dalloc_bin(arena, chunk, ptr, mapelm);
+ malloc_mutex_unlock(&bin->lock);
+#ifdef JEMALLOC_TCACHE
+ }
+#endif
+ } else {
+#ifdef JEMALLOC_TCACHE
+ size_t size = mapelm->bits & ~PAGE_MASK;
+
+ assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+ if (size <= tcache_maxclass) {
+ tcache_t *tcache;
+
+ if ((tcache = tcache_get()) != NULL)
+ tcache_dalloc_large(tcache, ptr, size);
+ else {
+ malloc_mutex_lock(&arena->lock);
+ arena_dalloc_large(arena, chunk, ptr);
+ malloc_mutex_unlock(&arena->lock);
+ }
+ } else {
+ malloc_mutex_lock(&arena->lock);
+ arena_dalloc_large(arena, chunk, ptr);
+ malloc_mutex_unlock(&arena->lock);
+ }
+#else
+ assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+ malloc_mutex_lock(&arena->lock);
+ arena_dalloc_large(arena, chunk, ptr);
+ malloc_mutex_unlock(&arena->lock);
+#endif
+ }
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/dep/include/jemalloc/internal/base.h b/dep/include/jemalloc/internal/base.h
new file mode 100644
index 00000000000..e353f309bd2
--- /dev/null
+++ b/dep/include/jemalloc/internal/base.h
@@ -0,0 +1,24 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern malloc_mutex_t base_mtx;
+
+void *base_alloc(size_t size);
+extent_node_t *base_node_alloc(void);
+void base_node_dealloc(extent_node_t *node);
+bool base_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/dep/include/jemalloc/internal/chunk.h b/dep/include/jemalloc/internal/chunk.h
new file mode 100644
index 00000000000..1f6abf782f1
--- /dev/null
+++ b/dep/include/jemalloc/internal/chunk.h
@@ -0,0 +1,61 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/*
+ * Size and alignment of memory chunks that are allocated by the OS's virtual
+ * memory system.
+ */
+#define LG_CHUNK_DEFAULT 22
+
+/* Return the chunk address for allocation address a. */
+#define CHUNK_ADDR2BASE(a) \
+ ((void *)((uintptr_t)(a) & ~chunksize_mask))
+
+/* Return the chunk offset of address a. */
+#define CHUNK_ADDR2OFFSET(a) \
+ ((size_t)((uintptr_t)(a) & chunksize_mask))
+
+/* Return the smallest chunk multiple that is >= s. */
+#define CHUNK_CEILING(s) \
+ (((s) + chunksize_mask) & ~chunksize_mask)
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern size_t opt_lg_chunk;
+#ifdef JEMALLOC_SWAP
+extern bool opt_overcommit;
+#endif
+
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+/* Protects stats_chunks; currently not used for any other purpose. */
+extern malloc_mutex_t chunks_mtx;
+/* Chunk statistics. */
+extern chunk_stats_t stats_chunks;
+#endif
+
+extern size_t chunksize;
+extern size_t chunksize_mask; /* (chunksize - 1). */
+extern size_t chunk_npages;
+extern size_t arena_chunk_header_npages;
+extern size_t arena_maxclass; /* Max size class for arenas. */
+
+void *chunk_alloc(size_t size, bool *zero);
+void chunk_dealloc(void *chunk, size_t size);
+bool chunk_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
+#include "jemalloc/internal/chunk_swap.h"
+#include "jemalloc/internal/chunk_dss.h"
+#include "jemalloc/internal/chunk_mmap.h"
diff --git a/dep/include/jemalloc/internal/chunk_dss.h b/dep/include/jemalloc/internal/chunk_dss.h
new file mode 100644
index 00000000000..6be4ad1f212
--- /dev/null
+++ b/dep/include/jemalloc/internal/chunk_dss.h
@@ -0,0 +1,29 @@
+#ifdef JEMALLOC_DSS
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+/*
+ * Protects sbrk() calls. This avoids malloc races among threads, though it
+ * does not protect against races with threads that call sbrk() directly.
+ */
+extern malloc_mutex_t dss_mtx;
+
+void *chunk_alloc_dss(size_t size, bool *zero);
+bool chunk_dealloc_dss(void *chunk, size_t size);
+bool chunk_dss_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+#endif /* JEMALLOC_DSS */
diff --git a/dep/include/jemalloc/internal/chunk_mmap.h b/dep/include/jemalloc/internal/chunk_mmap.h
new file mode 100644
index 00000000000..8fb90b77c9b
--- /dev/null
+++ b/dep/include/jemalloc/internal/chunk_mmap.h
@@ -0,0 +1,20 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void *chunk_alloc_mmap(size_t size);
+void chunk_dealloc_mmap(void *chunk, size_t size);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/dep/include/jemalloc/internal/chunk_swap.h b/dep/include/jemalloc/internal/chunk_swap.h
new file mode 100644
index 00000000000..d50cb197449
--- /dev/null
+++ b/dep/include/jemalloc/internal/chunk_swap.h
@@ -0,0 +1,33 @@
+#ifdef JEMALLOC_SWAP
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern malloc_mutex_t swap_mtx;
+extern bool swap_enabled;
+extern bool swap_prezeroed;
+extern size_t swap_nfds;
+extern int *swap_fds;
+#ifdef JEMALLOC_STATS
+extern size_t swap_avail;
+#endif
+
+void *chunk_alloc_swap(size_t size, bool *zero);
+bool chunk_dealloc_swap(void *chunk, size_t size);
+bool chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed);
+bool chunk_swap_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+#endif /* JEMALLOC_SWAP */
diff --git a/dep/include/jemalloc/internal/ckh.h b/dep/include/jemalloc/internal/ckh.h
new file mode 100644
index 00000000000..c39ea5c75ef
--- /dev/null
+++ b/dep/include/jemalloc/internal/ckh.h
@@ -0,0 +1,95 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct ckh_s ckh_t;
+typedef struct ckhc_s ckhc_t;
+
+/* Typedefs to allow easy function pointer passing. */
+typedef void ckh_hash_t (const void *, unsigned, size_t *, size_t *);
+typedef bool ckh_keycomp_t (const void *, const void *);
+
+/* Maintain counters used to get an idea of performance. */
+/* #define CKH_COUNT */
+/* Print counter values in ckh_delete() (requires CKH_COUNT). */
+/* #define CKH_VERBOSE */
+
+/*
+ * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit
+ * one bucket per L1 cache line.
+ */
+#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1)
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+/* Hash table cell. */
+struct ckhc_s {
+ const void *key;
+ const void *data;
+};
+
+struct ckh_s {
+#ifdef JEMALLOC_DEBUG
+#define CKH_MAGIG 0x3af2489d
+ uint32_t magic;
+#endif
+
+#ifdef CKH_COUNT
+ /* Counters used to get an idea of performance. */
+ uint64_t ngrows;
+ uint64_t nshrinks;
+ uint64_t nshrinkfails;
+ uint64_t ninserts;
+ uint64_t nrelocs;
+#endif
+
+ /* Used for pseudo-random number generation. */
+#define CKH_A 12345
+#define CKH_C 12347
+ uint32_t prn_state;
+
+ /* Total number of items. */
+ size_t count;
+
+ /*
+ * Minimum and current number of hash table buckets. There are
+ * 2^LG_CKH_BUCKET_CELLS cells per bucket.
+ */
+ unsigned lg_minbuckets;
+ unsigned lg_curbuckets;
+
+ /* Hash and comparison functions. */
+ ckh_hash_t *hash;
+ ckh_keycomp_t *keycomp;
+
+ /* Hash table with 2^lg_curbuckets buckets. */
+ ckhc_t *tab;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
+ ckh_keycomp_t *keycomp);
+void ckh_delete(ckh_t *ckh);
+size_t ckh_count(ckh_t *ckh);
+bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
+bool ckh_insert(ckh_t *ckh, const void *key, const void *data);
+bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key,
+ void **data);
+bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data);
+void ckh_string_hash(const void *key, unsigned minbits, size_t *hash1,
+ size_t *hash2);
+bool ckh_string_keycomp(const void *k1, const void *k2);
+void ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
+ size_t *hash2);
+bool ckh_pointer_keycomp(const void *k1, const void *k2);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/dep/include/jemalloc/internal/ctl.h b/dep/include/jemalloc/internal/ctl.h
new file mode 100644
index 00000000000..7bbf21e0e85
--- /dev/null
+++ b/dep/include/jemalloc/internal/ctl.h
@@ -0,0 +1,117 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct ctl_node_s ctl_node_t;
+typedef struct ctl_arena_stats_s ctl_arena_stats_t;
+typedef struct ctl_stats_s ctl_stats_t;
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct ctl_node_s {
+ bool named;
+ union {
+ struct {
+ const char *name;
+ /* If (nchildren == 0), this is a terminal node. */
+ unsigned nchildren;
+ const ctl_node_t *children;
+ } named;
+ struct {
+ const ctl_node_t *(*index)(const size_t *, size_t,
+ size_t);
+ } indexed;
+ } u;
+ int (*ctl)(const size_t *, size_t, void *, size_t *, void *,
+ size_t);
+};
+
+struct ctl_arena_stats_s {
+ bool initialized;
+ size_t pactive;
+ size_t pdirty;
+#ifdef JEMALLOC_STATS
+ arena_stats_t astats;
+
+ /* Aggregate stats for small size classes, based on bin stats. */
+ size_t allocated_small;
+ uint64_t nmalloc_small;
+ uint64_t ndalloc_small;
+ uint64_t nrequests_small;
+
+ malloc_bin_stats_t *bstats; /* nbins elements. */
+ malloc_large_stats_t *lstats; /* nlclasses elements. */
+#endif
+};
+
+struct ctl_stats_s {
+#ifdef JEMALLOC_STATS
+ size_t allocated;
+ size_t active;
+ size_t mapped;
+ struct {
+ size_t current; /* stats_chunks.curchunks */
+ uint64_t total; /* stats_chunks.nchunks */
+ size_t high; /* stats_chunks.highchunks */
+ } chunks;
+ struct {
+ size_t allocated; /* huge_allocated */
+ uint64_t nmalloc; /* huge_nmalloc */
+ uint64_t ndalloc; /* huge_ndalloc */
+ } huge;
+#endif
+ ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */
+#ifdef JEMALLOC_SWAP
+ size_t swap_avail;
+#endif
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
+ size_t newlen);
+int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp);
+
+int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen);
+bool ctl_boot(void);
+
+#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \
+ if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen) \
+ != 0) { \
+ malloc_write("<jemalloc>: Invalid xmallctl(\""); \
+ malloc_write(name); \
+ malloc_write("\", ...) call\n"); \
+ abort(); \
+ } \
+} while (0)
+
+#define xmallctlnametomib(name, mibp, miblenp) do { \
+ if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) { \
+ malloc_write( \
+ "<jemalloc>: Invalid xmallctlnametomib(\""); \
+ malloc_write(name); \
+ malloc_write("\", ...) call\n"); \
+ abort(); \
+ } \
+} while (0)
+
+#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \
+ if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp, \
+ newlen) != 0) { \
+ malloc_write( \
+ "<jemalloc>: Invalid xmallctlbymib() call\n"); \
+ abort(); \
+ } \
+} while (0)
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
diff --git a/dep/include/jemalloc/internal/extent.h b/dep/include/jemalloc/internal/extent.h
new file mode 100644
index 00000000000..33a4e9a3852
--- /dev/null
+++ b/dep/include/jemalloc/internal/extent.h
@@ -0,0 +1,49 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct extent_node_s extent_node_t;
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+/* Tree of extents. */
+struct extent_node_s {
+#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
+ /* Linkage for the size/address-ordered tree. */
+ rb_node(extent_node_t) link_szad;
+#endif
+
+ /* Linkage for the address-ordered tree. */
+ rb_node(extent_node_t) link_ad;
+
+#ifdef JEMALLOC_PROF
+ /* Profile counters, used for huge objects. */
+ prof_thr_cnt_t *prof_cnt;
+#endif
+
+ /* Pointer to the extent that this tree node is responsible for. */
+ void *addr;
+
+ /* Total region size. */
+ size_t size;
+};
+typedef rb_tree(extent_node_t) extent_tree_t;
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
+rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t)
+#endif
+
+rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t)
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
diff --git a/dep/include/jemalloc/internal/hash.h b/dep/include/jemalloc/internal/hash.h
new file mode 100644
index 00000000000..d12cdb8359f
--- /dev/null
+++ b/dep/include/jemalloc/internal/hash.h
@@ -0,0 +1,70 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+uint64_t hash(const void *key, size_t len, uint64_t seed);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(HASH_C_))
+/*
+ * The following hash function is based on MurmurHash64A(), placed into the
+ * public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for
+ * details.
+ */
+JEMALLOC_INLINE uint64_t
+hash(const void *key, size_t len, uint64_t seed)
+{
+ const uint64_t m = 0xc6a4a7935bd1e995;
+ const int r = 47;
+ uint64_t h = seed ^ (len * m);
+ const uint64_t *data = (const uint64_t *)key;
+ const uint64_t *end = data + (len/8);
+ const unsigned char *data2;
+
+ assert(((uintptr_t)key & 0x7) == 0);
+
+ while(data != end) {
+ uint64_t k = *data++;
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h ^= k;
+ h *= m;
+ }
+
+ data2 = (const unsigned char *)data;
+ switch(len & 7) {
+ case 7: h ^= ((uint64_t)(data2[6])) << 48;
+ case 6: h ^= ((uint64_t)(data2[5])) << 40;
+ case 5: h ^= ((uint64_t)(data2[4])) << 32;
+ case 4: h ^= ((uint64_t)(data2[3])) << 24;
+ case 3: h ^= ((uint64_t)(data2[2])) << 16;
+ case 2: h ^= ((uint64_t)(data2[1])) << 8;
+ case 1: h ^= ((uint64_t)(data2[0]));
+ h *= m;
+ }
+
+ h ^= h >> r;
+ h *= m;
+ h ^= h >> r;
+
+ return h;
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/dep/include/jemalloc/internal/huge.h b/dep/include/jemalloc/internal/huge.h
new file mode 100644
index 00000000000..3cf32f7506d
--- /dev/null
+++ b/dep/include/jemalloc/internal/huge.h
@@ -0,0 +1,38 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#ifdef JEMALLOC_STATS
+/* Huge allocation statistics. */
+extern uint64_t huge_nmalloc;
+extern uint64_t huge_ndalloc;
+extern size_t huge_allocated;
+#endif
+
+/* Protects chunk-related data structures. */
+extern malloc_mutex_t huge_mtx;
+
+void *huge_malloc(size_t size, bool zero);
+void *huge_palloc(size_t alignment, size_t size);
+void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
+void huge_dalloc(void *ptr);
+size_t huge_salloc(const void *ptr);
+#ifdef JEMALLOC_PROF
+prof_thr_cnt_t *huge_prof_cnt_get(const void *ptr);
+void huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
+#endif
+bool huge_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/dep/include/jemalloc/internal/jemalloc_internal.h b/dep/include/jemalloc/internal/jemalloc_internal.h
new file mode 100644
index 00000000000..109510d2962
--- /dev/null
+++ b/dep/include/jemalloc/internal/jemalloc_internal.h
@@ -0,0 +1,561 @@
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/uio.h>
+
+#include <errno.h>
+#include <limits.h>
+#ifndef SIZE_T_MAX
+# define SIZE_T_MAX SIZE_MAX
+#endif
+#include <pthread.h>
+#include <sched.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <pthread.h>
+
+#define JEMALLOC_MANGLE
+#include "../jemalloc.h"
+
+#ifdef JEMALLOC_LAZY_LOCK
+#include <dlfcn.h>
+#endif
+
+#define RB_COMPACT
+#include "jemalloc/internal/rb.h"
+#include "jemalloc/internal/qr.h"
+#include "jemalloc/internal/ql.h"
+
+extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
+
+/*
+ * Define a custom assert() in order to reduce the chances of deadlock during
+ * assertion failure.
+ */
+#ifdef JEMALLOC_DEBUG
+# define assert(e) do { \
+ if (!(e)) { \
+ char line_buf[UMAX2S_BUFSIZE]; \
+ malloc_write("<jemalloc>: "); \
+ malloc_write(__FILE__); \
+ malloc_write(":"); \
+ malloc_write(umax2s(__LINE__, 10, line_buf)); \
+ malloc_write(": Failed assertion: "); \
+ malloc_write("\""); \
+ malloc_write(#e); \
+ malloc_write("\"\n"); \
+ abort(); \
+ } \
+} while (0)
+#else
+#define assert(e)
+#endif
+
+/*
+ * jemalloc can conceptually be broken into components (arena, tcache, etc.),
+ * but there are circular dependencies that cannot be broken without
+ * substantial performance degradation. In order to reduce the effect on
+ * visual code flow, read the header files in multiple passes, with one of the
+ * following cpp variables defined during each pass:
+ *
+ * JEMALLOC_H_TYPES : Preprocessor-defined constants and pseudo-opaque data
+ * types.
+ * JEMALLOC_H_STRUCTS : Data structures.
+ * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes.
+ * JEMALLOC_H_INLINES : Inline functions.
+ */
+/******************************************************************************/
+#define JEMALLOC_H_TYPES
+
+#define ZU(z) ((size_t)z)
+
+#ifndef __DECONST
+# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
+#endif
+
+#ifdef JEMALLOC_DEBUG
+ /* Disable inlining to make debugging easier. */
+# define JEMALLOC_INLINE
+# define inline
+#else
+# define JEMALLOC_ENABLE_INLINE
+# define JEMALLOC_INLINE static inline
+#endif
+
+/* Size of stack-allocated buffer passed to strerror_r(). */
+#define STRERROR_BUF 64
+
+/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */
+#ifdef __i386__
+# define LG_QUANTUM 4
+#endif
+#ifdef __ia64__
+# define LG_QUANTUM 4
+#endif
+#ifdef __alpha__
+# define LG_QUANTUM 4
+#endif
+#ifdef __sparc64__
+# define LG_QUANTUM 4
+#endif
+#if (defined(__amd64__) || defined(__x86_64__))
+# define LG_QUANTUM 4
+#endif
+#ifdef __arm__
+# define LG_QUANTUM 3
+#endif
+#ifdef __mips__
+# define LG_QUANTUM 3
+#endif
+#ifdef __powerpc__
+# define LG_QUANTUM 4
+#endif
+#ifdef __s390x__
+# define LG_QUANTUM 4
+#endif
+
+#define QUANTUM ((size_t)(1U << LG_QUANTUM))
+#define QUANTUM_MASK (QUANTUM - 1)
+
+/* Return the smallest quantum multiple that is >= a. */
+#define QUANTUM_CEILING(a) \
+ (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
+
+#define SIZEOF_PTR (1U << LG_SIZEOF_PTR)
+
+/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
+#if (!defined(PIC) && !defined(NO_TLS))
+# define NO_TLS
+#endif
+
+/*
+ * Maximum size of L1 cache line. This is used to avoid cache line aliasing.
+ * In addition, this controls the spacing of cacheline-spaced size classes.
+ */
+#define LG_CACHELINE 6
+#define CACHELINE ((size_t)(1U << LG_CACHELINE))
+#define CACHELINE_MASK (CACHELINE - 1)
+
+/* Return the smallest cacheline multiple that is >= s. */
+#define CACHELINE_CEILING(s) \
+ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
+
+/*
+ * Page size. STATIC_PAGE_SHIFT is determined by the configure script. If
+ * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where
+ * compile-time values are required for the purposes of defining data
+ * structures.
+ */
+#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT))
+#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1))
+
+#ifdef DYNAMIC_PAGE_SHIFT
+# define PAGE_SHIFT lg_pagesize
+# define PAGE_SIZE pagesize
+# define PAGE_MASK pagesize_mask
+#else
+# define PAGE_SHIFT STATIC_PAGE_SHIFT
+# define PAGE_SIZE STATIC_PAGE_SIZE
+# define PAGE_MASK STATIC_PAGE_MASK
+#endif
+
+/* Return the smallest pagesize multiple that is >= s. */
+#define PAGE_CEILING(s) \
+ (((s) + PAGE_MASK) & ~PAGE_MASK)
+
+#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/chunk.h"
+#include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/tcache.h"
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/prof.h"
+
+#undef JEMALLOC_H_TYPES
+/******************************************************************************/
+#define JEMALLOC_H_STRUCTS
+
+#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/chunk.h"
+#include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/tcache.h"
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/prof.h"
+
+#undef JEMALLOC_H_STRUCTS
+/******************************************************************************/
+#define JEMALLOC_H_EXTERNS
+
+extern bool opt_abort;
+#ifdef JEMALLOC_FILL
+extern bool opt_junk;
+#endif
+#ifdef JEMALLOC_SYSV
+extern bool opt_sysv;
+#endif
+#ifdef JEMALLOC_XMALLOC
+extern bool opt_xmalloc;
+#endif
+#ifdef JEMALLOC_FILL
+extern bool opt_zero;
+#endif
+
+#ifdef DYNAMIC_PAGE_SHIFT
+extern size_t pagesize;
+extern size_t pagesize_mask;
+extern size_t lg_pagesize;
+#endif
+
+/* Number of CPUs. */
+extern unsigned ncpus;
+
+extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */
+#ifndef NO_TLS
+/*
+ * Map of pthread_self() --> arenas[???], used for selecting an arena to use
+ * for allocations.
+ */
+extern __thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec"));
+#endif
+/*
+ * Arenas that are used to service external requests. Not all elements of the
+ * arenas array are necessarily used; arenas are created lazily as needed.
+ */
+extern arena_t **arenas;
+extern unsigned narenas;
+
+arena_t *arenas_extend(unsigned ind);
+#ifndef NO_TLS
+arena_t *choose_arena_hard(void);
+#endif
+
+#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/chunk.h"
+#include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/tcache.h"
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/prof.h"
+
+#undef JEMALLOC_H_EXTERNS
+/******************************************************************************/
+#define JEMALLOC_H_INLINES
+
+#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/chunk.h"
+#include "jemalloc/internal/huge.h"
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void malloc_write(const char *s);
+arena_t *choose_arena(void);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+/*
+ * Wrapper around malloc_message() that avoids the need for
+ * JEMALLOC_P(malloc_message)(...) throughout the code.
+ */
+JEMALLOC_INLINE void
+malloc_write(const char *s)
+{
+
+ JEMALLOC_P(malloc_message)(NULL, s);
+}
+
+/*
+ * Choose an arena based on a per-thread value (fast-path code, calls slow-path
+ * code if necessary).
+ */
+JEMALLOC_INLINE arena_t *
+choose_arena(void)
+{
+ arena_t *ret;
+
+ /*
+ * We can only use TLS if this is a PIC library, since for the static
+ * library version, libc's malloc is used by TLS allocation, which
+ * introduces a bootstrapping issue.
+ */
+#ifndef NO_TLS
+ ret = arenas_map;
+ if (ret == NULL) {
+ ret = choose_arena_hard();
+ assert(ret != NULL);
+ }
+#else
+ if (isthreaded && narenas > 1) {
+ unsigned long ind;
+
+ /*
+ * Hash pthread_self() to one of the arenas. There is a prime
+ * number of arenas, so this has a reasonable chance of
+ * working. Even so, the hashing can be easily thwarted by
+ * inconvenient pthread_self() values. Without specific
+ * knowledge of how pthread_self() calculates values, we can't
+ * easily do much better than this.
+ */
+ ind = (unsigned long) pthread_self() % narenas;
+
+ /*
+		 * Optimistically assume that arenas[ind] has been initialized.
+ * At worst, we find out that some other thread has already
+ * done so, after acquiring the lock in preparation. Note that
+ * this lazy locking also has the effect of lazily forcing
+ * cache coherency; without the lock acquisition, there's no
+ * guarantee that modification of arenas[ind] by another thread
+ * would be seen on this CPU for an arbitrary amount of time.
+ *
+ * In general, this approach to modifying a synchronized value
+ * isn't a good idea, but in this case we only ever modify the
+ * value once, so things work out well.
+ */
+ ret = arenas[ind];
+ if (ret == NULL) {
+ /*
+ * Avoid races with another thread that may have already
+ * initialized arenas[ind].
+ */
+ malloc_mutex_lock(&arenas_lock);
+ if (arenas[ind] == NULL)
+ ret = arenas_extend((unsigned)ind);
+ else
+ ret = arenas[ind];
+ malloc_mutex_unlock(&arenas_lock);
+ }
+ } else
+ ret = arenas[0];
+#endif
+
+ assert(ret != NULL);
+ return (ret);
+}
+#endif
+
+#include "jemalloc/internal/tcache.h"
+#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/prof.h"
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void *imalloc(size_t size);
+void *icalloc(size_t size);
+void *ipalloc(size_t alignment, size_t size);
+size_t isalloc(const void *ptr);
+void *iralloc(void *ptr, size_t size);
+void idalloc(void *ptr);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+JEMALLOC_INLINE void *
+imalloc(size_t size)
+{
+
+ assert(size != 0);
+
+ if (size <= arena_maxclass)
+ return (arena_malloc(size, false));
+ else
+ return (huge_malloc(size, false));
+}
+
+JEMALLOC_INLINE void *
+icalloc(size_t size)
+{
+
+ if (size <= arena_maxclass)
+ return (arena_malloc(size, true));
+ else
+ return (huge_malloc(size, true));
+}
+
+JEMALLOC_INLINE void *
+ipalloc(size_t alignment, size_t size)
+{
+ void *ret;
+ size_t ceil_size;
+
+ /*
+ * Round size up to the nearest multiple of alignment.
+ *
+ * This done, we can take advantage of the fact that for each small
+ * size class, every object is aligned at the smallest power of two
+ * that is non-zero in the base two representation of the size. For
+ * example:
+ *
+ * Size | Base 2 | Minimum alignment
+ * -----+----------+------------------
+ * 96 | 1100000 | 32
+ * 144 | 10100000 | 32
+ * 192 | 11000000 | 64
+ *
+ * Depending on runtime settings, it is possible that arena_malloc()
+ * will further round up to a power of two, but that never causes
+ * correctness issues.
+ */
+ ceil_size = (size + (alignment - 1)) & (-alignment);
+ /*
+ * (ceil_size < size) protects against the combination of maximal
+ * alignment and size greater than maximal alignment.
+ */
+ if (ceil_size < size) {
+ /* size_t overflow. */
+ return (NULL);
+ }
+
+ if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
+ && ceil_size <= arena_maxclass))
+ ret = arena_malloc(ceil_size, false);
+ else {
+ size_t run_size;
+
+ /*
+ * We can't achieve subpage alignment, so round up alignment
+ * permanently; it makes later calculations simpler.
+ */
+ alignment = PAGE_CEILING(alignment);
+ ceil_size = PAGE_CEILING(size);
+ /*
+ * (ceil_size < size) protects against very large sizes within
+ * PAGE_SIZE of SIZE_T_MAX.
+ *
+ * (ceil_size + alignment < ceil_size) protects against the
+ * combination of maximal alignment and ceil_size large enough
+ * to cause overflow. This is similar to the first overflow
+ * check above, but it needs to be repeated due to the new
+ * ceil_size value, which may now be *equal* to maximal
+ * alignment, whereas before we only detected overflow if the
+ * original size was *greater* than maximal alignment.
+ */
+ if (ceil_size < size || ceil_size + alignment < ceil_size) {
+ /* size_t overflow. */
+ return (NULL);
+ }
+
+ /*
+ * Calculate the size of the over-size run that arena_palloc()
+ * would need to allocate in order to guarantee the alignment.
+ */
+ if (ceil_size >= alignment)
+ run_size = ceil_size + alignment - PAGE_SIZE;
+ else {
+ /*
+ * It is possible that (alignment << 1) will cause
+ * overflow, but it doesn't matter because we also
+ * subtract PAGE_SIZE, which in the case of overflow
+ * leaves us with a very large run_size. That causes
+ * the first conditional below to fail, which means
+ * that the bogus run_size value never gets used for
+ * anything important.
+ */
+ run_size = (alignment << 1) - PAGE_SIZE;
+ }
+
+ if (run_size <= arena_maxclass) {
+ ret = arena_palloc(choose_arena(), alignment, ceil_size,
+ run_size);
+ } else if (alignment <= chunksize)
+ ret = huge_malloc(ceil_size, false);
+ else
+ ret = huge_palloc(alignment, ceil_size);
+ }
+
+ assert(((uintptr_t)ret & (alignment - 1)) == 0);
+ return (ret);
+}
+
+JEMALLOC_INLINE size_t
+isalloc(const void *ptr)
+{
+ size_t ret;
+ arena_chunk_t *chunk;
+
+ assert(ptr != NULL);
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (chunk != ptr) {
+ /* Region. */
+ assert(chunk->arena->magic == ARENA_MAGIC);
+
+#ifdef JEMALLOC_PROF
+ ret = arena_salloc_demote(ptr);
+#else
+ ret = arena_salloc(ptr);
+#endif
+ } else
+ ret = huge_salloc(ptr);
+
+ return (ret);
+}
+
+JEMALLOC_INLINE void *
+iralloc(void *ptr, size_t size)
+{
+ size_t oldsize;
+
+ assert(ptr != NULL);
+ assert(size != 0);
+
+ oldsize = isalloc(ptr);
+
+ if (size <= arena_maxclass)
+ return (arena_ralloc(ptr, size, oldsize));
+ else
+ return (huge_ralloc(ptr, size, oldsize));
+}
+
+JEMALLOC_INLINE void
+idalloc(void *ptr)
+{
+ arena_chunk_t *chunk;
+
+ assert(ptr != NULL);
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (chunk != ptr)
+ arena_dalloc(chunk->arena, chunk, ptr);
+ else
+ huge_dalloc(ptr);
+}
+#endif
+
+#undef JEMALLOC_H_INLINES
+/******************************************************************************/
diff --git a/dep/include/jemalloc/internal/jemalloc_internal.h.in b/dep/include/jemalloc/internal/jemalloc_internal.h.in
new file mode 100644
index 00000000000..2c3f32f126d
--- /dev/null
+++ b/dep/include/jemalloc/internal/jemalloc_internal.h.in
@@ -0,0 +1,561 @@
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/uio.h>
+
+#include <errno.h>
+#include <limits.h>
+#ifndef SIZE_T_MAX
+# define SIZE_T_MAX SIZE_MAX
+#endif
+#include <pthread.h>
+#include <sched.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <pthread.h>
+
+#define JEMALLOC_MANGLE
+#include "../jemalloc@install_suffix@.h"
+
+#ifdef JEMALLOC_LAZY_LOCK
+#include <dlfcn.h>
+#endif
+
+#define RB_COMPACT
+#include "jemalloc/internal/rb.h"
+#include "jemalloc/internal/qr.h"
+#include "jemalloc/internal/ql.h"
+
+extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
+
+/*
+ * Define a custom assert() in order to reduce the chances of deadlock during
+ * assertion failure.
+ */
+#ifdef JEMALLOC_DEBUG
+# define assert(e) do { \
+ if (!(e)) { \
+ char line_buf[UMAX2S_BUFSIZE]; \
+ malloc_write("<jemalloc>: "); \
+ malloc_write(__FILE__); \
+ malloc_write(":"); \
+ malloc_write(umax2s(__LINE__, 10, line_buf)); \
+ malloc_write(": Failed assertion: "); \
+ malloc_write("\""); \
+ malloc_write(#e); \
+ malloc_write("\"\n"); \
+ abort(); \
+ } \
+} while (0)
+#else
+#define assert(e)
+#endif
+
+/*
+ * jemalloc can conceptually be broken into components (arena, tcache, etc.),
+ * but there are circular dependencies that cannot be broken without
+ * substantial performance degradation. In order to reduce the effect on
+ * visual code flow, read the header files in multiple passes, with one of the
+ * following cpp variables defined during each pass:
+ *
+ * JEMALLOC_H_TYPES : Preprocessor-defined constants and pseudo-opaque data
+ * types.
+ * JEMALLOC_H_STRUCTS : Data structures.
+ * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes.
+ * JEMALLOC_H_INLINES : Inline functions.
+ */
+/******************************************************************************/
+#define JEMALLOC_H_TYPES
+
+#define ZU(z) ((size_t)z)
+
+#ifndef __DECONST
+# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
+#endif
+
+#ifdef JEMALLOC_DEBUG
+ /* Disable inlining to make debugging easier. */
+# define JEMALLOC_INLINE
+# define inline
+#else
+# define JEMALLOC_ENABLE_INLINE
+# define JEMALLOC_INLINE static inline
+#endif
+
+/* Size of stack-allocated buffer passed to strerror_r(). */
+#define STRERROR_BUF 64
+
+/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */
+#ifdef __i386__
+# define LG_QUANTUM 4
+#endif
+#ifdef __ia64__
+# define LG_QUANTUM 4
+#endif
+#ifdef __alpha__
+# define LG_QUANTUM 4
+#endif
+#ifdef __sparc64__
+# define LG_QUANTUM 4
+#endif
+#if (defined(__amd64__) || defined(__x86_64__))
+# define LG_QUANTUM 4
+#endif
+#ifdef __arm__
+# define LG_QUANTUM 3
+#endif
+#ifdef __mips__
+# define LG_QUANTUM 3
+#endif
+#ifdef __powerpc__
+# define LG_QUANTUM 4
+#endif
+#ifdef __s390x__
+# define LG_QUANTUM 4
+#endif
+
+#define QUANTUM ((size_t)(1U << LG_QUANTUM))
+#define QUANTUM_MASK (QUANTUM - 1)
+
+/* Return the smallest quantum multiple that is >= a. */
+#define QUANTUM_CEILING(a) \
+ (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
+
+#define SIZEOF_PTR (1U << LG_SIZEOF_PTR)
+
+/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
+#if (!defined(PIC) && !defined(NO_TLS))
+# define NO_TLS
+#endif
+
+/*
+ * Maximum size of L1 cache line. This is used to avoid cache line aliasing.
+ * In addition, this controls the spacing of cacheline-spaced size classes.
+ */
+#define LG_CACHELINE 6
+#define CACHELINE ((size_t)(1U << LG_CACHELINE))
+#define CACHELINE_MASK (CACHELINE - 1)
+
+/* Return the smallest cacheline multiple that is >= s. */
+#define CACHELINE_CEILING(s) \
+ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
+
+/*
+ * Page size. STATIC_PAGE_SHIFT is determined by the configure script. If
+ * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where
+ * compile-time values are required for the purposes of defining data
+ * structures.
+ */
+#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT))
+#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1))
+
+#ifdef DYNAMIC_PAGE_SHIFT
+# define PAGE_SHIFT lg_pagesize
+# define PAGE_SIZE pagesize
+# define PAGE_MASK pagesize_mask
+#else
+# define PAGE_SHIFT STATIC_PAGE_SHIFT
+# define PAGE_SIZE STATIC_PAGE_SIZE
+# define PAGE_MASK STATIC_PAGE_MASK
+#endif
+
+/* Return the smallest pagesize multiple that is >= s. */
+#define PAGE_CEILING(s) \
+ (((s) + PAGE_MASK) & ~PAGE_MASK)
+
+#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/chunk.h"
+#include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/tcache.h"
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/prof.h"
+
+#undef JEMALLOC_H_TYPES
+/******************************************************************************/
+#define JEMALLOC_H_STRUCTS
+
+#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/chunk.h"
+#include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/tcache.h"
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/prof.h"
+
+#undef JEMALLOC_H_STRUCTS
+/******************************************************************************/
+#define JEMALLOC_H_EXTERNS
+
+extern bool opt_abort;
+#ifdef JEMALLOC_FILL
+extern bool opt_junk;
+#endif
+#ifdef JEMALLOC_SYSV
+extern bool opt_sysv;
+#endif
+#ifdef JEMALLOC_XMALLOC
+extern bool opt_xmalloc;
+#endif
+#ifdef JEMALLOC_FILL
+extern bool opt_zero;
+#endif
+
+#ifdef DYNAMIC_PAGE_SHIFT
+extern size_t pagesize;
+extern size_t pagesize_mask;
+extern size_t lg_pagesize;
+#endif
+
+/* Number of CPUs. */
+extern unsigned ncpus;
+
+extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */
+#ifndef NO_TLS
+/*
+ * Map of pthread_self() --> arenas[???], used for selecting an arena to use
+ * for allocations.
+ */
+extern __thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec"));
+#endif
+/*
+ * Arenas that are used to service external requests. Not all elements of the
+ * arenas array are necessarily used; arenas are created lazily as needed.
+ */
+extern arena_t **arenas;
+extern unsigned narenas;
+
+arena_t *arenas_extend(unsigned ind);
+#ifndef NO_TLS
+arena_t *choose_arena_hard(void);
+#endif
+
+#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/chunk.h"
+#include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/tcache.h"
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/prof.h"
+
+#undef JEMALLOC_H_EXTERNS
+/******************************************************************************/
+#define JEMALLOC_H_INLINES
+
+#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/chunk.h"
+#include "jemalloc/internal/huge.h"
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void malloc_write(const char *s);
+arena_t *choose_arena(void);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+/*
+ * Wrapper around malloc_message() that avoids the need for
+ * JEMALLOC_P(malloc_message)(...) throughout the code.
+ */
+JEMALLOC_INLINE void
+malloc_write(const char *s)
+{
+
+ JEMALLOC_P(malloc_message)(NULL, s);
+}
+
+/*
+ * Choose an arena based on a per-thread value (fast-path code, calls slow-path
+ * code if necessary).
+ */
+JEMALLOC_INLINE arena_t *
+choose_arena(void)
+{
+ arena_t *ret;
+
+ /*
+ * We can only use TLS if this is a PIC library, since for the static
+ * library version, libc's malloc is used by TLS allocation, which
+ * introduces a bootstrapping issue.
+ */
+#ifndef NO_TLS
+ ret = arenas_map;
+ if (ret == NULL) {
+ ret = choose_arena_hard();
+ assert(ret != NULL);
+ }
+#else
+ if (isthreaded && narenas > 1) {
+ unsigned long ind;
+
+ /*
+ * Hash pthread_self() to one of the arenas. There is a prime
+ * number of arenas, so this has a reasonable chance of
+ * working. Even so, the hashing can be easily thwarted by
+ * inconvenient pthread_self() values. Without specific
+ * knowledge of how pthread_self() calculates values, we can't
+ * easily do much better than this.
+ */
+ ind = (unsigned long) pthread_self() % narenas;
+
+ /*
+		 * Optimistically assume that arenas[ind] has been initialized.
+ * At worst, we find out that some other thread has already
+ * done so, after acquiring the lock in preparation. Note that
+ * this lazy locking also has the effect of lazily forcing
+ * cache coherency; without the lock acquisition, there's no
+ * guarantee that modification of arenas[ind] by another thread
+ * would be seen on this CPU for an arbitrary amount of time.
+ *
+ * In general, this approach to modifying a synchronized value
+ * isn't a good idea, but in this case we only ever modify the
+ * value once, so things work out well.
+ */
+ ret = arenas[ind];
+ if (ret == NULL) {
+ /*
+ * Avoid races with another thread that may have already
+ * initialized arenas[ind].
+ */
+ malloc_mutex_lock(&arenas_lock);
+ if (arenas[ind] == NULL)
+ ret = arenas_extend((unsigned)ind);
+ else
+ ret = arenas[ind];
+ malloc_mutex_unlock(&arenas_lock);
+ }
+ } else
+ ret = arenas[0];
+#endif
+
+ assert(ret != NULL);
+ return (ret);
+}
+#endif
+
+#include "jemalloc/internal/tcache.h"
+#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/prof.h"
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void *imalloc(size_t size);
+void *icalloc(size_t size);
+void *ipalloc(size_t alignment, size_t size);
+size_t isalloc(const void *ptr);
+void *iralloc(void *ptr, size_t size);
+void idalloc(void *ptr);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+JEMALLOC_INLINE void *
+imalloc(size_t size)
+{
+
+ assert(size != 0);
+
+ if (size <= arena_maxclass)
+ return (arena_malloc(size, false));
+ else
+ return (huge_malloc(size, false));
+}
+
+JEMALLOC_INLINE void *
+icalloc(size_t size)
+{
+
+ if (size <= arena_maxclass)
+ return (arena_malloc(size, true));
+ else
+ return (huge_malloc(size, true));
+}
+
+JEMALLOC_INLINE void *
+ipalloc(size_t alignment, size_t size)
+{
+ void *ret;
+ size_t ceil_size;
+
+ /*
+ * Round size up to the nearest multiple of alignment.
+ *
+ * This done, we can take advantage of the fact that for each small
+ * size class, every object is aligned at the smallest power of two
+ * that is non-zero in the base two representation of the size. For
+ * example:
+ *
+ * Size | Base 2 | Minimum alignment
+ * -----+----------+------------------
+ * 96 | 1100000 | 32
+ * 144 | 10100000 | 32
+ * 192 | 11000000 | 64
+ *
+ * Depending on runtime settings, it is possible that arena_malloc()
+ * will further round up to a power of two, but that never causes
+ * correctness issues.
+ */
+ ceil_size = (size + (alignment - 1)) & (-alignment);
+ /*
+ * (ceil_size < size) protects against the combination of maximal
+ * alignment and size greater than maximal alignment.
+ */
+ if (ceil_size < size) {
+ /* size_t overflow. */
+ return (NULL);
+ }
+
+ if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
+ && ceil_size <= arena_maxclass))
+ ret = arena_malloc(ceil_size, false);
+ else {
+ size_t run_size;
+
+ /*
+ * We can't achieve subpage alignment, so round up alignment
+ * permanently; it makes later calculations simpler.
+ */
+ alignment = PAGE_CEILING(alignment);
+ ceil_size = PAGE_CEILING(size);
+ /*
+ * (ceil_size < size) protects against very large sizes within
+ * PAGE_SIZE of SIZE_T_MAX.
+ *
+ * (ceil_size + alignment < ceil_size) protects against the
+ * combination of maximal alignment and ceil_size large enough
+ * to cause overflow. This is similar to the first overflow
+ * check above, but it needs to be repeated due to the new
+ * ceil_size value, which may now be *equal* to maximal
+ * alignment, whereas before we only detected overflow if the
+ * original size was *greater* than maximal alignment.
+ */
+ if (ceil_size < size || ceil_size + alignment < ceil_size) {
+ /* size_t overflow. */
+ return (NULL);
+ }
+
+ /*
+ * Calculate the size of the over-size run that arena_palloc()
+ * would need to allocate in order to guarantee the alignment.
+ */
+ if (ceil_size >= alignment)
+ run_size = ceil_size + alignment - PAGE_SIZE;
+ else {
+ /*
+ * It is possible that (alignment << 1) will cause
+ * overflow, but it doesn't matter because we also
+ * subtract PAGE_SIZE, which in the case of overflow
+ * leaves us with a very large run_size. That causes
+ * the first conditional below to fail, which means
+ * that the bogus run_size value never gets used for
+ * anything important.
+ */
+ run_size = (alignment << 1) - PAGE_SIZE;
+ }
+
+ if (run_size <= arena_maxclass) {
+ ret = arena_palloc(choose_arena(), alignment, ceil_size,
+ run_size);
+ } else if (alignment <= chunksize)
+ ret = huge_malloc(ceil_size, false);
+ else
+ ret = huge_palloc(alignment, ceil_size);
+ }
+
+ assert(((uintptr_t)ret & (alignment - 1)) == 0);
+ return (ret);
+}
+
+JEMALLOC_INLINE size_t
+isalloc(const void *ptr)
+{
+ size_t ret;
+ arena_chunk_t *chunk;
+
+ assert(ptr != NULL);
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (chunk != ptr) {
+ /* Region. */
+ assert(chunk->arena->magic == ARENA_MAGIC);
+
+#ifdef JEMALLOC_PROF
+ ret = arena_salloc_demote(ptr);
+#else
+ ret = arena_salloc(ptr);
+#endif
+ } else
+ ret = huge_salloc(ptr);
+
+ return (ret);
+}
+
+JEMALLOC_INLINE void *
+iralloc(void *ptr, size_t size)
+{
+ size_t oldsize;
+
+ assert(ptr != NULL);
+ assert(size != 0);
+
+ oldsize = isalloc(ptr);
+
+ if (size <= arena_maxclass)
+ return (arena_ralloc(ptr, size, oldsize));
+ else
+ return (huge_ralloc(ptr, size, oldsize));
+}
+
+JEMALLOC_INLINE void
+idalloc(void *ptr)
+{
+ arena_chunk_t *chunk;
+
+ assert(ptr != NULL);
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (chunk != ptr)
+ arena_dalloc(chunk->arena, chunk, ptr);
+ else
+ huge_dalloc(ptr);
+}
+#endif
+
+#undef JEMALLOC_H_INLINES
+/******************************************************************************/
diff --git a/dep/include/jemalloc/internal/mb.h b/dep/include/jemalloc/internal/mb.h
new file mode 100644
index 00000000000..1707aa91d68
--- /dev/null
+++ b/dep/include/jemalloc/internal/mb.h
@@ -0,0 +1,108 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void mb_write(void);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(MB_C_))
+#ifdef __i386__
+/*
+ * According to the Intel Architecture Software Developer's Manual, current
+ * processors execute instructions in order from the perspective of other
+ * processors in a multiprocessor system, but 1) Intel reserves the right to
+ * change that, and 2) the compiler's optimizer could re-order instructions if
+ * there weren't some form of barrier. Therefore, even if running on an
+ * architecture that does not need memory barriers (everything through at least
+ * i686), an "optimizer barrier" is necessary.
+ */
+JEMALLOC_INLINE void
+mb_write(void)
+{
+
+# if 0
+ /* This is a true memory barrier. */
+ asm volatile ("pusha;"
+ "xor %%eax,%%eax;"
+ "cpuid;"
+ "popa;"
+ : /* Outputs. */
+ : /* Inputs. */
+ : "memory" /* Clobbers. */
+ );
+#else
+ /*
+ * This is hopefully enough to keep the compiler from reordering
+ * instructions around this one.
+ */
+ asm volatile ("nop;"
+ : /* Outputs. */
+ : /* Inputs. */
+ : "memory" /* Clobbers. */
+ );
+#endif
+}
+#elif (defined(__amd64_) || defined(__x86_64__))
+JEMALLOC_INLINE void
+mb_write(void)
+{
+
+ asm volatile ("sfence"
+ : /* Outputs. */
+ : /* Inputs. */
+ : "memory" /* Clobbers. */
+ );
+}
+#elif defined(__powerpc__)
+JEMALLOC_INLINE void
+mb_write(void)
+{
+
+ asm volatile ("eieio"
+ : /* Outputs. */
+ : /* Inputs. */
+ : "memory" /* Clobbers. */
+ );
+}
+#elif defined(__sparc64__)
+JEMALLOC_INLINE void
+mb_write(void)
+{
+
+ asm volatile ("membar #StoreStore"
+ : /* Outputs. */
+ : /* Inputs. */
+ : "memory" /* Clobbers. */
+ );
+}
+#else
+/*
+ * This is much slower than a simple memory barrier, but the semantics of mutex
+ * unlock make this work.
+ */
+JEMALLOC_INLINE void
+mb_write(void)
+{
+ malloc_mutex_t mtx;
+
+ malloc_mutex_init(&mtx);
+ malloc_mutex_lock(&mtx);
+ malloc_mutex_unlock(&mtx);
+}
+#endif
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/dep/include/jemalloc/internal/mutex.h b/dep/include/jemalloc/internal/mutex.h
new file mode 100644
index 00000000000..108bfa8abfd
--- /dev/null
+++ b/dep/include/jemalloc/internal/mutex.h
@@ -0,0 +1,61 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef pthread_mutex_t malloc_mutex_t;
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#ifdef JEMALLOC_LAZY_LOCK
+extern bool isthreaded;
+#else
+# define isthreaded true
+#endif
+
+bool malloc_mutex_init(malloc_mutex_t *mutex);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void malloc_mutex_lock(malloc_mutex_t *mutex);
+bool malloc_mutex_trylock(malloc_mutex_t *mutex);
+void malloc_mutex_unlock(malloc_mutex_t *mutex);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
+JEMALLOC_INLINE void
+malloc_mutex_lock(malloc_mutex_t *mutex)
+{
+
+ if (isthreaded)
+ pthread_mutex_lock(mutex);
+}
+
+JEMALLOC_INLINE bool
+malloc_mutex_trylock(malloc_mutex_t *mutex)
+{
+
+ if (isthreaded)
+ return (pthread_mutex_trylock(mutex) != 0);
+ else
+ return (false);
+}
+
+JEMALLOC_INLINE void
+malloc_mutex_unlock(malloc_mutex_t *mutex)
+{
+
+ if (isthreaded)
+ pthread_mutex_unlock(mutex);
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/dep/include/jemalloc/internal/prn.h b/dep/include/jemalloc/internal/prn.h
new file mode 100644
index 00000000000..0709d708012
--- /dev/null
+++ b/dep/include/jemalloc/internal/prn.h
@@ -0,0 +1,60 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/*
+ * Simple linear congruential pseudo-random number generator:
+ *
+ * prn(y) = (a*x + c) % m
+ *
+ * where the following constants ensure maximal period:
+ *
+ * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
+ * c == Odd number (relatively prime to 2^n).
+ * m == 2^32
+ *
+ * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
+ *
+ * This choice of m has the disadvantage that the quality of the bits is
+ * proportional to bit position.  For example, the lowest bit has a cycle of 2,
+ * the next has a cycle of 4, etc. For this reason, we prefer to use the upper
+ * bits.
+ *
+ * Macro parameters:
+ * uint32_t r : Result.
+ * unsigned lg_range : (0..32], number of least significant bits to return.
+ * uint32_t state : Seed value.
+ * const uint32_t a, c : See above discussion.
+ */
+#define prn32(r, lg_range, state, a, c) do { \
+ assert(lg_range > 0); \
+ assert(lg_range <= 32); \
+ \
+ r = (state * (a)) + (c); \
+ state = r; \
+ r >>= (32 - lg_range); \
+} while (false)
+
+/* Same as prn32(), but 64 bits of pseudo-randomness, using uint64_t. */
+#define prn64(r, lg_range, state, a, c) do { \
+ assert(lg_range > 0); \
+ assert(lg_range <= 64); \
+ \
+ r = (state * (a)) + (c); \
+ state = r; \
+ r >>= (64 - lg_range); \
+} while (false)
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/dep/include/jemalloc/internal/prof.h b/dep/include/jemalloc/internal/prof.h
new file mode 100644
index 00000000000..6e71552d85e
--- /dev/null
+++ b/dep/include/jemalloc/internal/prof.h
@@ -0,0 +1,171 @@
+#ifdef JEMALLOC_PROF
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct prof_bt_s prof_bt_t;
+typedef struct prof_cnt_s prof_cnt_t;
+typedef struct prof_thr_cnt_s prof_thr_cnt_t;
+typedef struct prof_ctx_s prof_ctx_t;
+typedef struct prof_s prof_t;
+
+/* Option defaults. */
+#define LG_PROF_BT_MAX_DEFAULT 2
+#define LG_PROF_SAMPLE_DEFAULT 0
+#define LG_PROF_INTERVAL_DEFAULT 30
+
+/*
+ * Hard limit on stack backtrace depth. Note that the version of
+ * prof_backtrace() that is based on __builtin_return_address() necessarily has
+ * a hard-coded number of backtrace frame handlers, so increasing
+ * LG_PROF_BT_MAX requires changing prof_backtrace().
+ */
+#define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */
+#define PROF_BT_MAX (1U << LG_PROF_BT_MAX)
+
+/* Initial hash table size. */
+#define PROF_CKH_MINITEMS 64
+
+/* Size of memory buffer to use when writing dump files. */
+#define PROF_DUMP_BUF_SIZE 65536
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct prof_bt_s {
+ /* Backtrace, stored as len program counters. */
+ void **vec;
+ unsigned len;
+};
+
+#ifdef JEMALLOC_PROF_LIBGCC
+/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
+typedef struct {
+ prof_bt_t *bt;
+ unsigned nignore;
+ unsigned max;
+} prof_unwind_data_t;
+#endif
+
+struct prof_cnt_s {
+ /*
+ * Profiling counters. An allocation/deallocation pair can operate on
+ * different prof_thr_cnt_t objects that are linked into the same
+ * prof_ctx_t sets_ql, so it is possible for the cur* counters to go
+ * negative. In principle it is possible for the *bytes counters to
+ * overflow/underflow, but a general solution would require some form
+ * of 128-bit counter solution; this implementation doesn't bother to
+ * solve that problem.
+ */
+ int64_t curobjs;
+ int64_t curbytes;
+ uint64_t accumobjs;
+ uint64_t accumbytes;
+};
+
+struct prof_thr_cnt_s {
+ /* Linkage into prof_ctx_t's sets_ql. */
+ ql_elm(prof_thr_cnt_t) link;
+
+ /*
+ * Associated context. If a thread frees an object that it did not
+ * allocate, it is possible that the context is not cached in the
+ * thread's hash table, in which case it must be able to look up the
+ * context, insert a new prof_thr_cnt_t into the thread's hash table,
+ * and link it into the prof_ctx_t's sets_ql.
+ */
+ prof_ctx_t *ctx;
+
+ /*
+ * Threads use memory barriers to update the counters. Since there is
+ * only ever one writer, the only challenge is for the reader to get a
+ * consistent read of the counters.
+ *
+ * The writer uses this series of operations:
+ *
+ * 1) Increment epoch to an odd number.
+ * 2) Update counters.
+ * 3) Increment epoch to an even number.
+ *
+ * The reader must assure 1) that the epoch is even while it reads the
+ * counters, and 2) that the epoch doesn't change between the time it
+ * starts and finishes reading the counters.
+ */
+ unsigned epoch;
+
+ /* Profiling counters. */
+ prof_cnt_t cnts;
+};
+
+struct prof_ctx_s {
+ /* Protects cnt_merged and sets_ql. */
+ malloc_mutex_t lock;
+
+ /* Temporary storage for aggregation during dump. */
+ prof_cnt_t cnt_dump;
+
+ /* When threads exit, they merge their stats into cnt_merged. */
+ prof_cnt_t cnt_merged;
+
+ /*
+ * List of profile counters, one for each thread that has allocated in
+ * this context.
+ */
+ ql_head(prof_thr_cnt_t) cnts_ql;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern bool opt_prof;
+/*
+ * Even if opt_prof is true, sampling can be temporarily disabled by setting
+ * opt_prof_active to false. No locking is used when updating opt_prof_active,
+ * so there are no guarantees regarding how long it will take for all threads
+ * to notice state changes.
+ */
+extern bool opt_prof_active;
+extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */
+extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
+extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
+extern bool opt_prof_udump; /* High-water memory dumping. */
+extern bool opt_prof_leak; /* Dump leak summary at exit. */
+
+/*
+ * Profile dump interval, measured in bytes allocated. Each arena triggers a
+ * profile dump when it reaches this threshold. The effect is that the
+ * interval between profile dumps averages prof_interval, though the actual
+ * interval between dumps will tend to be sporadic, and the interval will be a
+ * maximum of approximately (prof_interval * narenas).
+ */
+extern uint64_t prof_interval;
+
+/*
+ * If true, promote small sampled objects to large objects, since small run
+ * headers do not have embedded profile context pointers.
+ */
+extern bool prof_promote;
+
+bool prof_init(prof_t *prof, bool master);
+void prof_destroy(prof_t *prof);
+
+prof_thr_cnt_t *prof_alloc_prep(size_t size);
+prof_thr_cnt_t *prof_cnt_get(const void *ptr);
+void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt);
+void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
+ size_t old_size, prof_thr_cnt_t *old_cnt);
+void prof_free(const void *ptr);
+void prof_idump(void);
+bool prof_mdump(const char *filename);
+void prof_udump(void);
+void prof_boot0(void);
+bool prof_boot1(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+#endif /* JEMALLOC_PROF */
diff --git a/dep/include/jemalloc/internal/ql.h b/dep/include/jemalloc/internal/ql.h
new file mode 100644
index 00000000000..a9ed2393f0c
--- /dev/null
+++ b/dep/include/jemalloc/internal/ql.h
@@ -0,0 +1,83 @@
+/*
+ * List definitions.
+ */
+#define ql_head(a_type) \
+struct { \
+ a_type *qlh_first; \
+}
+
+#define ql_head_initializer(a_head) {NULL}
+
+#define ql_elm(a_type) qr(a_type)
+
+/* List functions. */
+#define ql_new(a_head) do { \
+ (a_head)->qlh_first = NULL; \
+} while (0)
+
+#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field)
+
+#define ql_first(a_head) ((a_head)->qlh_first)
+
+#define ql_last(a_head, a_field) \
+ ((ql_first(a_head) != NULL) \
+ ? qr_prev(ql_first(a_head), a_field) : NULL)
+
+#define ql_next(a_head, a_elm, a_field) \
+ ((ql_last(a_head, a_field) != (a_elm)) \
+ ? qr_next((a_elm), a_field) : NULL)
+
+#define ql_prev(a_head, a_elm, a_field) \
+ ((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \
+ : NULL)
+
+#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \
+ qr_before_insert((a_qlelm), (a_elm), a_field); \
+ if (ql_first(a_head) == (a_qlelm)) { \
+ ql_first(a_head) = (a_elm); \
+ } \
+} while (0)
+
+#define ql_after_insert(a_qlelm, a_elm, a_field) \
+ qr_after_insert((a_qlelm), (a_elm), a_field)
+
+#define ql_head_insert(a_head, a_elm, a_field) do { \
+ if (ql_first(a_head) != NULL) { \
+ qr_before_insert(ql_first(a_head), (a_elm), a_field); \
+ } \
+ ql_first(a_head) = (a_elm); \
+} while (0)
+
+#define ql_tail_insert(a_head, a_elm, a_field) do { \
+ if (ql_first(a_head) != NULL) { \
+ qr_before_insert(ql_first(a_head), (a_elm), a_field); \
+ } \
+ ql_first(a_head) = qr_next((a_elm), a_field); \
+} while (0)
+
+#define ql_remove(a_head, a_elm, a_field) do { \
+ if (ql_first(a_head) == (a_elm)) { \
+ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \
+ } \
+ if (ql_first(a_head) != (a_elm)) { \
+ qr_remove((a_elm), a_field); \
+ } else { \
+ ql_first(a_head) = NULL; \
+ } \
+} while (0)
+
+#define ql_head_remove(a_head, a_type, a_field) do { \
+ a_type *t = ql_first(a_head); \
+ ql_remove((a_head), t, a_field); \
+} while (0)
+
+#define ql_tail_remove(a_head, a_type, a_field) do { \
+ a_type *t = ql_last(a_head, a_field); \
+ ql_remove((a_head), t, a_field); \
+} while (0)
+
+#define ql_foreach(a_var, a_head, a_field) \
+ qr_foreach((a_var), ql_first(a_head), a_field)
+
+#define ql_reverse_foreach(a_var, a_head, a_field) \
+ qr_reverse_foreach((a_var), ql_first(a_head), a_field)
diff --git a/dep/include/jemalloc/internal/qr.h b/dep/include/jemalloc/internal/qr.h
new file mode 100644
index 00000000000..fe22352fedd
--- /dev/null
+++ b/dep/include/jemalloc/internal/qr.h
@@ -0,0 +1,67 @@
+/* Ring definitions. */
+#define qr(a_type) \
+struct { \
+ a_type *qre_next; \
+ a_type *qre_prev; \
+}
+
+/* Ring functions. */
+#define qr_new(a_qr, a_field) do { \
+ (a_qr)->a_field.qre_next = (a_qr); \
+ (a_qr)->a_field.qre_prev = (a_qr); \
+} while (0)
+
+#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)
+
+#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)
+
+#define qr_before_insert(a_qrelm, a_qr, a_field) do { \
+ (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \
+ (a_qr)->a_field.qre_next = (a_qrelm); \
+ (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \
+ (a_qrelm)->a_field.qre_prev = (a_qr); \
+} while (0)
+
+#define qr_after_insert(a_qrelm, a_qr, a_field) \
+ do \
+ { \
+ (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \
+ (a_qr)->a_field.qre_prev = (a_qrelm); \
+ (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \
+ (a_qrelm)->a_field.qre_next = (a_qr); \
+ } while (0)
+
+#define qr_meld(a_qr_a, a_qr_b, a_field) do { \
+ void *t; \
+ (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \
+ (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \
+ t = (a_qr_a)->a_field.qre_prev; \
+ (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \
+ (a_qr_b)->a_field.qre_prev = t; \
+} while (0)
+
+/* qr_meld() and qr_split() are functionally equivalent, so there's no need to
+ * have two copies of the code. */
+#define qr_split(a_qr_a, a_qr_b, a_field) \
+ qr_meld((a_qr_a), (a_qr_b), a_field)
+
+#define qr_remove(a_qr, a_field) do { \
+ (a_qr)->a_field.qre_prev->a_field.qre_next \
+ = (a_qr)->a_field.qre_next; \
+ (a_qr)->a_field.qre_next->a_field.qre_prev \
+ = (a_qr)->a_field.qre_prev; \
+ (a_qr)->a_field.qre_next = (a_qr); \
+ (a_qr)->a_field.qre_prev = (a_qr); \
+} while (0)
+
+#define qr_foreach(var, a_qr, a_field) \
+ for ((var) = (a_qr); \
+ (var) != NULL; \
+ (var) = (((var)->a_field.qre_next != (a_qr)) \
+ ? (var)->a_field.qre_next : NULL))
+
+#define qr_reverse_foreach(var, a_qr, a_field) \
+ for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \
+ (var) != NULL; \
+ (var) = (((var) != (a_qr)) \
+ ? (var)->a_field.qre_prev : NULL))
diff --git a/dep/include/jemalloc/internal/rb.h b/dep/include/jemalloc/internal/rb.h
new file mode 100644
index 00000000000..ee9b009d235
--- /dev/null
+++ b/dep/include/jemalloc/internal/rb.h
@@ -0,0 +1,973 @@
+/*-
+ *******************************************************************************
+ *
+ * cpp macro implementation of left-leaning 2-3 red-black trees. Parent
+ * pointers are not used, and color bits are stored in the least significant
+ * bit of right-child pointers (if RB_COMPACT is defined), thus making node
+ * linkage as compact as is possible for red-black trees.
+ *
+ * Usage:
+ *
+ * #include <stdint.h>
+ * #include <stdbool.h>
+ * #define NDEBUG // (Optional, see assert(3).)
+ * #include <assert.h>
+ * #define RB_COMPACT // (Optional, embed color bits in right-child pointers.)
+ * #include <rb.h>
+ * ...
+ *
+ *******************************************************************************
+ */
+
+#ifndef RB_H_
+#define RB_H_
+
+#if 0
+__FBSDID("$FreeBSD: head/lib/libc/stdlib/rb.h 204493 2010-02-28 22:57:13Z jasone $");
+#endif
+
+#ifdef RB_COMPACT
+/* Node structure. */
+#define rb_node(a_type) \
+struct { \
+ a_type *rbn_left; \
+ a_type *rbn_right_red; \
+}
+#else
+#define rb_node(a_type) \
+struct { \
+ a_type *rbn_left; \
+ a_type *rbn_right; \
+ bool rbn_red; \
+}
+#endif
+
+/* Root structure. */
+#define rb_tree(a_type) \
+struct { \
+ a_type *rbt_root; \
+ a_type rbt_nil; \
+}
+
+/* Left accessors. */
+#define rbtn_left_get(a_type, a_field, a_node) \
+ ((a_node)->a_field.rbn_left)
+#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \
+ (a_node)->a_field.rbn_left = a_left; \
+} while (0)
+
+#ifdef RB_COMPACT
+/* Right accessors. */
+#define rbtn_right_get(a_type, a_field, a_node) \
+ ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \
+ & ((ssize_t)-2)))
+#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \
+ (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \
+ | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \
+} while (0)
+
+/* Color accessors. */
+#define rbtn_red_get(a_type, a_field, a_node) \
+ ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \
+ & ((size_t)1)))
+#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \
+ (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \
+ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \
+ | ((ssize_t)a_red)); \
+} while (0)
+#define rbtn_red_set(a_type, a_field, a_node) do { \
+ (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \
+ (a_node)->a_field.rbn_right_red) | ((size_t)1)); \
+} while (0)
+#define rbtn_black_set(a_type, a_field, a_node) do { \
+ (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \
+ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \
+} while (0)
+#else
+/* Right accessors. */
+#define rbtn_right_get(a_type, a_field, a_node) \
+ ((a_node)->a_field.rbn_right)
+#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \
+ (a_node)->a_field.rbn_right = a_right; \
+} while (0)
+
+/* Color accessors. */
+#define rbtn_red_get(a_type, a_field, a_node) \
+ ((a_node)->a_field.rbn_red)
+#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \
+ (a_node)->a_field.rbn_red = (a_red); \
+} while (0)
+#define rbtn_red_set(a_type, a_field, a_node) do { \
+ (a_node)->a_field.rbn_red = true; \
+} while (0)
+#define rbtn_black_set(a_type, a_field, a_node) do { \
+ (a_node)->a_field.rbn_red = false; \
+} while (0)
+#endif
+
+/* Node initializer. */
+#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \
+ rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \
+ rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \
+ rbtn_red_set(a_type, a_field, (a_node)); \
+} while (0)
+
+/* Tree initializer. */
+#define rb_new(a_type, a_field, a_rbt) do { \
+ (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \
+ rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \
+ rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \
+} while (0)
+
+/* Internal utility macros. */
+#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \
+ (r_node) = (a_root); \
+ if ((r_node) != &(a_rbt)->rbt_nil) { \
+ for (; \
+ rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\
+ (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \
+ } \
+ } \
+} while (0)
+
+#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \
+ (r_node) = (a_root); \
+ if ((r_node) != &(a_rbt)->rbt_nil) { \
+ for (; rbtn_right_get(a_type, a_field, (r_node)) != \
+ &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \
+ (r_node))) { \
+ } \
+ } \
+} while (0)
+
+#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \
+ (r_node) = rbtn_right_get(a_type, a_field, (a_node)); \
+ rbtn_right_set(a_type, a_field, (a_node), \
+ rbtn_left_get(a_type, a_field, (r_node))); \
+ rbtn_left_set(a_type, a_field, (r_node), (a_node)); \
+} while (0)
+
+#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \
+ (r_node) = rbtn_left_get(a_type, a_field, (a_node)); \
+ rbtn_left_set(a_type, a_field, (a_node), \
+ rbtn_right_get(a_type, a_field, (r_node))); \
+ rbtn_right_set(a_type, a_field, (r_node), (a_node)); \
+} while (0)
+
+/*
+ * The rb_proto() macro generates function prototypes that correspond to the
+ * functions generated by an equivalently parameterized call to rb_gen().
+ */
+
+#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \
+a_attr void \
+a_prefix##new(a_rbt_type *rbtree); \
+a_attr a_type * \
+a_prefix##first(a_rbt_type *rbtree); \
+a_attr a_type * \
+a_prefix##last(a_rbt_type *rbtree); \
+a_attr a_type * \
+a_prefix##next(a_rbt_type *rbtree, a_type *node); \
+a_attr a_type * \
+a_prefix##prev(a_rbt_type *rbtree, a_type *node); \
+a_attr a_type * \
+a_prefix##search(a_rbt_type *rbtree, a_type *key); \
+a_attr a_type * \
+a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \
+a_attr a_type * \
+a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \
+a_attr void \
+a_prefix##insert(a_rbt_type *rbtree, a_type *node); \
+a_attr void \
+a_prefix##remove(a_rbt_type *rbtree, a_type *node); \
+a_attr a_type * \
+a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \
+ a_rbt_type *, a_type *, void *), void *arg); \
+a_attr a_type * \
+a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg);
+
+/*
+ * The rb_gen() macro generates a type-specific red-black tree implementation,
+ * based on the above cpp macros.
+ *
+ * Arguments:
+ *
+ * a_attr : Function attribute for generated functions (ex: static).
+ * a_prefix : Prefix for generated functions (ex: ex_).
+ * a_rb_type : Type for red-black tree data structure (ex: ex_t).
+ * a_type : Type for red-black tree node data structure (ex: ex_node_t).
+ * a_field : Name of red-black tree node linkage (ex: ex_link).
+ * a_cmp : Node comparison function name, with the following prototype:
+ * int (a_cmp *)(a_type *a_node, a_type *a_other);
+ * ^^^^^^
+ * or a_key
+ *                           Interpretation of comparison function return values:
+ * -1 : a_node < a_other
+ * 0 : a_node == a_other
+ * 1 : a_node > a_other
+ * In all cases, the a_node or a_key macro argument is the first
+ * argument to the comparison function, which makes it possible
+ * to write comparison functions that treat the first argument
+ * specially.
+ *
+ * Assuming the following setup:
+ *
+ * typedef struct ex_node_s ex_node_t;
+ * struct ex_node_s {
+ * rb_node(ex_node_t) ex_link;
+ * };
+ * typedef rb_tree(ex_node_t) ex_t;
+ * rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp)
+ *
+ * The following API is generated:
+ *
+ * static void
+ * ex_new(ex_t *extree);
+ * Description: Initialize a red-black tree structure.
+ * Args:
+ * extree: Pointer to an uninitialized red-black tree object.
+ *
+ * static ex_node_t *
+ * ex_first(ex_t *extree);
+ * static ex_node_t *
+ * ex_last(ex_t *extree);
+ * Description: Get the first/last node in extree.
+ * Args:
+ * extree: Pointer to an initialized red-black tree object.
+ * Ret: First/last node in extree, or NULL if extree is empty.
+ *
+ * static ex_node_t *
+ * ex_next(ex_t *extree, ex_node_t *node);
+ * static ex_node_t *
+ * ex_prev(ex_t *extree, ex_node_t *node);
+ * Description: Get node's successor/predecessor.
+ * Args:
+ * extree: Pointer to an initialized red-black tree object.
+ * node : A node in extree.
+ * Ret: node's successor/predecessor in extree, or NULL if node is
+ * last/first.
+ *
+ * static ex_node_t *
+ * ex_search(ex_t *extree, ex_node_t *key);
+ * Description: Search for node that matches key.
+ * Args:
+ * extree: Pointer to an initialized red-black tree object.
+ * key : Search key.
+ * Ret: Node in extree that matches key, or NULL if no match.
+ *
+ * static ex_node_t *
+ * ex_nsearch(ex_t *extree, ex_node_t *key);
+ * static ex_node_t *
+ * ex_psearch(ex_t *extree, ex_node_t *key);
+ * Description: Search for node that matches key. If no match is found,
+ * return what would be key's successor/predecessor, were
+ * key in extree.
+ * Args:
+ * extree: Pointer to an initialized red-black tree object.
+ * key : Search key.
+ * Ret: Node in extree that matches key, or if no match, hypothetical
+ * node's successor/predecessor (NULL if no successor/predecessor).
+ *
+ * static void
+ * ex_insert(ex_t *extree, ex_node_t *node);
+ * Description: Insert node into extree.
+ * Args:
+ * extree: Pointer to an initialized red-black tree object.
+ * node : Node to be inserted into extree.
+ *
+ * static void
+ * ex_remove(ex_t *extree, ex_node_t *node);
+ * Description: Remove node from extree.
+ * Args:
+ * extree: Pointer to an initialized red-black tree object.
+ * node : Node in extree to be removed.
+ *
+ * static ex_node_t *
+ * ex_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
+ * ex_node_t *, void *), void *arg);
+ * static ex_node_t *
+ *   ex_reverse_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
+ * ex_node_t *, void *), void *arg);
+ * Description: Iterate forward/backward over extree, starting at node.
+ * If extree is modified, iteration must be immediately
+ * terminated by the callback function that causes the
+ * modification.
+ * Args:
+ * extree: Pointer to an initialized red-black tree object.
+ * start : Node at which to start iteration, or NULL to start at
+ * first/last node.
+ * cb : Callback function, which is called for each node during
+ * iteration. Under normal circumstances the callback function
+ * should return NULL, which causes iteration to continue. If a
+ * callback function returns non-NULL, iteration is immediately
+ * terminated and the non-NULL return value is returned by the
+ * iterator. This is useful for re-starting iteration after
+ * modifying extree.
+ * arg : Opaque pointer passed to cb().
+ * Ret: NULL if iteration completed, or the non-NULL callback return value
+ * that caused termination of the iteration.
+ */
+#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \
+a_attr void \
+a_prefix##new(a_rbt_type *rbtree) { \
+ rb_new(a_type, a_field, rbtree); \
+} \
+a_attr a_type * \
+a_prefix##first(a_rbt_type *rbtree) { \
+ a_type *ret; \
+ rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = NULL; \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##last(a_rbt_type *rbtree) { \
+ a_type *ret; \
+ rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = NULL; \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##next(a_rbt_type *rbtree, a_type *node) { \
+ a_type *ret; \
+ if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \
+ rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \
+ a_field, node), ret); \
+ } else { \
+ a_type *tnode = rbtree->rbt_root; \
+ assert(tnode != &rbtree->rbt_nil); \
+ ret = &rbtree->rbt_nil; \
+ while (true) { \
+ int cmp = (a_cmp)(node, tnode); \
+ if (cmp < 0) { \
+ ret = tnode; \
+ tnode = rbtn_left_get(a_type, a_field, tnode); \
+ } else if (cmp > 0) { \
+ tnode = rbtn_right_get(a_type, a_field, tnode); \
+ } else { \
+ break; \
+ } \
+ assert(tnode != &rbtree->rbt_nil); \
+ } \
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = (NULL); \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \
+ a_type *ret; \
+ if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \
+ rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \
+ a_field, node), ret); \
+ } else { \
+ a_type *tnode = rbtree->rbt_root; \
+ assert(tnode != &rbtree->rbt_nil); \
+ ret = &rbtree->rbt_nil; \
+ while (true) { \
+ int cmp = (a_cmp)(node, tnode); \
+ if (cmp < 0) { \
+ tnode = rbtn_left_get(a_type, a_field, tnode); \
+ } else if (cmp > 0) { \
+ ret = tnode; \
+ tnode = rbtn_right_get(a_type, a_field, tnode); \
+ } else { \
+ break; \
+ } \
+ assert(tnode != &rbtree->rbt_nil); \
+ } \
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = (NULL); \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##search(a_rbt_type *rbtree, a_type *key) { \
+ a_type *ret; \
+ int cmp; \
+ ret = rbtree->rbt_root; \
+ while (ret != &rbtree->rbt_nil \
+ && (cmp = (a_cmp)(key, ret)) != 0) { \
+ if (cmp < 0) { \
+ ret = rbtn_left_get(a_type, a_field, ret); \
+ } else { \
+ ret = rbtn_right_get(a_type, a_field, ret); \
+ } \
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = (NULL); \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \
+ a_type *ret; \
+ a_type *tnode = rbtree->rbt_root; \
+ ret = &rbtree->rbt_nil; \
+ while (tnode != &rbtree->rbt_nil) { \
+ int cmp = (a_cmp)(key, tnode); \
+ if (cmp < 0) { \
+ ret = tnode; \
+ tnode = rbtn_left_get(a_type, a_field, tnode); \
+ } else if (cmp > 0) { \
+ tnode = rbtn_right_get(a_type, a_field, tnode); \
+ } else { \
+ ret = tnode; \
+ break; \
+ } \
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = (NULL); \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \
+ a_type *ret; \
+ a_type *tnode = rbtree->rbt_root; \
+ ret = &rbtree->rbt_nil; \
+ while (tnode != &rbtree->rbt_nil) { \
+ int cmp = (a_cmp)(key, tnode); \
+ if (cmp < 0) { \
+ tnode = rbtn_left_get(a_type, a_field, tnode); \
+ } else if (cmp > 0) { \
+ ret = tnode; \
+ tnode = rbtn_right_get(a_type, a_field, tnode); \
+ } else { \
+ ret = tnode; \
+ break; \
+ } \
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = (NULL); \
+ } \
+ return (ret); \
+} \
+a_attr void \
+a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
+ struct { \
+ a_type *node; \
+ int cmp; \
+ } path[sizeof(void *) << 4], *pathp; \
+ rbt_node_new(a_type, a_field, rbtree, node); \
+ /* Wind. */ \
+ path->node = rbtree->rbt_root; \
+ for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \
+ int cmp = pathp->cmp = a_cmp(node, pathp->node); \
+ assert(cmp != 0); \
+ if (cmp < 0) { \
+ pathp[1].node = rbtn_left_get(a_type, a_field, \
+ pathp->node); \
+ } else { \
+ pathp[1].node = rbtn_right_get(a_type, a_field, \
+ pathp->node); \
+ } \
+ } \
+ pathp->node = node; \
+ /* Unwind. */ \
+ for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \
+ a_type *cnode = pathp->node; \
+ if (pathp->cmp < 0) { \
+ a_type *left = pathp[1].node; \
+ rbtn_left_set(a_type, a_field, cnode, left); \
+ if (rbtn_red_get(a_type, a_field, left)) { \
+ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
+ if (rbtn_red_get(a_type, a_field, leftleft)) { \
+ /* Fix up 4-node. */ \
+ a_type *tnode; \
+ rbtn_black_set(a_type, a_field, leftleft); \
+ rbtn_rotate_right(a_type, a_field, cnode, tnode); \
+ cnode = tnode; \
+ } \
+ } else { \
+ return; \
+ } \
+ } else { \
+ a_type *right = pathp[1].node; \
+ rbtn_right_set(a_type, a_field, cnode, right); \
+ if (rbtn_red_get(a_type, a_field, right)) { \
+ a_type *left = rbtn_left_get(a_type, a_field, cnode); \
+ if (rbtn_red_get(a_type, a_field, left)) { \
+ /* Split 4-node. */ \
+ rbtn_black_set(a_type, a_field, left); \
+ rbtn_black_set(a_type, a_field, right); \
+ rbtn_red_set(a_type, a_field, cnode); \
+ } else { \
+ /* Lean left. */ \
+ a_type *tnode; \
+ bool tred = rbtn_red_get(a_type, a_field, cnode); \
+ rbtn_rotate_left(a_type, a_field, cnode, tnode); \
+ rbtn_color_set(a_type, a_field, tnode, tred); \
+ rbtn_red_set(a_type, a_field, cnode); \
+ cnode = tnode; \
+ } \
+ } else { \
+ return; \
+ } \
+ } \
+ pathp->node = cnode; \
+ } \
+ /* Set root, and make it black. */ \
+ rbtree->rbt_root = path->node; \
+ rbtn_black_set(a_type, a_field, rbtree->rbt_root); \
+} \
+a_attr void \
+a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
+ struct { \
+ a_type *node; \
+ int cmp; \
+ } *pathp, *nodep, path[sizeof(void *) << 4]; \
+ /* Wind. */ \
+ nodep = NULL; /* Silence compiler warning. */ \
+ path->node = rbtree->rbt_root; \
+ for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \
+ int cmp = pathp->cmp = a_cmp(node, pathp->node); \
+ if (cmp < 0) { \
+ pathp[1].node = rbtn_left_get(a_type, a_field, \
+ pathp->node); \
+ } else { \
+ pathp[1].node = rbtn_right_get(a_type, a_field, \
+ pathp->node); \
+ if (cmp == 0) { \
+ /* Find node's successor, in preparation for swap. */ \
+ pathp->cmp = 1; \
+ nodep = pathp; \
+ for (pathp++; pathp->node != &rbtree->rbt_nil; \
+ pathp++) { \
+ pathp->cmp = -1; \
+ pathp[1].node = rbtn_left_get(a_type, a_field, \
+ pathp->node); \
+ } \
+ break; \
+ } \
+ } \
+ } \
+ assert(nodep->node == node); \
+ pathp--; \
+ if (pathp->node != node) { \
+ /* Swap node with its successor. */ \
+ bool tred = rbtn_red_get(a_type, a_field, pathp->node); \
+ rbtn_color_set(a_type, a_field, pathp->node, \
+ rbtn_red_get(a_type, a_field, node)); \
+ rbtn_left_set(a_type, a_field, pathp->node, \
+ rbtn_left_get(a_type, a_field, node)); \
+ /* If node's successor is its right child, the following code */\
+ /* will do the wrong thing for the right child pointer. */\
+ /* However, it doesn't matter, because the pointer will be */\
+ /* properly set when the successor is pruned. */\
+ rbtn_right_set(a_type, a_field, pathp->node, \
+ rbtn_right_get(a_type, a_field, node)); \
+ rbtn_color_set(a_type, a_field, node, tred); \
+ /* The pruned leaf node's child pointers are never accessed */\
+ /* again, so don't bother setting them to nil. */\
+ nodep->node = pathp->node; \
+ pathp->node = node; \
+ if (nodep == path) { \
+ rbtree->rbt_root = nodep->node; \
+ } else { \
+ if (nodep[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, nodep[-1].node, \
+ nodep->node); \
+ } else { \
+ rbtn_right_set(a_type, a_field, nodep[-1].node, \
+ nodep->node); \
+ } \
+ } \
+ } else { \
+ a_type *left = rbtn_left_get(a_type, a_field, node); \
+ if (left != &rbtree->rbt_nil) { \
+ /* node has no successor, but it has a left child. */\
+ /* Splice node out, without losing the left child. */\
+ assert(rbtn_red_get(a_type, a_field, node) == false); \
+ assert(rbtn_red_get(a_type, a_field, left)); \
+ rbtn_black_set(a_type, a_field, left); \
+ if (pathp == path) { \
+ rbtree->rbt_root = left; \
+ } else { \
+ if (pathp[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, pathp[-1].node, \
+ left); \
+ } else { \
+ rbtn_right_set(a_type, a_field, pathp[-1].node, \
+ left); \
+ } \
+ } \
+ return; \
+ } else if (pathp == path) { \
+ /* The tree only contained one node. */ \
+ rbtree->rbt_root = &rbtree->rbt_nil; \
+ return; \
+ } \
+ } \
+ if (rbtn_red_get(a_type, a_field, pathp->node)) { \
+ /* Prune red node, which requires no fixup. */ \
+ assert(pathp[-1].cmp < 0); \
+ rbtn_left_set(a_type, a_field, pathp[-1].node, \
+ &rbtree->rbt_nil); \
+ return; \
+ } \
+ /* The node to be pruned is black, so unwind until balance is */\
+ /* restored. */\
+ pathp->node = &rbtree->rbt_nil; \
+ for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \
+ assert(pathp->cmp != 0); \
+ if (pathp->cmp < 0) { \
+ rbtn_left_set(a_type, a_field, pathp->node, \
+ pathp[1].node); \
+ assert(rbtn_red_get(a_type, a_field, pathp[1].node) \
+ == false); \
+ if (rbtn_red_get(a_type, a_field, pathp->node)) { \
+ a_type *right = rbtn_right_get(a_type, a_field, \
+ pathp->node); \
+ a_type *rightleft = rbtn_left_get(a_type, a_field, \
+ right); \
+ a_type *tnode; \
+ if (rbtn_red_get(a_type, a_field, rightleft)) { \
+ /* In the following diagrams, ||, //, and \\ */\
+ /* indicate the path to the removed node. */\
+ /* */\
+ /* || */\
+ /* pathp(r) */\
+ /* // \ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (r) */\
+ /* */\
+ rbtn_black_set(a_type, a_field, pathp->node); \
+ rbtn_rotate_right(a_type, a_field, right, tnode); \
+ rbtn_right_set(a_type, a_field, pathp->node, tnode);\
+ rbtn_rotate_left(a_type, a_field, pathp->node, \
+ tnode); \
+ } else { \
+ /* || */\
+ /* pathp(r) */\
+ /* // \ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (b) */\
+ /* */\
+ rbtn_rotate_left(a_type, a_field, pathp->node, \
+ tnode); \
+ } \
+ /* Balance restored, but rotation modified subtree */\
+ /* root. */\
+ assert((uintptr_t)pathp > (uintptr_t)path); \
+ if (pathp[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, pathp[-1].node, \
+ tnode); \
+ } else { \
+ rbtn_right_set(a_type, a_field, pathp[-1].node, \
+ tnode); \
+ } \
+ return; \
+ } else { \
+ a_type *right = rbtn_right_get(a_type, a_field, \
+ pathp->node); \
+ a_type *rightleft = rbtn_left_get(a_type, a_field, \
+ right); \
+ if (rbtn_red_get(a_type, a_field, rightleft)) { \
+ /* || */\
+ /* pathp(b) */\
+ /* // \ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (r) */\
+ a_type *tnode; \
+ rbtn_black_set(a_type, a_field, rightleft); \
+ rbtn_rotate_right(a_type, a_field, right, tnode); \
+ rbtn_right_set(a_type, a_field, pathp->node, tnode);\
+ rbtn_rotate_left(a_type, a_field, pathp->node, \
+ tnode); \
+ /* Balance restored, but rotation modified */\
+		    /* subtree root, which may actually be the tree */\
+ /* root. */\
+ if (pathp == path) { \
+ /* Set root. */ \
+ rbtree->rbt_root = tnode; \
+ } else { \
+ if (pathp[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, \
+ pathp[-1].node, tnode); \
+ } else { \
+ rbtn_right_set(a_type, a_field, \
+ pathp[-1].node, tnode); \
+ } \
+ } \
+ return; \
+ } else { \
+ /* || */\
+ /* pathp(b) */\
+ /* // \ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (b) */\
+ a_type *tnode; \
+ rbtn_red_set(a_type, a_field, pathp->node); \
+ rbtn_rotate_left(a_type, a_field, pathp->node, \
+ tnode); \
+ pathp->node = tnode; \
+ } \
+ } \
+ } else { \
+ a_type *left; \
+ rbtn_right_set(a_type, a_field, pathp->node, \
+ pathp[1].node); \
+ left = rbtn_left_get(a_type, a_field, pathp->node); \
+ if (rbtn_red_get(a_type, a_field, left)) { \
+ a_type *tnode; \
+ a_type *leftright = rbtn_right_get(a_type, a_field, \
+ left); \
+ a_type *leftrightleft = rbtn_left_get(a_type, a_field, \
+ leftright); \
+ if (rbtn_red_get(a_type, a_field, leftrightleft)) { \
+ /* || */\
+ /* pathp(b) */\
+ /* / \\ */\
+ /* (r) (b) */\
+ /* \ */\
+ /* (b) */\
+ /* / */\
+ /* (r) */\
+ a_type *unode; \
+ rbtn_black_set(a_type, a_field, leftrightleft); \
+ rbtn_rotate_right(a_type, a_field, pathp->node, \
+ unode); \
+ rbtn_rotate_right(a_type, a_field, pathp->node, \
+ tnode); \
+ rbtn_right_set(a_type, a_field, unode, tnode); \
+ rbtn_rotate_left(a_type, a_field, unode, tnode); \
+ } else { \
+ /* || */\
+ /* pathp(b) */\
+ /* / \\ */\
+ /* (r) (b) */\
+ /* \ */\
+ /* (b) */\
+ /* / */\
+ /* (b) */\
+ assert(leftright != &rbtree->rbt_nil); \
+ rbtn_red_set(a_type, a_field, leftright); \
+ rbtn_rotate_right(a_type, a_field, pathp->node, \
+ tnode); \
+ rbtn_black_set(a_type, a_field, tnode); \
+ } \
+ /* Balance restored, but rotation modified subtree */\
+ /* root, which may actually be the tree root. */\
+ if (pathp == path) { \
+ /* Set root. */ \
+ rbtree->rbt_root = tnode; \
+ } else { \
+ if (pathp[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, pathp[-1].node, \
+ tnode); \
+ } else { \
+ rbtn_right_set(a_type, a_field, pathp[-1].node, \
+ tnode); \
+ } \
+ } \
+ return; \
+ } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \
+ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
+ if (rbtn_red_get(a_type, a_field, leftleft)) { \
+ /* || */\
+ /* pathp(r) */\
+ /* / \\ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (r) */\
+ a_type *tnode; \
+ rbtn_black_set(a_type, a_field, pathp->node); \
+ rbtn_red_set(a_type, a_field, left); \
+ rbtn_black_set(a_type, a_field, leftleft); \
+ rbtn_rotate_right(a_type, a_field, pathp->node, \
+ tnode); \
+ /* Balance restored, but rotation modified */\
+ /* subtree root. */\
+ assert((uintptr_t)pathp > (uintptr_t)path); \
+ if (pathp[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, pathp[-1].node, \
+ tnode); \
+ } else { \
+ rbtn_right_set(a_type, a_field, pathp[-1].node, \
+ tnode); \
+ } \
+ return; \
+ } else { \
+ /* || */\
+ /* pathp(r) */\
+ /* / \\ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (b) */\
+ rbtn_red_set(a_type, a_field, left); \
+ rbtn_black_set(a_type, a_field, pathp->node); \
+ /* Balance restored. */ \
+ return; \
+ } \
+ } else { \
+ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
+ if (rbtn_red_get(a_type, a_field, leftleft)) { \
+ /* || */\
+ /* pathp(b) */\
+ /* / \\ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (r) */\
+ a_type *tnode; \
+ rbtn_black_set(a_type, a_field, leftleft); \
+ rbtn_rotate_right(a_type, a_field, pathp->node, \
+ tnode); \
+ /* Balance restored, but rotation modified */\
+ /* subtree root, which may actually be the tree */\
+ /* root. */\
+ if (pathp == path) { \
+ /* Set root. */ \
+ rbtree->rbt_root = tnode; \
+ } else { \
+ if (pathp[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, \
+ pathp[-1].node, tnode); \
+ } else { \
+ rbtn_right_set(a_type, a_field, \
+ pathp[-1].node, tnode); \
+ } \
+ } \
+ return; \
+ } else { \
+ /* || */\
+ /* pathp(b) */\
+ /* / \\ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (b) */\
+ rbtn_red_set(a_type, a_field, left); \
+ } \
+ } \
+ } \
+ } \
+ /* Set root. */ \
+ rbtree->rbt_root = path->node; \
+ assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \
+} \
+a_attr a_type * \
+a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
+ if (node == &rbtree->rbt_nil) { \
+ return (&rbtree->rbt_nil); \
+ } else { \
+ a_type *ret; \
+ if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \
+ a_field, node), cb, arg)) != &rbtree->rbt_nil \
+ || (ret = cb(rbtree, node, arg)) != NULL) { \
+ return (ret); \
+ } \
+ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
+ a_field, node), cb, arg)); \
+ } \
+} \
+a_attr a_type * \
+a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
+ int cmp = a_cmp(start, node); \
+ if (cmp < 0) { \
+ a_type *ret; \
+ if ((ret = a_prefix##iter_start(rbtree, start, \
+ rbtn_left_get(a_type, a_field, node), cb, arg)) != \
+ &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
+ return (ret); \
+ } \
+ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
+ a_field, node), cb, arg)); \
+ } else if (cmp > 0) { \
+ return (a_prefix##iter_start(rbtree, start, \
+ rbtn_right_get(a_type, a_field, node), cb, arg)); \
+ } else { \
+ a_type *ret; \
+ if ((ret = cb(rbtree, node, arg)) != NULL) { \
+ return (ret); \
+ } \
+ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
+ a_field, node), cb, arg)); \
+ } \
+} \
+a_attr a_type * \
+a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \
+ a_rbt_type *, a_type *, void *), void *arg) { \
+ a_type *ret; \
+ if (start != NULL) { \
+ ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root, \
+ cb, arg); \
+ } else { \
+ ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = NULL; \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
+ if (node == &rbtree->rbt_nil) { \
+ return (&rbtree->rbt_nil); \
+ } else { \
+ a_type *ret; \
+ if ((ret = a_prefix##reverse_iter_recurse(rbtree, \
+ rbtn_right_get(a_type, a_field, node), cb, arg)) != \
+ &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
+ return (ret); \
+ } \
+ return (a_prefix##reverse_iter_recurse(rbtree, \
+ rbtn_left_get(a_type, a_field, node), cb, arg)); \
+ } \
+} \
+a_attr a_type * \
+a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \
+ a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \
+ void *arg) { \
+ int cmp = a_cmp(start, node); \
+ if (cmp > 0) { \
+ a_type *ret; \
+ if ((ret = a_prefix##reverse_iter_start(rbtree, start, \
+ rbtn_right_get(a_type, a_field, node), cb, arg)) != \
+ &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
+ return (ret); \
+ } \
+ return (a_prefix##reverse_iter_recurse(rbtree, \
+ rbtn_left_get(a_type, a_field, node), cb, arg)); \
+ } else if (cmp < 0) { \
+ return (a_prefix##reverse_iter_start(rbtree, start, \
+ rbtn_left_get(a_type, a_field, node), cb, arg)); \
+ } else { \
+ a_type *ret; \
+ if ((ret = cb(rbtree, node, arg)) != NULL) { \
+ return (ret); \
+ } \
+ return (a_prefix##reverse_iter_recurse(rbtree, \
+ rbtn_left_get(a_type, a_field, node), cb, arg)); \
+ } \
+} \
+a_attr a_type * \
+a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
+ a_type *ret; \
+ if (start != NULL) { \
+ ret = a_prefix##reverse_iter_start(rbtree, start, \
+ rbtree->rbt_root, cb, arg); \
+ } else { \
+ ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \
+ cb, arg); \
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = NULL; \
+ } \
+ return (ret); \
+}
+
+#endif /* RB_H_ */
diff --git a/dep/include/jemalloc/internal/stats.h b/dep/include/jemalloc/internal/stats.h
new file mode 100644
index 00000000000..cbf035ff2b9
--- /dev/null
+++ b/dep/include/jemalloc/internal/stats.h
@@ -0,0 +1,174 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#define UMAX2S_BUFSIZE 65
+
+#ifdef JEMALLOC_STATS
+typedef struct tcache_bin_stats_s tcache_bin_stats_t;
+typedef struct malloc_bin_stats_s malloc_bin_stats_t;
+typedef struct malloc_large_stats_s malloc_large_stats_t;
+typedef struct arena_stats_s arena_stats_t;
+#endif
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+typedef struct chunk_stats_s chunk_stats_t;
+#endif
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#ifdef JEMALLOC_STATS
+
+#ifdef JEMALLOC_TCACHE
+struct tcache_bin_stats_s {
+ /*
+ * Number of allocation requests that corresponded to the size of this
+ * bin.
+ */
+ uint64_t nrequests;
+};
+#endif
+
+struct malloc_bin_stats_s {
+ /*
+ * Current number of bytes allocated, including objects currently
+ * cached by tcache.
+ */
+ size_t allocated;
+
+ /*
+ * Total number of allocation/deallocation requests served directly by
+ * the bin. Note that tcache may allocate an object, then recycle it
+	 * many times, resulting in many increments to nrequests, but only one
+ * each to nmalloc and ndalloc.
+ */
+ uint64_t nmalloc;
+ uint64_t ndalloc;
+
+ /*
+ * Number of allocation requests that correspond to the size of this
+ * bin. This includes requests served by tcache, though tcache only
+ * periodically merges into this counter.
+ */
+ uint64_t nrequests;
+
+#ifdef JEMALLOC_TCACHE
+ /* Number of tcache fills from this bin. */
+ uint64_t nfills;
+
+ /* Number of tcache flushes to this bin. */
+ uint64_t nflushes;
+#endif
+
+ /* Total number of runs created for this bin's size class. */
+ uint64_t nruns;
+
+ /*
+ * Total number of runs reused by extracting them from the runs tree for
+ * this bin's size class.
+ */
+ uint64_t reruns;
+
+ /* High-water mark for this bin. */
+ size_t highruns;
+
+ /* Current number of runs in this bin. */
+ size_t curruns;
+};
+
+struct malloc_large_stats_s {
+ /*
+ * Total number of allocation/deallocation requests served directly by
+ * the arena. Note that tcache may allocate an object, then recycle it
+	 * many times, resulting in many increments to nrequests, but only one
+ * each to nmalloc and ndalloc.
+ */
+ uint64_t nmalloc;
+ uint64_t ndalloc;
+
+ /*
+ * Number of allocation requests that correspond to this size class.
+ * This includes requests served by tcache, though tcache only
+ * periodically merges into this counter.
+ */
+ uint64_t nrequests;
+
+ /* High-water mark for this size class. */
+ size_t highruns;
+
+ /* Current number of runs of this size class. */
+ size_t curruns;
+};
+
+struct arena_stats_s {
+ /* Number of bytes currently mapped. */
+ size_t mapped;
+
+ /*
+ * Total number of purge sweeps, total number of madvise calls made,
+ * and total pages purged in order to keep dirty unused memory under
+ * control.
+ */
+ uint64_t npurge;
+ uint64_t nmadvise;
+ uint64_t purged;
+
+ /* Per-size-category statistics. */
+ size_t allocated_large;
+ uint64_t nmalloc_large;
+ uint64_t ndalloc_large;
+ uint64_t nrequests_large;
+
+ /*
+ * One element for each possible size class, including sizes that
+ * overlap with bin size classes. This is necessary because ipalloc()
+ * sometimes has to use such large objects in order to assure proper
+ * alignment.
+ */
+ malloc_large_stats_t *lstats;
+};
+#endif /* JEMALLOC_STATS */
+
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+struct chunk_stats_s {
+# ifdef JEMALLOC_STATS
+ /* Number of chunks that were allocated. */
+ uint64_t nchunks;
+# endif
+
+ /* High-water mark for number of chunks allocated. */
+ size_t highchunks;
+
+ /*
+ * Current number of chunks allocated. This value isn't maintained for
+ * any other purpose, so keep track of it in order to be able to set
+ * highchunks.
+ */
+ size_t curchunks;
+};
+#endif /* JEMALLOC_STATS || JEMALLOC_PROF */
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern bool opt_stats_print;
+
+char *umax2s(uintmax_t x, unsigned base, char *s);
+#ifdef JEMALLOC_STATS
+void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
+ const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
+void malloc_printf(const char *format, ...)
+ JEMALLOC_ATTR(format(printf, 1, 2));
+#endif
+void stats_print(void (*write)(void *, const char *), void *cbopaque,
+ const char *opts);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_STATS
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+#endif /* JEMALLOC_STATS */
+/******************************************************************************/
diff --git a/dep/include/jemalloc/internal/tcache.h b/dep/include/jemalloc/internal/tcache.h
new file mode 100644
index 00000000000..c76597fafab
--- /dev/null
+++ b/dep/include/jemalloc/internal/tcache.h
@@ -0,0 +1,380 @@
+#ifdef JEMALLOC_TCACHE
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct tcache_bin_s tcache_bin_t;
+typedef struct tcache_s tcache_t;
+
+/*
+ * Absolute maximum number of cache slots for each small bin in the thread
+ * cache. This is an additional constraint beyond that imposed as: twice the
+ * number of regions per run for this size class.
+ *
+ * This constant must be an even number.
+ */
+#define TCACHE_NSLOTS_SMALL_MAX 200
+
+/* Number of cache slots for large size classes. */
+#define TCACHE_NSLOTS_LARGE 20
+
+/* (1U << opt_lg_tcache_maxclass) is used to compute tcache_maxclass. */
+#define LG_TCACHE_MAXCLASS_DEFAULT 15
+
+/*
+ * (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation
+ * events between full GC sweeps (-1: disabled). Integer rounding may cause
+ * the actual number to be slightly higher, since GC is performed
+ * incrementally.
+ */
+#define LG_TCACHE_GC_SWEEP_DEFAULT 13
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct tcache_bin_s {
+# ifdef JEMALLOC_STATS
+ tcache_bin_stats_t tstats;
+# endif
+ unsigned low_water; /* Min # cached since last GC. */
+ unsigned high_water; /* Max # cached since last GC. */
+ unsigned ncached; /* # of cached objects. */
+ unsigned ncached_max; /* Upper limit on ncached. */
+ void *avail; /* Chain of available objects. */
+};
+
+struct tcache_s {
+# ifdef JEMALLOC_STATS
+ ql_elm(tcache_t) link; /* Used for aggregating stats. */
+# endif
+# ifdef JEMALLOC_PROF
+ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */
+# endif
+ arena_t *arena; /* This thread's arena. */
+ unsigned ev_cnt; /* Event count since incremental GC. */
+ unsigned next_gc_bin; /* Next bin to GC. */
+ tcache_bin_t tbins[1]; /* Dynamically sized. */
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern bool opt_tcache;
+extern ssize_t opt_lg_tcache_maxclass;
+extern ssize_t opt_lg_tcache_gc_sweep;
+
+/* Map of thread-specific caches. */
+extern __thread tcache_t *tcache_tls
+ JEMALLOC_ATTR(tls_model("initial-exec"));
+
+/*
+ * Number of tcache bins. There are nbins small-object bins, plus 0 or more
+ * large-object bins.
+ */
+extern size_t nhbins;
+
+/* Maximum cached size class. */
+extern size_t tcache_maxclass;
+
+/* Number of tcache allocation/deallocation events between incremental GCs. */
+extern unsigned tcache_gc_incr;
+
+void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache_t *tcache
+#endif
+ );
+void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache_t *tcache
+#endif
+ );
+tcache_t *tcache_create(arena_t *arena);
+void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin,
+ size_t binind);
+void tcache_destroy(tcache_t *tcache);
+#ifdef JEMALLOC_STATS
+void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
+#endif
+void tcache_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void tcache_event(tcache_t *tcache);
+tcache_t *tcache_get(void);
+void *tcache_alloc_easy(tcache_bin_t *tbin);
+void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero);
+void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero);
+void tcache_dalloc_small(tcache_t *tcache, void *ptr);
+void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
+JEMALLOC_INLINE tcache_t *
+tcache_get(void)
+{
+ tcache_t *tcache;
+
+ if ((isthreaded & opt_tcache) == false)
+ return (NULL);
+
+ tcache = tcache_tls;
+ if ((uintptr_t)tcache <= (uintptr_t)1) {
+ if (tcache == NULL) {
+ tcache = tcache_create(choose_arena());
+ if (tcache == NULL)
+ return (NULL);
+ } else
+ return (NULL);
+ }
+
+ return (tcache);
+}
+
+JEMALLOC_INLINE void
+tcache_event(tcache_t *tcache)
+{
+
+ if (tcache_gc_incr == 0)
+ return;
+
+ tcache->ev_cnt++;
+ assert(tcache->ev_cnt <= tcache_gc_incr);
+ if (tcache->ev_cnt == tcache_gc_incr) {
+ size_t binind = tcache->next_gc_bin;
+ tcache_bin_t *tbin = &tcache->tbins[binind];
+
+ if (tbin->low_water > 0) {
+ /*
+ * Flush (ceiling) 3/4 of the objects below the low
+ * water mark.
+ */
+ if (binind < nbins) {
+ tcache_bin_flush_small(tbin, binind,
+ tbin->ncached - tbin->low_water +
+ (tbin->low_water >> 2)
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache
+#endif
+ );
+ } else {
+ tcache_bin_flush_large(tbin, binind,
+ tbin->ncached - tbin->low_water +
+ (tbin->low_water >> 2)
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache
+#endif
+ );
+ }
+ }
+ tbin->low_water = tbin->ncached;
+ tbin->high_water = tbin->ncached;
+
+ tcache->next_gc_bin++;
+ if (tcache->next_gc_bin == nhbins)
+ tcache->next_gc_bin = 0;
+ tcache->ev_cnt = 0;
+ }
+}
+
+JEMALLOC_INLINE void *
+tcache_alloc_easy(tcache_bin_t *tbin)
+{
+ void *ret;
+
+ if (tbin->ncached == 0)
+ return (NULL);
+ tbin->ncached--;
+ if (tbin->ncached < tbin->low_water)
+ tbin->low_water = tbin->ncached;
+ ret = tbin->avail;
+ tbin->avail = *(void **)ret;
+ return (ret);
+}
+
+JEMALLOC_INLINE void *
+tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
+{
+ void *ret;
+ size_t binind;
+ tcache_bin_t *tbin;
+
+ binind = small_size2bin[size];
+ assert(binind < nbins);
+ tbin = &tcache->tbins[binind];
+ ret = tcache_alloc_easy(tbin);
+ if (ret == NULL) {
+ ret = tcache_alloc_small_hard(tcache, tbin, binind);
+ if (ret == NULL)
+ return (NULL);
+ }
+ assert(arena_salloc(ret) == tcache->arena->bins[binind].reg_size);
+
+ if (zero == false) {
+#ifdef JEMALLOC_FILL
+ if (opt_junk)
+ memset(ret, 0xa5, size);
+ else if (opt_zero)
+ memset(ret, 0, size);
+#endif
+ } else
+ memset(ret, 0, size);
+
+#ifdef JEMALLOC_STATS
+ tbin->tstats.nrequests++;
+#endif
+#ifdef JEMALLOC_PROF
+ tcache->prof_accumbytes += tcache->arena->bins[binind].reg_size;
+#endif
+ tcache_event(tcache);
+ return (ret);
+}
+
+JEMALLOC_INLINE void *
+tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
+{
+ void *ret;
+ size_t binind;
+ tcache_bin_t *tbin;
+
+ size = PAGE_CEILING(size);
+ assert(size <= tcache_maxclass);
+ binind = nbins + (size >> PAGE_SHIFT) - 1;
+ assert(binind < nhbins);
+ tbin = &tcache->tbins[binind];
+ ret = tcache_alloc_easy(tbin);
+ if (ret == NULL) {
+ /*
+ * Only allocate one large object at a time, because it's quite
+ * expensive to create one and not use it.
+ */
+ ret = arena_malloc_large(tcache->arena, size, zero);
+ if (ret == NULL)
+ return (NULL);
+ } else {
+#ifdef JEMALLOC_PROF
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
+ size_t pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk)
+ >> PAGE_SHIFT);
+ chunk->map[pageind].bits |= CHUNK_MAP_CLASS_MASK;
+#endif
+ if (zero == false) {
+#ifdef JEMALLOC_FILL
+ if (opt_junk)
+ memset(ret, 0xa5, size);
+ else if (opt_zero)
+ memset(ret, 0, size);
+#endif
+ } else
+ memset(ret, 0, size);
+
+#ifdef JEMALLOC_STATS
+ tbin->tstats.nrequests++;
+#endif
+#ifdef JEMALLOC_PROF
+ tcache->prof_accumbytes += size;
+#endif
+ }
+
+ tcache_event(tcache);
+ return (ret);
+}
+
+JEMALLOC_INLINE void
+tcache_dalloc_small(tcache_t *tcache, void *ptr)
+{
+ arena_t *arena;
+ arena_chunk_t *chunk;
+ arena_run_t *run;
+ arena_bin_t *bin;
+ tcache_bin_t *tbin;
+ size_t pageind, binind;
+ arena_chunk_map_t *mapelm;
+
+ assert(arena_salloc(ptr) <= small_maxclass);
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ arena = chunk->arena;
+ pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+ mapelm = &chunk->map[pageind];
+ run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
+ (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
+ assert(run->magic == ARENA_RUN_MAGIC);
+ bin = run->bin;
+ binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
+ sizeof(arena_bin_t);
+ assert(binind < nbins);
+
+#ifdef JEMALLOC_FILL
+ if (opt_junk)
+ memset(ptr, 0x5a, bin->reg_size);
+#endif
+
+ tbin = &tcache->tbins[binind];
+ if (tbin->ncached == tbin->ncached_max) {
+ tcache_bin_flush_small(tbin, binind, (tbin->ncached_max >> 1)
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache
+#endif
+ );
+ }
+ assert(tbin->ncached < tbin->ncached_max);
+ *(void **)ptr = tbin->avail;
+ tbin->avail = ptr;
+ tbin->ncached++;
+ if (tbin->ncached > tbin->high_water)
+ tbin->high_water = tbin->ncached;
+
+ tcache_event(tcache);
+}
+
+JEMALLOC_INLINE void
+tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
+{
+ arena_t *arena;
+ arena_chunk_t *chunk;
+ size_t pageind, binind;
+ tcache_bin_t *tbin;
+ arena_chunk_map_t *mapelm;
+
+ assert((size & PAGE_MASK) == 0);
+ assert(arena_salloc(ptr) > small_maxclass);
+ assert(arena_salloc(ptr) <= tcache_maxclass);
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ arena = chunk->arena;
+ pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+ mapelm = &chunk->map[pageind];
+ binind = nbins + (size >> PAGE_SHIFT) - 1;
+
+#ifdef JEMALLOC_FILL
+ if (opt_junk)
+		memset(ptr, 0x5a, size);
+#endif
+
+ tbin = &tcache->tbins[binind];
+ if (tbin->ncached == tbin->ncached_max) {
+ tcache_bin_flush_large(tbin, binind, (tbin->ncached_max >> 1)
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache
+#endif
+ );
+ }
+ assert(tbin->ncached < tbin->ncached_max);
+ *(void **)ptr = tbin->avail;
+ tbin->avail = ptr;
+ tbin->ncached++;
+ if (tbin->ncached > tbin->high_water)
+ tbin->high_water = tbin->ncached;
+
+ tcache_event(tcache);
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+#endif /* JEMALLOC_TCACHE */
diff --git a/dep/include/jemalloc/jemalloc.h b/dep/include/jemalloc/jemalloc.h
new file mode 100644
index 00000000000..d9bafbfff55
--- /dev/null
+++ b/dep/include/jemalloc/jemalloc.h
@@ -0,0 +1,42 @@
+#ifndef JEMALLOC_H_
+#define JEMALLOC_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define JEMALLOC_VERSION "1.0.0-0-g5523399"
+#define JEMALLOC_VERSION_MAJOR 1
+#define JEMALLOC_VERSION_MINOR 0
+#define JEMALLOC_VERSION_BUGFIX 0
+#define JEMALLOC_VERSION_NREV 0
+#define JEMALLOC_VERSION_GID "5523399"
+
+#include "jemalloc_defs.h"
+#ifndef JEMALLOC_P
+# define JEMALLOC_P(s) s
+#endif
+
+extern const char *JEMALLOC_P(malloc_options);
+extern void (*JEMALLOC_P(malloc_message))(void *, const char *);
+
+void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc);
+void *JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc);
+int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
+ JEMALLOC_ATTR(nonnull(1));
+void *JEMALLOC_P(realloc)(void *ptr, size_t size);
+void JEMALLOC_P(free)(void *ptr);
+
+size_t JEMALLOC_P(malloc_usable_size)(const void *ptr);
+void JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *),
+ void *cbopaque, const char *opts);
+int JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen);
+int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp,
+ size_t *miblenp);
+int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen);
+
+#ifdef __cplusplus
+};
+#endif
+#endif /* JEMALLOC_H_ */
diff --git a/dep/include/jemalloc/jemalloc.h.in b/dep/include/jemalloc/jemalloc.h.in
new file mode 100644
index 00000000000..8ef8183686e
--- /dev/null
+++ b/dep/include/jemalloc/jemalloc.h.in
@@ -0,0 +1,42 @@
+#ifndef JEMALLOC_H_
+#define JEMALLOC_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define JEMALLOC_VERSION "@jemalloc_version@"
+#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@
+#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@
+#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@
+#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@
+#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@"
+
+#include "jemalloc_defs@install_suffix@.h"
+#ifndef JEMALLOC_P
+# define JEMALLOC_P(s) s
+#endif
+
+extern const char *JEMALLOC_P(malloc_options);
+extern void (*JEMALLOC_P(malloc_message))(void *, const char *);
+
+void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc);
+void *JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc);
+int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
+ JEMALLOC_ATTR(nonnull(1));
+void *JEMALLOC_P(realloc)(void *ptr, size_t size);
+void JEMALLOC_P(free)(void *ptr);
+
+size_t JEMALLOC_P(malloc_usable_size)(const void *ptr);
+void JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *),
+ void *cbopaque, const char *opts);
+int JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen);
+int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp,
+ size_t *miblenp);
+int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen);
+
+#ifdef __cplusplus
+};
+#endif
+#endif /* JEMALLOC_H_ */
diff --git a/dep/include/jemalloc/jemalloc_defs.h b/dep/include/jemalloc/jemalloc_defs.h
new file mode 100644
index 00000000000..e8acaed3abd
--- /dev/null
+++ b/dep/include/jemalloc/jemalloc_defs.h
@@ -0,0 +1,102 @@
+/* include/jemalloc/jemalloc_defs.h. Generated from jemalloc_defs.h.in by configure. */
+#ifndef JEMALLOC_DEFS_H_
+#define JEMALLOC_DEFS_H_
+
+/*
+ * If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed.
+ * This makes it possible, with some care, to use multiple allocators
+ * simultaneously.
+ *
+ * In many cases it is more convenient to manually prefix allocator function
+ * calls than to let macros do it automatically, particularly when using
+ * multiple allocators simultaneously. Define JEMALLOC_MANGLE before
+ * #include'ing jemalloc.h in order to cause name mangling that corresponds to
+ * the API prefixing.
+ */
+/* #undef JEMALLOC_PREFIX */
+#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE))
+/* #undef JEMALLOC_P */
+#endif
+
+/*
+ * Hyper-threaded CPUs may need a special instruction inside spin loops in
+ * order to yield to another virtual CPU.
+ */
+#define CPU_SPINWAIT __asm__ volatile("pause")
+
+/* Defined if __attribute__((...)) syntax is supported. */
+#define JEMALLOC_HAVE_ATTR
+#ifdef JEMALLOC_HAVE_ATTR
+# define JEMALLOC_ATTR(s) __attribute__((s))
+#else
+# define JEMALLOC_ATTR(s)
+#endif
+
+/*
+ * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
+ * inline functions.
+ */
+/* #undef JEMALLOC_DEBUG */
+
+/* JEMALLOC_STATS enables statistics calculation. */
+/* #undef JEMALLOC_STATS */
+
+/* JEMALLOC_PROF enables allocation profiling. */
+/* #undef JEMALLOC_PROF */
+
+/* Use libunwind for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBUNWIND */
+
+/* Use libgcc for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_LIBGCC */
+
+/*
+ * JEMALLOC_TINY enables support for tiny objects, which are smaller than one
+ * quantum.
+ */
+#define JEMALLOC_TINY
+
+/*
+ * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
+ * This makes it possible to allocate/deallocate objects without any locking
+ * when the cache is in the steady state.
+ */
+#define JEMALLOC_TCACHE
+
+/*
+ * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
+ * segment (DSS).
+ */
+/* #undef JEMALLOC_DSS */
+
+/* JEMALLOC_SWAP enables mmap()ed swap file support. */
+/* #undef JEMALLOC_SWAP */
+
+/* Support memory filling (junk/zero). */
+/* #undef JEMALLOC_FILL */
+
+/* Support optional abort() on OOM. */
+/* #undef JEMALLOC_XMALLOC */
+
+/* Support SYSV semantics. */
+/* #undef JEMALLOC_SYSV */
+
+/* Support lazy locking (avoid locking unless a second thread is launched). */
+#define JEMALLOC_LAZY_LOCK
+
+/* Determine page size at run time if defined. */
+/* #undef DYNAMIC_PAGE_SHIFT */
+
+/* One page is 2^STATIC_PAGE_SHIFT bytes. */
+#define STATIC_PAGE_SHIFT 12
+
+/* TLS is used to map arenas and magazine caches to threads. */
+/* #undef NO_TLS */
+
+/* sizeof(void *) == 2^LG_SIZEOF_PTR. */
+#define LG_SIZEOF_PTR 3
+
+/* sizeof(int) == 2^LG_SIZEOF_INT. */
+#define LG_SIZEOF_INT 2
+
+#endif /* JEMALLOC_DEFS_H_ */
diff --git a/dep/include/jemalloc/jemalloc_defs.h.in b/dep/include/jemalloc/jemalloc_defs.h.in
new file mode 100644
index 00000000000..8b98d670acc
--- /dev/null
+++ b/dep/include/jemalloc/jemalloc_defs.h.in
@@ -0,0 +1,101 @@
+#ifndef JEMALLOC_DEFS_H_
+#define JEMALLOC_DEFS_H_
+
+/*
+ * If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed.
+ * This makes it possible, with some care, to use multiple allocators
+ * simultaneously.
+ *
+ * In many cases it is more convenient to manually prefix allocator function
+ * calls than to let macros do it automatically, particularly when using
+ * multiple allocators simultaneously. Define JEMALLOC_MANGLE before
+ * #include'ing jemalloc.h in order to cause name mangling that corresponds to
+ * the API prefixing.
+ */
+#undef JEMALLOC_PREFIX
+#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE))
+#undef JEMALLOC_P
+#endif
+
+/*
+ * Hyper-threaded CPUs may need a special instruction inside spin loops in
+ * order to yield to another virtual CPU.
+ */
+#undef CPU_SPINWAIT
+
+/* Defined if __attribute__((...)) syntax is supported. */
+#undef JEMALLOC_HAVE_ATTR
+#ifdef JEMALLOC_HAVE_ATTR
+# define JEMALLOC_ATTR(s) __attribute__((s))
+#else
+# define JEMALLOC_ATTR(s)
+#endif
+
+/*
+ * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
+ * inline functions.
+ */
+#undef JEMALLOC_DEBUG
+
+/* JEMALLOC_STATS enables statistics calculation. */
+#undef JEMALLOC_STATS
+
+/* JEMALLOC_PROF enables allocation profiling. */
+#undef JEMALLOC_PROF
+
+/* Use libunwind for profile backtracing if defined. */
+#undef JEMALLOC_PROF_LIBUNWIND
+
+/* Use libgcc for profile backtracing if defined. */
+#undef JEMALLOC_PROF_LIBGCC
+
+/*
+ * JEMALLOC_TINY enables support for tiny objects, which are smaller than one
+ * quantum.
+ */
+#undef JEMALLOC_TINY
+
+/*
+ * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
+ * This makes it possible to allocate/deallocate objects without any locking
+ * when the cache is in the steady state.
+ */
+#undef JEMALLOC_TCACHE
+
+/*
+ * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
+ * segment (DSS).
+ */
+#undef JEMALLOC_DSS
+
+/* JEMALLOC_SWAP enables mmap()ed swap file support. */
+#undef JEMALLOC_SWAP
+
+/* Support memory filling (junk/zero). */
+#undef JEMALLOC_FILL
+
+/* Support optional abort() on OOM. */
+#undef JEMALLOC_XMALLOC
+
+/* Support SYSV semantics. */
+#undef JEMALLOC_SYSV
+
+/* Support lazy locking (avoid locking unless a second thread is launched). */
+#undef JEMALLOC_LAZY_LOCK
+
+/* Determine page size at run time if defined. */
+#undef DYNAMIC_PAGE_SHIFT
+
+/* One page is 2^STATIC_PAGE_SHIFT bytes. */
+#undef STATIC_PAGE_SHIFT
+
+/* TLS is used to map arenas and magazine caches to threads. */
+#undef NO_TLS
+
+/* sizeof(void *) == 2^LG_SIZEOF_PTR. */
+#undef LG_SIZEOF_PTR
+
+/* sizeof(int) == 2^LG_SIZEOF_INT. */
+#undef LG_SIZEOF_INT
+
+#endif /* JEMALLOC_DEFS_H_ */
diff --git a/dep/src/CMakeLists.txt b/dep/src/CMakeLists.txt
index f5ec42a39d5..62d3db7ce3d 100644
--- a/dep/src/CMakeLists.txt
+++ b/dep/src/CMakeLists.txt
@@ -1,4 +1,5 @@
add_subdirectory(g3dlite)
+add_subdirectory(jmalloc)
add_subdirectory(sockets)
add_subdirectory(zlib)
diff --git a/dep/src/jmalloc/CMakeLists.txt b/dep/src/jmalloc/CMakeLists.txt
new file mode 100644
index 00000000000..c3e4e81782c
--- /dev/null
+++ b/dep/src/jmalloc/CMakeLists.txt
@@ -0,0 +1,27 @@
+SET(jmalloc_STAT_SRC
+ arena.c
+ chunk.c
+ chunk_mmap.c
+ ckh.c
+ extent.c
+ huge.c
+ mb.c
+ prof.c
+ tcache.c
+ base.c
+ chunk_dss.c
+ chunk_swap.c
+ ctl.c
+ hash.c
+ jemalloc.c
+ mutex.c
+ stats.c
+ )
+
+include_directories(
+ ${CMAKE_SOURCE_DIR}/dep/include
+ )
+
+add_definitions(-D_GNU_SOURCE -D_REENTRANT)
+
+add_library(jmalloc STATIC ${jmalloc_STAT_SRC}) \ No newline at end of file
diff --git a/dep/src/jmalloc/arena.c b/dep/src/jmalloc/arena.c
new file mode 100644
index 00000000000..e74b4701907
--- /dev/null
+++ b/dep/src/jmalloc/arena.c
@@ -0,0 +1,2446 @@
+#define JEMALLOC_ARENA_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
+size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
+ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
+uint8_t const *small_size2bin;
+
+/* Various bin-related settings. */
+unsigned nqbins;
+unsigned ncbins;
+unsigned nsbins;
+unsigned nbins;
+size_t qspace_max;
+size_t cspace_min;
+size_t cspace_max;
+size_t sspace_min;
+size_t sspace_max;
+
+size_t lg_mspace;
+size_t mspace_mask;
+
+/*
+ * const_small_size2bin is a static constant lookup table that in the common
+ * case can be used as-is for small_size2bin. For dynamically linked programs,
+ * this avoids a page of memory overhead per process.
+ */
+#define S2B_1(i) i,
+#define S2B_2(i) S2B_1(i) S2B_1(i)
+#define S2B_4(i) S2B_2(i) S2B_2(i)
+#define S2B_8(i) S2B_4(i) S2B_4(i)
+#define S2B_16(i) S2B_8(i) S2B_8(i)
+#define S2B_32(i) S2B_16(i) S2B_16(i)
+#define S2B_64(i) S2B_32(i) S2B_32(i)
+#define S2B_128(i) S2B_64(i) S2B_64(i)
+#define S2B_256(i) S2B_128(i) S2B_128(i)
+/*
+ * The number of elements in const_small_size2bin is dependent on page size
+ * and on the definition for SUBPAGE. If SUBPAGE changes, the '- 255' must also
+ * change, along with the addition/removal of static lookup table element
+ * definitions.
+ */
+static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = {
+ S2B_1(0xffU) /* 0 */
+#if (LG_QUANTUM == 4)
+/* 16-byte quantum **********************/
+# ifdef JEMALLOC_TINY
+# if (LG_TINY_MIN == 2)
+ S2B_4(0) /* 4 */
+ S2B_4(1) /* 8 */
+ S2B_8(2) /* 16 */
+# define S2B_QMIN 2
+# elif (LG_TINY_MIN == 3)
+ S2B_8(0) /* 8 */
+ S2B_8(1) /* 16 */
+# define S2B_QMIN 1
+# else
+# error "Unsupported LG_TINY_MIN"
+# endif
+# else
+ S2B_16(0) /* 16 */
+# define S2B_QMIN 0
+# endif
+ S2B_16(S2B_QMIN + 1) /* 32 */
+ S2B_16(S2B_QMIN + 2) /* 48 */
+ S2B_16(S2B_QMIN + 3) /* 64 */
+ S2B_16(S2B_QMIN + 4) /* 80 */
+ S2B_16(S2B_QMIN + 5) /* 96 */
+ S2B_16(S2B_QMIN + 6) /* 112 */
+ S2B_16(S2B_QMIN + 7) /* 128 */
+# define S2B_CMIN (S2B_QMIN + 8)
+#else
+/* 8-byte quantum ***********************/
+# ifdef JEMALLOC_TINY
+# if (LG_TINY_MIN == 2)
+ S2B_4(0) /* 4 */
+ S2B_4(1) /* 8 */
+# define S2B_QMIN 1
+# else
+# error "Unsupported LG_TINY_MIN"
+# endif
+# else
+ S2B_8(0) /* 8 */
+# define S2B_QMIN 0
+# endif
+ S2B_8(S2B_QMIN + 1) /* 16 */
+ S2B_8(S2B_QMIN + 2) /* 24 */
+ S2B_8(S2B_QMIN + 3) /* 32 */
+ S2B_8(S2B_QMIN + 4) /* 40 */
+ S2B_8(S2B_QMIN + 5) /* 48 */
+ S2B_8(S2B_QMIN + 6) /* 56 */
+ S2B_8(S2B_QMIN + 7) /* 64 */
+ S2B_8(S2B_QMIN + 8) /* 72 */
+ S2B_8(S2B_QMIN + 9) /* 80 */
+ S2B_8(S2B_QMIN + 10) /* 88 */
+ S2B_8(S2B_QMIN + 11) /* 96 */
+ S2B_8(S2B_QMIN + 12) /* 104 */
+ S2B_8(S2B_QMIN + 13) /* 112 */
+ S2B_8(S2B_QMIN + 14) /* 120 */
+ S2B_8(S2B_QMIN + 15) /* 128 */
+# define S2B_CMIN (S2B_QMIN + 16)
+#endif
+/****************************************/
+ S2B_64(S2B_CMIN + 0) /* 192 */
+ S2B_64(S2B_CMIN + 1) /* 256 */
+ S2B_64(S2B_CMIN + 2) /* 320 */
+ S2B_64(S2B_CMIN + 3) /* 384 */
+ S2B_64(S2B_CMIN + 4) /* 448 */
+ S2B_64(S2B_CMIN + 5) /* 512 */
+# define S2B_SMIN (S2B_CMIN + 6)
+ S2B_256(S2B_SMIN + 0) /* 768 */
+ S2B_256(S2B_SMIN + 1) /* 1024 */
+ S2B_256(S2B_SMIN + 2) /* 1280 */
+ S2B_256(S2B_SMIN + 3) /* 1536 */
+ S2B_256(S2B_SMIN + 4) /* 1792 */
+ S2B_256(S2B_SMIN + 5) /* 2048 */
+ S2B_256(S2B_SMIN + 6) /* 2304 */
+ S2B_256(S2B_SMIN + 7) /* 2560 */
+ S2B_256(S2B_SMIN + 8) /* 2816 */
+ S2B_256(S2B_SMIN + 9) /* 3072 */
+ S2B_256(S2B_SMIN + 10) /* 3328 */
+ S2B_256(S2B_SMIN + 11) /* 3584 */
+ S2B_256(S2B_SMIN + 12) /* 3840 */
+#if (STATIC_PAGE_SHIFT == 13)
+ S2B_256(S2B_SMIN + 13) /* 4096 */
+ S2B_256(S2B_SMIN + 14) /* 4352 */
+ S2B_256(S2B_SMIN + 15) /* 4608 */
+ S2B_256(S2B_SMIN + 16) /* 4864 */
+ S2B_256(S2B_SMIN + 17) /* 5120 */
+ S2B_256(S2B_SMIN + 18) /* 5376 */
+ S2B_256(S2B_SMIN + 19) /* 5632 */
+ S2B_256(S2B_SMIN + 20) /* 5888 */
+ S2B_256(S2B_SMIN + 21) /* 6144 */
+ S2B_256(S2B_SMIN + 22) /* 6400 */
+ S2B_256(S2B_SMIN + 23) /* 6656 */
+ S2B_256(S2B_SMIN + 24) /* 6912 */
+ S2B_256(S2B_SMIN + 25) /* 7168 */
+ S2B_256(S2B_SMIN + 26) /* 7424 */
+ S2B_256(S2B_SMIN + 27) /* 7680 */
+ S2B_256(S2B_SMIN + 28) /* 7936 */
+#endif
+};
+#undef S2B_1
+#undef S2B_2
+#undef S2B_4
+#undef S2B_8
+#undef S2B_16
+#undef S2B_32
+#undef S2B_64
+#undef S2B_128
+#undef S2B_256
+#undef S2B_QMIN
+#undef S2B_CMIN
+#undef S2B_SMIN
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
+ bool large, bool zero);
+static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
+static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
+static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
+ bool zero);
+static void arena_purge(arena_t *arena);
+static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
+static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
+ arena_run_t *run, size_t oldsize, size_t newsize);
+static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
+ arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
+static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
+static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
+static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
+static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
+ arena_run_t *run, arena_bin_t *bin);
+static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
+ void *ptr, size_t size, size_t oldsize);
+static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
+ void *ptr, size_t size, size_t oldsize);
+static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
+#ifdef JEMALLOC_TINY
+static size_t pow2_ceil(size_t x);
+#endif
+static bool small_size2bin_init(void);
+#ifdef JEMALLOC_DEBUG
+static void small_size2bin_validate(void);
+#endif
+static bool small_size2bin_init_hard(void);
+
+/******************************************************************************/
+
+static inline int
+arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
+{
+ uintptr_t a_mapelm = (uintptr_t)a;
+ uintptr_t b_mapelm = (uintptr_t)b;
+
+ assert(a != NULL);
+ assert(b != NULL);
+
+ return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
+}
+
+/* Generate red-black tree functions. */
+rb_gen(static JEMALLOC_ATTR(unused), arena_run_tree_, arena_run_tree_t,
+ arena_chunk_map_t, u.rb_link, arena_run_comp)
+
+static inline int
+arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
+{
+ int ret;
+ size_t a_size = a->bits & ~PAGE_MASK;
+ size_t b_size = b->bits & ~PAGE_MASK;
+
+ assert((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY || (a->bits &
+ CHUNK_MAP_DIRTY) == (b->bits & CHUNK_MAP_DIRTY));
+
+ ret = (a_size > b_size) - (a_size < b_size);
+ if (ret == 0) {
+ uintptr_t a_mapelm, b_mapelm;
+
+ if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY)
+ a_mapelm = (uintptr_t)a;
+ else {
+ /*
+ * Treat keys as though they are lower than anything
+ * else.
+ */
+ a_mapelm = 0;
+ }
+ b_mapelm = (uintptr_t)b;
+
+ ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
+ }
+
+ return (ret);
+}
+
+/* Generate red-black tree functions. */
+rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t,
+ arena_chunk_map_t, u.rb_link, arena_avail_comp)
+
+static inline void *
+arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
+{
+ void *ret;
+
+ assert(run->magic == ARENA_RUN_MAGIC);
+ assert(run->nfree > 0);
+
+ run->nfree--;
+ ret = run->avail;
+ if (ret != NULL) {
+ run->avail = *(void **)ret;
+ /* Double free can cause assertion failure.*/
+ assert(ret != NULL);
+ /* Write-after free can cause assertion failure. */
+ assert((uintptr_t)ret >= (uintptr_t)run +
+ (uintptr_t)bin->reg0_offset);
+ assert((uintptr_t)ret < (uintptr_t)run->next);
+ assert(((uintptr_t)ret - ((uintptr_t)run +
+ (uintptr_t)bin->reg0_offset)) % (uintptr_t)bin->reg_size ==
+ 0);
+ return (ret);
+ }
+ ret = run->next;
+ run->next = (void *)((uintptr_t)ret + (uintptr_t)bin->reg_size);
+ assert(ret != NULL);
+ return (ret);
+}
+
+static inline void
+arena_run_reg_dalloc(arena_run_t *run, void *ptr)
+{
+
+ assert(run->nfree < run->bin->nregs);
+ /* Freeing an interior pointer can cause assertion failure. */
+ assert(((uintptr_t)ptr - ((uintptr_t)run +
+ (uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size
+ == 0);
+
+ *(void **)ptr = run->avail;
+ run->avail = ptr;
+ run->nfree++;
+}
+
+static void
+arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
+ bool zero)
+{
+ arena_chunk_t *chunk;
+ size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
+ size_t flag_dirty;
+ arena_avail_tree_t *runs_avail;
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+ old_ndirty = chunk->ndirty;
+ run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
+ >> PAGE_SHIFT);
+ flag_dirty = chunk->map[run_ind].bits & CHUNK_MAP_DIRTY;
+ runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty :
+ &arena->runs_avail_clean;
+ total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >>
+ PAGE_SHIFT;
+ assert((chunk->map[run_ind+total_pages-1].bits & CHUNK_MAP_DIRTY) ==
+ flag_dirty);
+ need_pages = (size >> PAGE_SHIFT);
+ assert(need_pages > 0);
+ assert(need_pages <= total_pages);
+ rem_pages = total_pages - need_pages;
+
+ arena_avail_tree_remove(runs_avail, &chunk->map[run_ind]);
+ arena->nactive += need_pages;
+
+ /* Keep track of trailing unused pages for later use. */
+ if (rem_pages > 0) {
+ if (flag_dirty != 0) {
+ chunk->map[run_ind+need_pages].bits = (rem_pages <<
+ PAGE_SHIFT) | CHUNK_MAP_DIRTY;
+ chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
+ PAGE_SHIFT) | CHUNK_MAP_DIRTY;
+ } else {
+ chunk->map[run_ind+need_pages].bits = (rem_pages <<
+ PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits &
+ CHUNK_MAP_ZEROED);
+ chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
+ PAGE_SHIFT) |
+ (chunk->map[run_ind+total_pages-1].bits &
+ CHUNK_MAP_ZEROED);
+ }
+ arena_avail_tree_insert(runs_avail,
+ &chunk->map[run_ind+need_pages]);
+ }
+
+ /* Update dirty page accounting. */
+ if (flag_dirty != 0) {
+ chunk->ndirty -= need_pages;
+ arena->ndirty -= need_pages;
+ }
+
+ /*
+ * Update the page map separately for large vs. small runs, since it is
+ * possible to avoid iteration for large mallocs.
+ */
+ if (large) {
+ if (zero) {
+ if (flag_dirty == 0) {
+ /*
+ * The run is clean, so some pages may be
+ * zeroed (i.e. never before touched).
+ */
+ for (i = 0; i < need_pages; i++) {
+ if ((chunk->map[run_ind + i].bits &
+ CHUNK_MAP_ZEROED) == 0) {
+ memset((void *)((uintptr_t)
+ chunk + ((run_ind + i) <<
+ PAGE_SHIFT)), 0,
+ PAGE_SIZE);
+ }
+ }
+ } else {
+ /*
+ * The run is dirty, so all pages must be
+ * zeroed.
+ */
+ memset((void *)((uintptr_t)chunk + (run_ind <<
+ PAGE_SHIFT)), 0, (need_pages <<
+ PAGE_SHIFT));
+ }
+ }
+
+ /*
+ * Set the last element first, in case the run only contains one
+ * page (i.e. both statements set the same element).
+ */
+ chunk->map[run_ind+need_pages-1].bits = CHUNK_MAP_LARGE |
+ CHUNK_MAP_ALLOCATED | flag_dirty;
+ chunk->map[run_ind].bits = size | CHUNK_MAP_LARGE |
+#ifdef JEMALLOC_PROF
+ CHUNK_MAP_CLASS_MASK |
+#endif
+ CHUNK_MAP_ALLOCATED | flag_dirty;
+ } else {
+ assert(zero == false);
+ /*
+ * Propagate the dirty flag to the allocated small run, so that
+ * arena_dalloc_bin_run() has the ability to conditionally trim
+ * clean pages.
+ */
+ chunk->map[run_ind].bits = CHUNK_MAP_ALLOCATED | flag_dirty;
+ for (i = 1; i < need_pages - 1; i++) {
+ chunk->map[run_ind + i].bits = (i << PAGE_SHIFT)
+ | CHUNK_MAP_ALLOCATED;
+ }
+ chunk->map[run_ind + need_pages - 1].bits = ((need_pages - 1) <<
+ PAGE_SHIFT) | CHUNK_MAP_ALLOCATED | flag_dirty;
+ }
+}
+
+static arena_chunk_t *
+arena_chunk_alloc(arena_t *arena)
+{
+ arena_chunk_t *chunk;
+ size_t i;
+
+ if (arena->spare != NULL) {
+ arena_avail_tree_t *runs_avail;
+
+ chunk = arena->spare;
+ arena->spare = NULL;
+
+ /* Insert the run into the appropriate runs_avail_* tree. */
+ if ((chunk->map[arena_chunk_header_npages].bits &
+ CHUNK_MAP_DIRTY) == 0)
+ runs_avail = &arena->runs_avail_clean;
+ else
+ runs_avail = &arena->runs_avail_dirty;
+ arena_avail_tree_insert(runs_avail,
+ &chunk->map[arena_chunk_header_npages]);
+ } else {
+ bool zero;
+ size_t zeroed;
+
+ zero = false;
+ malloc_mutex_unlock(&arena->lock);
+ chunk = (arena_chunk_t *)chunk_alloc(chunksize, &zero);
+ malloc_mutex_lock(&arena->lock);
+ if (chunk == NULL)
+ return (NULL);
+#ifdef JEMALLOC_STATS
+ arena->stats.mapped += chunksize;
+#endif
+
+ chunk->arena = arena;
+ ql_elm_new(chunk, link_dirty);
+ chunk->dirtied = false;
+
+ /*
+ * Claim that no pages are in use, since the header is merely
+ * overhead.
+ */
+ chunk->ndirty = 0;
+
+ /*
+ * Initialize the map to contain one maximal free untouched run.
+ * Mark the pages as zeroed iff chunk_alloc() returned a zeroed
+ * chunk.
+ */
+ zeroed = zero ? CHUNK_MAP_ZEROED : 0;
+ for (i = 0; i < arena_chunk_header_npages; i++)
+ chunk->map[i].bits = 0;
+ chunk->map[i].bits = arena_maxclass | zeroed;
+ for (i++; i < chunk_npages-1; i++)
+ chunk->map[i].bits = zeroed;
+ chunk->map[chunk_npages-1].bits = arena_maxclass | zeroed;
+
+ /* Insert the run into the runs_avail_clean tree. */
+ arena_avail_tree_insert(&arena->runs_avail_clean,
+ &chunk->map[arena_chunk_header_npages]);
+ }
+
+ return (chunk);
+}
+
+static void
+arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
+{
+ arena_avail_tree_t *runs_avail;
+
+ while (arena->spare != NULL) {
+ arena_chunk_t *spare = arena->spare;
+
+ arena->spare = NULL;
+ if (spare->dirtied) {
+ ql_remove(&chunk->arena->chunks_dirty, spare,
+ link_dirty);
+ arena->ndirty -= spare->ndirty;
+ }
+ malloc_mutex_unlock(&arena->lock);
+ chunk_dealloc((void *)spare, chunksize);
+ malloc_mutex_lock(&arena->lock);
+#ifdef JEMALLOC_STATS
+ arena->stats.mapped -= chunksize;
+#endif
+ }
+
+ /*
+ * Remove run from the appropriate runs_avail_* tree, so that the arena
+ * does not use it.
+ */
+ if ((chunk->map[arena_chunk_header_npages].bits &
+ CHUNK_MAP_DIRTY) == 0)
+ runs_avail = &arena->runs_avail_clean;
+ else
+ runs_avail = &arena->runs_avail_dirty;
+ arena_avail_tree_remove(runs_avail,
+ &chunk->map[arena_chunk_header_npages]);
+
+ arena->spare = chunk;
+}
+
+static arena_run_t *
+arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
+{
+ arena_chunk_t *chunk;
+ arena_run_t *run;
+ arena_chunk_map_t *mapelm, key;
+
+ assert(size <= arena_maxclass);
+ assert((size & PAGE_MASK) == 0);
+
+ /* Search the arena's chunks for the lowest best fit. */
+ key.bits = size | CHUNK_MAP_KEY;
+ mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key);
+ if (mapelm != NULL) {
+ arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
+ size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
+ / sizeof(arena_chunk_map_t);
+
+ run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
+ PAGE_SHIFT));
+ arena_run_split(arena, run, size, large, zero);
+ return (run);
+ }
+ mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key);
+ if (mapelm != NULL) {
+ arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
+ size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
+ / sizeof(arena_chunk_map_t);
+
+ run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
+ PAGE_SHIFT));
+ arena_run_split(arena, run, size, large, zero);
+ return (run);
+ }
+
+ /*
+ * No usable runs. Create a new chunk from which to allocate the run.
+ */
+ chunk = arena_chunk_alloc(arena);
+ if (chunk != NULL) {
+ run = (arena_run_t *)((uintptr_t)chunk +
+ (arena_chunk_header_npages << PAGE_SHIFT));
+ arena_run_split(arena, run, size, large, zero);
+ return (run);
+ }
+
+ /*
+ * arena_chunk_alloc() failed, but another thread may have made
+ * sufficient memory available while this one dropped arena->lock in
+ * arena_chunk_alloc(), so search one more time.
+ */
+ mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key);
+ if (mapelm != NULL) {
+ arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
+ size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
+ / sizeof(arena_chunk_map_t);
+
+ run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
+ PAGE_SHIFT));
+ arena_run_split(arena, run, size, large, zero);
+ return (run);
+ }
+ mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key);
+ if (mapelm != NULL) {
+ arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
+ size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
+ / sizeof(arena_chunk_map_t);
+
+ run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
+ PAGE_SHIFT));
+ arena_run_split(arena, run, size, large, zero);
+ return (run);
+ }
+
+ return (NULL);
+}
+
+static inline void
+arena_maybe_purge(arena_t *arena)
+{
+
+ /* Enforce opt_lg_dirty_mult. */
+ if (opt_lg_dirty_mult >= 0 && arena->ndirty > arena->npurgatory &&
+ (arena->ndirty - arena->npurgatory) > chunk_npages &&
+ (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
+ arena->npurgatory))
+ arena_purge(arena);
+}
+
+static inline void
+arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
+{
+ ql_head(arena_chunk_map_t) mapelms;
+ arena_chunk_map_t *mapelm;
+ size_t pageind, flag_zeroed;
+#ifdef JEMALLOC_DEBUG
+ size_t ndirty;
+#endif
+#ifdef JEMALLOC_STATS
+ size_t nmadvise;
+#endif
+
+ ql_new(&mapelms);
+
+ flag_zeroed =
+#ifdef JEMALLOC_SWAP
+ swap_enabled ? 0 :
+#endif
+ CHUNK_MAP_ZEROED;
+
+ /*
+ * If chunk is the spare, temporarily re-allocate it, 1) so that its
+ * run is reinserted into runs_avail_dirty, and 2) so that it cannot be
+ * completely discarded by another thread while arena->lock is dropped
+ * by this thread. Note that the arena_run_dalloc() call will
+ * implicitly deallocate the chunk, so no explicit action is required
+ * in this function to deallocate the chunk.
+ *
+ * Note that once a chunk contains dirty pages, it cannot again contain
+ * a single run unless 1) it is a dirty run, or 2) this function purges
+ * dirty pages and causes the transition to a single clean run. Thus
+ * (chunk == arena->spare) is possible, but it is not possible for
+ * this function to be called on the spare unless it contains a dirty
+ * run.
+ */
+ if (chunk == arena->spare) {
+ assert((chunk->map[arena_chunk_header_npages].bits &
+ CHUNK_MAP_DIRTY) != 0);
+ arena_chunk_alloc(arena);
+ }
+
+ /* Temporarily allocate all free dirty runs within chunk. */
+ for (pageind = arena_chunk_header_npages; pageind < chunk_npages;) {
+ mapelm = &chunk->map[pageind];
+ if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) {
+ size_t npages;
+
+ npages = mapelm->bits >> PAGE_SHIFT;
+ assert(pageind + npages <= chunk_npages);
+ if (mapelm->bits & CHUNK_MAP_DIRTY) {
+ size_t i;
+
+ arena_avail_tree_remove(
+ &arena->runs_avail_dirty, mapelm);
+
+ /*
+ * Update internal elements in the page map, so
+ * that CHUNK_MAP_ZEROED is properly set.
+ * madvise(..., MADV_DONTNEED) results in
+ * zero-filled pages for anonymous mappings,
+ * but not for file-backed mappings.
+ */
+ mapelm->bits = (npages << PAGE_SHIFT) |
+ CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED |
+ flag_zeroed;
+ for (i = 1; i < npages - 1; i++) {
+ chunk->map[pageind + i].bits =
+ flag_zeroed;
+ }
+ if (npages > 1) {
+ chunk->map[pageind + npages - 1].bits =
+ (npages << PAGE_SHIFT) |
+ CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED |
+ flag_zeroed;
+ }
+
+ arena->nactive += npages;
+ /* Append to list for later processing. */
+ ql_elm_new(mapelm, u.ql_link);
+ ql_tail_insert(&mapelms, mapelm, u.ql_link);
+ }
+
+ pageind += npages;
+ } else {
+ /* Skip allocated run. */
+ if (mapelm->bits & CHUNK_MAP_LARGE)
+ pageind += mapelm->bits >> PAGE_SHIFT;
+ else {
+ arena_run_t *run = (arena_run_t *)((uintptr_t)
+ chunk + (uintptr_t)(pageind << PAGE_SHIFT));
+
+ assert((mapelm->bits >> PAGE_SHIFT) == 0);
+ assert(run->magic == ARENA_RUN_MAGIC);
+ pageind += run->bin->run_size >> PAGE_SHIFT;
+ }
+ }
+ }
+ assert(pageind == chunk_npages);
+
+#ifdef JEMALLOC_DEBUG
+ ndirty = chunk->ndirty;
+#endif
+#ifdef JEMALLOC_STATS
+ arena->stats.purged += chunk->ndirty;
+#endif
+ arena->ndirty -= chunk->ndirty;
+ chunk->ndirty = 0;
+ ql_remove(&arena->chunks_dirty, chunk, link_dirty);
+ chunk->dirtied = false;
+
+ malloc_mutex_unlock(&arena->lock);
+#ifdef JEMALLOC_STATS
+ nmadvise = 0;
+#endif
+ ql_foreach(mapelm, &mapelms, u.ql_link) {
+ size_t pageind = ((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+ sizeof(arena_chunk_map_t);
+ size_t npages = mapelm->bits >> PAGE_SHIFT;
+
+ assert(pageind + npages <= chunk_npages);
+#ifdef JEMALLOC_DEBUG
+ assert(ndirty >= npages);
+ ndirty -= npages;
+#endif
+ madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
+ (npages << PAGE_SHIFT), MADV_DONTNEED);
+#ifdef JEMALLOC_STATS
+ nmadvise++;
+#endif
+ }
+#ifdef JEMALLOC_DEBUG
+ assert(ndirty == 0);
+#endif
+ malloc_mutex_lock(&arena->lock);
+#ifdef JEMALLOC_STATS
+ arena->stats.nmadvise += nmadvise;
+#endif
+
+ /* Deallocate runs. */
+ for (mapelm = ql_first(&mapelms); mapelm != NULL;
+ mapelm = ql_first(&mapelms)) {
+ size_t pageind = ((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+ sizeof(arena_chunk_map_t);
+ arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+ (uintptr_t)(pageind << PAGE_SHIFT));
+
+ ql_remove(&mapelms, mapelm, u.ql_link);
+ arena_run_dalloc(arena, run, false);
+ }
+}
+
+/*
+ * Purge dirty pages until arena->ndirty drops back below the
+ * (nactive >> opt_lg_dirty_mult) threshold.  Caller holds arena->lock.
+ * arena->npurgatory tracks pages that some thread has already committed to
+ * purging, so concurrent purgers do not all target the same pages while the
+ * lock is dropped inside arena_chunk_purge().
+ */
+static void
+arena_purge(arena_t *arena)
+{
+	arena_chunk_t *chunk;
+	size_t npurgatory;
+#ifdef JEMALLOC_DEBUG
+	size_t ndirty = 0;
+
+	/* Cross-check the per-chunk dirty counts against the arena total. */
+	ql_foreach(chunk, &arena->chunks_dirty, link_dirty) {
+		assert(chunk->dirtied);
+		ndirty += chunk->ndirty;
+	}
+	assert(ndirty == arena->ndirty);
+#endif
+	assert(arena->ndirty > arena->npurgatory);
+	assert(arena->ndirty > chunk_npages);
+	assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty);
+
+#ifdef JEMALLOC_STATS
+	arena->stats.npurge++;
+#endif
+
+	/*
+	 * Compute the minimum number of pages that this thread should try to
+	 * purge, and add the result to arena->npurgatory.  This will keep
+	 * multiple threads from racing to reduce ndirty below the threshold.
+	 */
+	npurgatory = (arena->ndirty - arena->npurgatory) - (arena->nactive >>
+	    opt_lg_dirty_mult);
+	arena->npurgatory += npurgatory;
+
+	while (npurgatory > 0) {
+		/* Get next chunk with dirty pages. */
+		chunk = ql_first(&arena->chunks_dirty);
+		if (chunk == NULL) {
+			/*
+			 * This thread was unable to purge as many pages as
+			 * originally intended, due to races with other threads
+			 * that either did some of the purging work, or re-used
+			 * dirty pages.
+			 */
+			arena->npurgatory -= npurgatory;
+			return;
+		}
+		/* Drop chunks whose dirty pages were purged by other threads. */
+		while (chunk->ndirty == 0) {
+			ql_remove(&arena->chunks_dirty, chunk, link_dirty);
+			chunk->dirtied = false;
+			chunk = ql_first(&arena->chunks_dirty);
+			if (chunk == NULL) {
+				/* Same logic as for above. */
+				arena->npurgatory -= npurgatory;
+				return;
+			}
+		}
+
+		if (chunk->ndirty > npurgatory) {
+			/*
+			 * This thread will, at a minimum, purge all the dirty
+			 * pages in chunk, so set npurgatory to reflect this
+			 * thread's commitment to purge the pages.  This tends
+			 * to reduce the chances of the following scenario:
+			 *
+			 * 1) This thread sets arena->npurgatory such that
+			 *    (arena->ndirty - arena->npurgatory) is at the
+			 *    threshold.
+			 * 2) This thread drops arena->lock.
+			 * 3) Another thread causes one or more pages to be
+			 *    dirtied, and immediately determines that it must
+			 *    purge dirty pages.
+			 *
+			 * If this scenario *does* play out, that's okay,
+			 * because all of the purging work being done really
+			 * needs to happen.
+			 */
+			arena->npurgatory += chunk->ndirty - npurgatory;
+			npurgatory = chunk->ndirty;
+		}
+
+		arena->npurgatory -= chunk->ndirty;
+		npurgatory -= chunk->ndirty;
+		arena_chunk_purge(arena, chunk);
+	}
+}
+
+/*
+ * Return run's pages to the arena, coalescing with adjacent unallocated runs
+ * of the same dirtiness, and deallocate the chunk if it becomes completely
+ * unused.  Caller holds arena->lock.  If dirty is true (or the pages were
+ * already flagged dirty in the chunk map), the pages are accounted as dirty
+ * and purging may be triggered via arena_maybe_purge().
+ */
+static void
+arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
+{
+	arena_chunk_t *chunk;
+	size_t size, run_ind, run_pages, flag_dirty;
+	arena_avail_tree_t *runs_avail;
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
+	    >> PAGE_SHIFT);
+	assert(run_ind >= arena_chunk_header_npages);
+	assert(run_ind < chunk_npages);
+	/* Large runs encode their size in the map; small runs use the bin. */
+	if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0)
+		size = chunk->map[run_ind].bits & ~PAGE_MASK;
+	else
+		size = run->bin->run_size;
+	run_pages = (size >> PAGE_SHIFT);
+	arena->nactive -= run_pages;
+
+	/*
+	 * The run is dirty if the caller claims to have dirtied it, as well as
+	 * if it was already dirty before being allocated.
+	 */
+	if ((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) != 0)
+		dirty = true;
+	flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0;
+	/* Dirty and clean runs live in separate availability trees. */
+	runs_avail = dirty ? &arena->runs_avail_dirty :
+	    &arena->runs_avail_clean;
+
+	/* Mark pages as unallocated in the chunk map. */
+	if (dirty) {
+		chunk->map[run_ind].bits = size | flag_dirty;
+		chunk->map[run_ind+run_pages-1].bits = size | flag_dirty;
+
+		chunk->ndirty += run_pages;
+		arena->ndirty += run_pages;
+	} else {
+		chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
+		    CHUNK_MAP_ZEROED);
+		chunk->map[run_ind+run_pages-1].bits = size |
+		    (chunk->map[run_ind+run_pages-1].bits & CHUNK_MAP_ZEROED);
+	}
+
+	/* Try to coalesce forward. */
+	if (run_ind + run_pages < chunk_npages &&
+	    (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0 &&
+	    (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_DIRTY) ==
+	    flag_dirty) {
+		size_t nrun_size = chunk->map[run_ind+run_pages].bits &
+		    ~PAGE_MASK;
+
+		/*
+		 * Remove successor from runs_avail; the coalesced run is
+		 * inserted later.
+		 */
+		arena_avail_tree_remove(runs_avail,
+		    &chunk->map[run_ind+run_pages]);
+
+		size += nrun_size;
+		run_pages = size >> PAGE_SHIFT;
+
+		/* Last page of the successor must record the successor size. */
+		assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK)
+		    == nrun_size);
+		chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
+		    CHUNK_MAP_FLAGS_MASK);
+		chunk->map[run_ind+run_pages-1].bits = size |
+		    (chunk->map[run_ind+run_pages-1].bits &
+		    CHUNK_MAP_FLAGS_MASK);
+	}
+
+	/* Try to coalesce backward. */
+	if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits &
+	    CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1].bits &
+	    CHUNK_MAP_DIRTY) == flag_dirty) {
+		size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK;
+
+		run_ind -= prun_size >> PAGE_SHIFT;
+
+		/*
+		 * Remove predecessor from runs_avail; the coalesced run is
+		 * inserted later.
+		 */
+		arena_avail_tree_remove(runs_avail, &chunk->map[run_ind]);
+
+		size += prun_size;
+		run_pages = size >> PAGE_SHIFT;
+
+		assert((chunk->map[run_ind].bits & ~PAGE_MASK) == prun_size);
+		chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
+		    CHUNK_MAP_FLAGS_MASK);
+		chunk->map[run_ind+run_pages-1].bits = size |
+		    (chunk->map[run_ind+run_pages-1].bits &
+		    CHUNK_MAP_FLAGS_MASK);
+	}
+
+	/* Insert into runs_avail, now that coalescing is complete. */
+	arena_avail_tree_insert(runs_avail, &chunk->map[run_ind]);
+
+	/*
+	 * Deallocate chunk if it is now completely unused.  The bit
+	 * manipulation checks whether the first run is unallocated and extends
+	 * to the end of the chunk.
+	 */
+	if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK |
+	    CHUNK_MAP_ALLOCATED)) == arena_maxclass)
+		arena_chunk_dealloc(arena, chunk);
+
+	/*
+	 * It is okay to do dirty page processing even if the chunk was
+	 * deallocated above, since in that case it is the spare.  Waiting
+	 * until after possible chunk deallocation to do dirty processing
+	 * allows for an old spare to be fully deallocated, thus decreasing the
+	 * chances of spuriously crossing the dirty page purging threshold.
+	 */
+	if (dirty) {
+		if (chunk->dirtied == false) {
+			ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
+			chunk->dirtied = true;
+		}
+		arena_maybe_purge(arena);
+	}
+}
+
+/*
+ * Shrink run from oldsize to newsize by splitting off and freeing the
+ * leading (oldsize - newsize) bytes.  Both sizes are page multiples.
+ */
+static void
+arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
+    size_t oldsize, size_t newsize)
+{
+	size_t run_pgind, lead_pages, flag_bits;
+
+	assert(oldsize > newsize);
+
+	run_pgind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
+	lead_pages = (oldsize - newsize) >> PAGE_SHIFT;
+	flag_bits = chunk->map[run_pgind].bits & CHUNK_MAP_FLAGS_MASK;
+
+	/*
+	 * Split the run in the chunk map so that arena_run_dalloc() treats
+	 * the leading pages as a separately allocated run, then free them.
+	 */
+	assert(chunk->map[run_pgind].bits & CHUNK_MAP_LARGE);
+	assert(chunk->map[run_pgind].bits & CHUNK_MAP_ALLOCATED);
+	chunk->map[run_pgind].bits = (oldsize - newsize) | flag_bits;
+	chunk->map[run_pgind + lead_pages].bits = newsize | flag_bits;
+
+	arena_run_dalloc(arena, run, false);
+}
+
+/*
+ * Shrink run from oldsize to newsize by splitting off and freeing the
+ * trailing (oldsize - newsize) bytes.  Both sizes are page multiples.
+ */
+static void
+arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
+    size_t oldsize, size_t newsize, bool dirty)
+{
+	size_t run_pgind, keep_pages, flag_bits;
+
+	assert(oldsize > newsize);
+
+	run_pgind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
+	keep_pages = newsize >> PAGE_SHIFT;
+	flag_bits = chunk->map[run_pgind].bits & CHUNK_MAP_FLAGS_MASK;
+
+	/*
+	 * Split the run in the chunk map so that arena_run_dalloc() treats
+	 * the trailing pages as a separately allocated run, then free them.
+	 */
+	assert(chunk->map[run_pgind].bits & CHUNK_MAP_LARGE);
+	assert(chunk->map[run_pgind].bits & CHUNK_MAP_ALLOCATED);
+	chunk->map[run_pgind].bits = newsize | flag_bits;
+	chunk->map[run_pgind + keep_pages - 1].bits = newsize | flag_bits;
+	chunk->map[run_pgind + keep_pages].bits = (oldsize - newsize) |
+	    flag_bits;
+
+	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
+	    dirty);
+}
+
+/*
+ * Find or allocate a run with free regions for bin.  Called with bin->lock
+ * held; the lock is dropped while arena->lock is held for run allocation,
+ * and re-acquired before returning, so callers must re-validate bin state.
+ * Returns NULL if no run could be found or allocated.
+ */
+static arena_run_t *
+arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
+{
+	arena_chunk_map_t *mapelm;
+	arena_run_t *run;
+
+	/* Look for a usable run. */
+	mapelm = arena_run_tree_first(&bin->runs);
+	if (mapelm != NULL) {
+		arena_chunk_t *chunk;
+		size_t pageind;
+
+		/* run is guaranteed to have available space. */
+		arena_run_tree_remove(&bin->runs, mapelm);
+
+		/* Recover the run address from the map element's position. */
+		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
+		pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+		    sizeof(arena_chunk_map_t));
+		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
+		    (mapelm->bits >> PAGE_SHIFT))
+		    << PAGE_SHIFT));
+#ifdef JEMALLOC_STATS
+		bin->stats.reruns++;
+#endif
+		return (run);
+	}
+	/* No existing runs have any space available. */
+
+	/* Allocate a new run. */
+	malloc_mutex_unlock(&bin->lock);
+	/******************************/
+	malloc_mutex_lock(&arena->lock);
+	run = arena_run_alloc(arena, bin->run_size, false, false);
+	if (run != NULL) {
+		/* Initialize run internals. */
+		run->bin = bin;
+		run->avail = NULL;
+		run->next = (void *)(((uintptr_t)run) +
+		    (uintptr_t)bin->reg0_offset);
+		run->nfree = bin->nregs;
+#ifdef JEMALLOC_DEBUG
+		run->magic = ARENA_RUN_MAGIC;
+#endif
+	}
+	malloc_mutex_unlock(&arena->lock);
+	/********************************/
+	malloc_mutex_lock(&bin->lock);
+	if (run != NULL) {
+#ifdef JEMALLOC_STATS
+		bin->stats.nruns++;
+		bin->stats.curruns++;
+		if (bin->stats.curruns > bin->stats.highruns)
+			bin->stats.highruns = bin->stats.curruns;
+#endif
+		return (run);
+	}
+
+	/*
+	 * arena_run_alloc() failed, but another thread may have made
+	 * sufficient memory available while this one dropped bin->lock above,
+	 * so search one more time.
+	 */
+	mapelm = arena_run_tree_first(&bin->runs);
+	if (mapelm != NULL) {
+		arena_chunk_t *chunk;
+		size_t pageind;
+
+		/* run is guaranteed to have available space. */
+		arena_run_tree_remove(&bin->runs, mapelm);
+
+		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
+		pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+		    sizeof(arena_chunk_map_t));
+		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
+		    (mapelm->bits >> PAGE_SHIFT))
+		    << PAGE_SHIFT));
+#ifdef JEMALLOC_STATS
+		bin->stats.reruns++;
+#endif
+		return (run);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Re-fill bin->runcur, then call arena_run_reg_alloc().  Called with
+ * bin->lock held.  Returns NULL only if no run could be obtained.
+ */
+static void *
+arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
+{
+	void *ret;
+	arena_run_t *run;
+
+	bin->runcur = NULL;
+	run = arena_bin_nonfull_run_get(arena, bin);
+	if (bin->runcur != NULL && bin->runcur->nfree > 0) {
+		/*
+		 * Another thread updated runcur while this one ran without the
+		 * bin lock in arena_bin_nonfull_run_get().
+		 */
+		assert(bin->runcur->magic == ARENA_RUN_MAGIC);
+		assert(bin->runcur->nfree > 0);
+		ret = arena_run_reg_alloc(bin->runcur, bin);
+		if (run != NULL) {
+			/* This thread's run is now surplus; give it back. */
+			malloc_mutex_unlock(&bin->lock);
+			malloc_mutex_lock(&arena->lock);
+			arena_run_dalloc(arena, run, false);
+			malloc_mutex_unlock(&arena->lock);
+			malloc_mutex_lock(&bin->lock);
+		}
+		return (ret);
+	}
+
+	if (run == NULL)
+		return (NULL);
+
+	bin->runcur = run;
+
+	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
+	assert(bin->runcur->nfree > 0);
+
+	return (arena_run_reg_alloc(bin->runcur, bin));
+}
+
+#ifdef JEMALLOC_PROF
+/*
+ * Accumulate allocated bytes toward the profiling interval, triggering an
+ * interval-based profile dump each time prof_interval bytes accumulate.
+ */
+void
+arena_prof_accum(arena_t *arena, uint64_t accumbytes)
+{
+
+	/* Interval-based dumps are disabled when prof_interval is zero. */
+	if (prof_interval == 0)
+		return;
+	arena->prof_accumbytes += accumbytes;
+	if (arena->prof_accumbytes >= prof_interval) {
+		prof_idump();
+		arena->prof_accumbytes -= prof_interval;
+	}
+}
+#endif
+
+#ifdef JEMALLOC_TCACHE
+/*
+ * Fill tbin (which must be empty) with up to ncached_max/2 regions from the
+ * bin's runs, linking them through their first word into tbin->avail.
+ */
+void
+arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
+#  ifdef JEMALLOC_PROF
+    , uint64_t prof_accumbytes
+#  endif
+    )
+{
+	unsigned i, nfill;
+	arena_bin_t *bin;
+	arena_run_t *run;
+	void *ptr;
+
+	assert(tbin->ncached == 0);
+
+#ifdef JEMALLOC_PROF
+	malloc_mutex_lock(&arena->lock);
+	arena_prof_accum(arena, prof_accumbytes);
+	malloc_mutex_unlock(&arena->lock);
+#endif
+	bin = &arena->bins[binind];
+	malloc_mutex_lock(&bin->lock);
+	/* Stop early if the bin runs out of memory (partial fill is fine). */
+	for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) {
+		if ((run = bin->runcur) != NULL && run->nfree > 0)
+			ptr = arena_run_reg_alloc(run, bin);
+		else
+			ptr = arena_bin_malloc_hard(arena, bin);
+		if (ptr == NULL)
+			break;
+		/* Push onto the tcache's singly linked free list. */
+		*(void **)ptr = tbin->avail;
+		tbin->avail = ptr;
+	}
+#ifdef JEMALLOC_STATS
+	bin->stats.allocated += (i - tbin->ncached) * bin->reg_size;
+	bin->stats.nmalloc += i;
+	bin->stats.nrequests += tbin->tstats.nrequests;
+	bin->stats.nfills++;
+	tbin->tstats.nrequests = 0;
+#endif
+	malloc_mutex_unlock(&bin->lock);
+	tbin->ncached = i;
+	if (tbin->ncached > tbin->high_water)
+		tbin->high_water = tbin->ncached;
+}
+#endif
+
+/*
+ * Calculate bin->run_size such that it meets the following constraints:
+ *
+ * *) bin->run_size >= min_run_size
+ * *) bin->run_size <= arena_maxclass
+ * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
+ * *) run header size < PAGE_SIZE
+ *
+ * bin->nregs and bin->reg0_offset are also calculated here, since these
+ * settings are all interdependent.
+ */
+/*
+ * Compute bin->run_size (see the constraint list above), along with
+ * bin->nregs and bin->reg0_offset.  Returns the chosen run size.
+ *
+ * NOTE(review): the expansion loop's termination condition previously
+ * tested "try_run_size <= arena_maxclass" twice (copy-paste slip); the
+ * redundant conjunct has been removed.  Behavior is unchanged.
+ */
+static size_t
+arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
+{
+	size_t try_run_size, good_run_size;
+	uint32_t try_nregs, good_nregs;
+	uint32_t try_hdr_size, good_hdr_size;
+#ifdef JEMALLOC_PROF
+	uint32_t try_cnt0_offset, good_cnt0_offset;
+#endif
+	uint32_t try_reg0_offset, good_reg0_offset;
+
+	assert(min_run_size >= PAGE_SIZE);
+	assert(min_run_size <= arena_maxclass);
+
+	/*
+	 * Calculate known-valid settings before entering the run_size
+	 * expansion loop, so that the first part of the loop always copies
+	 * valid settings.
+	 *
+	 * The do..while loop iteratively reduces the number of regions until
+	 * the run header and the regions no longer overlap.  A closed formula
+	 * would be quite messy, since there is an interdependency between the
+	 * header's mask length and the number of regions.
+	 */
+	try_run_size = min_run_size;
+	try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
+	    + 1; /* Counter-act try_nregs-- in loop. */
+	do {
+		try_nregs--;
+		try_hdr_size = sizeof(arena_run_t);
+#ifdef JEMALLOC_PROF
+		if (opt_prof && prof_promote == false) {
+			/* Pad to a quantum boundary. */
+			try_hdr_size = QUANTUM_CEILING(try_hdr_size);
+			try_cnt0_offset = try_hdr_size;
+			/* Add space for one (prof_thr_cnt_t *) per region. */
+			try_hdr_size += try_nregs * sizeof(prof_thr_cnt_t *);
+		} else
+			try_cnt0_offset = 0;
+#endif
+		try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
+	} while (try_hdr_size > try_reg0_offset);
+
+	/* run_size expansion loop. */
+	do {
+		/*
+		 * Copy valid settings before trying more aggressive settings.
+		 */
+		good_run_size = try_run_size;
+		good_nregs = try_nregs;
+		good_hdr_size = try_hdr_size;
+#ifdef JEMALLOC_PROF
+		good_cnt0_offset = try_cnt0_offset;
+#endif
+		good_reg0_offset = try_reg0_offset;
+
+		/* Try more aggressive settings. */
+		try_run_size += PAGE_SIZE;
+		try_nregs = ((try_run_size - sizeof(arena_run_t)) /
+		    bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
+		do {
+			try_nregs--;
+			try_hdr_size = sizeof(arena_run_t);
+#ifdef JEMALLOC_PROF
+			if (opt_prof && prof_promote == false) {
+				/* Pad to a quantum boundary. */
+				try_hdr_size = QUANTUM_CEILING(try_hdr_size);
+				try_cnt0_offset = try_hdr_size;
+				/*
+				 * Add space for one (prof_thr_cnt_t *) per
+				 * region.
+				 */
+				try_hdr_size += try_nregs *
+				    sizeof(prof_thr_cnt_t *);
+			}
+#endif
+			try_reg0_offset = try_run_size - (try_nregs *
+			    bin->reg_size);
+		} while (try_hdr_size > try_reg0_offset);
+	} while (try_run_size <= arena_maxclass
+	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
+	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
+	    && try_hdr_size < PAGE_SIZE);
+
+	assert(good_hdr_size <= good_reg0_offset);
+
+	/* Copy final settings. */
+	bin->run_size = good_run_size;
+	bin->nregs = good_nregs;
+#ifdef JEMALLOC_PROF
+	bin->cnt0_offset = good_cnt0_offset;
+#endif
+	bin->reg0_offset = good_reg0_offset;
+
+	return (good_run_size);
+}
+
+/*
+ * Allocate a small (<= small_maxclass) region from arena.  size is rounded
+ * up to the bin's region size.  Returns NULL on OOM.  If zero is false the
+ * region may be junk/zero-filled per opt_junk/opt_zero; if true it is
+ * always zeroed.
+ */
+void *
+arena_malloc_small(arena_t *arena, size_t size, bool zero)
+{
+	void *ret;
+	arena_bin_t *bin;
+	arena_run_t *run;
+	size_t binind;
+
+	binind = small_size2bin[size];
+	assert(binind < nbins);
+	bin = &arena->bins[binind];
+	size = bin->reg_size;
+
+	malloc_mutex_lock(&bin->lock);
+	if ((run = bin->runcur) != NULL && run->nfree > 0)
+		ret = arena_run_reg_alloc(run, bin);
+	else
+		ret = arena_bin_malloc_hard(arena, bin);
+
+	if (ret == NULL) {
+		malloc_mutex_unlock(&bin->lock);
+		return (NULL);
+	}
+
+#ifdef JEMALLOC_STATS
+	bin->stats.allocated += size;
+	bin->stats.nmalloc++;
+	bin->stats.nrequests++;
+#endif
+	malloc_mutex_unlock(&bin->lock);
+#ifdef JEMALLOC_PROF
+	/* With threads, tcache fills handle accumulation instead. */
+	if (isthreaded == false) {
+		malloc_mutex_lock(&arena->lock);
+		arena_prof_accum(arena, size);
+		malloc_mutex_unlock(&arena->lock);
+	}
+#endif
+
+	if (zero == false) {
+#ifdef JEMALLOC_FILL
+		if (opt_junk)
+			memset(ret, 0xa5, size);
+		else if (opt_zero)
+			memset(ret, 0, size);
+#endif
+	} else
+		memset(ret, 0, size);
+
+	return (ret);
+}
+
+/*
+ * Allocate a large (page-aligned, multi-page) region from arena.  size is
+ * rounded up to a page multiple.  Returns NULL on OOM.  If zero is false
+ * the region may be junk/zero-filled per opt_junk/opt_zero.
+ */
+void *
+arena_malloc_large(arena_t *arena, size_t size, bool zero)
+{
+	void *ret;
+
+	/* Large allocation. */
+	size = PAGE_CEILING(size);
+	malloc_mutex_lock(&arena->lock);
+	ret = (void *)arena_run_alloc(arena, size, true, zero);
+	if (ret == NULL) {
+		malloc_mutex_unlock(&arena->lock);
+		return (NULL);
+	}
+#ifdef JEMALLOC_STATS
+	{
+		/*
+		 * lstats is indexed by (page count - 1); compute the index
+		 * once rather than repeating (size >> PAGE_SHIFT) - 1 for
+		 * every field.
+		 */
+		size_t lind = (size >> PAGE_SHIFT) - 1;
+
+		arena->stats.nmalloc_large++;
+		arena->stats.nrequests_large++;
+		arena->stats.allocated_large += size;
+		arena->stats.lstats[lind].nmalloc++;
+		arena->stats.lstats[lind].nrequests++;
+		arena->stats.lstats[lind].curruns++;
+		if (arena->stats.lstats[lind].curruns >
+		    arena->stats.lstats[lind].highruns) {
+			arena->stats.lstats[lind].highruns =
+			    arena->stats.lstats[lind].curruns;
+		}
+	}
+#endif
+#ifdef JEMALLOC_PROF
+	arena_prof_accum(arena, size);
+#endif
+	malloc_mutex_unlock(&arena->lock);
+
+	if (zero == false) {
+#ifdef JEMALLOC_FILL
+		if (opt_junk)
+			memset(ret, 0xa5, size);
+		else if (opt_zero)
+			memset(ret, 0, size);
+#endif
+	}
+
+	return (ret);
+}
+
+/*
+ * Top-level arena allocation entry point: dispatch to the small or large
+ * path, preferring the thread cache when JEMALLOC_TCACHE is enabled and a
+ * tcache is available for this thread.
+ */
+void *
+arena_malloc(size_t size, bool zero)
+{
+
+	assert(size != 0);
+	assert(QUANTUM_CEILING(size) <= arena_maxclass);
+
+	if (size <= small_maxclass) {
+#ifdef JEMALLOC_TCACHE
+		tcache_t *tcache;
+
+		if ((tcache = tcache_get()) != NULL)
+			return (tcache_alloc_small(tcache, size, zero));
+		else
+
+		/*
+		 * The dangling "else" above binds to the return below; when
+		 * JEMALLOC_TCACHE is undefined this whole region vanishes and
+		 * the return stands alone.
+		 */
+#endif
+		return (arena_malloc_small(choose_arena(), size, zero));
+	} else {
+#ifdef JEMALLOC_TCACHE
+		if (size <= tcache_maxclass) {
+			tcache_t *tcache;
+
+			if ((tcache = tcache_get()) != NULL)
+				return (tcache_alloc_large(tcache, size, zero));
+			else {
+				return (arena_malloc_large(choose_arena(),
+				    size, zero));
+			}
+		} else
+#endif
+		return (arena_malloc_large(choose_arena(), size, zero));
+	}
+}
+
+/*
+ * Only handles large allocations that require more than page alignment.
+ * Over-allocates alloc_size, then trims the leading and trailing pages so
+ * that the returned region of length size is aligned to alignment.  Both
+ * size and alignment must be page multiples.  Returns NULL on OOM.
+ */
+void *
+arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
+{
+	void *ret;
+	size_t offset;
+	arena_chunk_t *chunk;
+
+	assert((size & PAGE_MASK) == 0);
+	assert((alignment & PAGE_MASK) == 0);
+
+	malloc_mutex_lock(&arena->lock);
+	ret = (void *)arena_run_alloc(arena, alloc_size, true, false);
+	if (ret == NULL) {
+		malloc_mutex_unlock(&arena->lock);
+		return (NULL);
+	}
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
+
+	offset = (uintptr_t)ret & (alignment - 1);
+	assert((offset & PAGE_MASK) == 0);
+	assert(offset < alloc_size);
+	if (offset == 0)
+		/* Already aligned; just drop the surplus tail. */
+		arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
+	else {
+		size_t leadsize, trailsize;
+
+		/* Trim leading pages to reach the alignment boundary. */
+		leadsize = alignment - offset;
+		if (leadsize > 0) {
+			arena_run_trim_head(arena, chunk, ret, alloc_size,
+			    alloc_size - leadsize);
+			ret = (void *)((uintptr_t)ret + leadsize);
+		}
+
+		trailsize = alloc_size - leadsize - size;
+		if (trailsize != 0) {
+			/* Trim trailing space. */
+			assert(trailsize < alloc_size);
+			arena_run_trim_tail(arena, chunk, ret, size + trailsize,
+			    size, false);
+		}
+	}
+
+#ifdef JEMALLOC_STATS
+	arena->stats.nmalloc_large++;
+	arena->stats.nrequests_large++;
+	arena->stats.allocated_large += size;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
+	if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
+	    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
+		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
+	}
+#endif
+	malloc_mutex_unlock(&arena->lock);
+
+#ifdef JEMALLOC_FILL
+	if (opt_junk)
+		memset(ret, 0xa5, size);
+	else if (opt_zero)
+		memset(ret, 0, size);
+#endif
+	return (ret);
+}
+
+/* Return the size of the allocation pointed to by ptr. */
+size_t
+arena_salloc(const void *ptr)
+{
+	size_t size;
+	arena_chunk_t *chunk;
+	size_t pgind, bits;
+
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pgind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+	bits = chunk->map[pgind].bits;
+	assert((bits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((bits & CHUNK_MAP_LARGE) != 0) {
+		/* Large: the chunk map encodes the run size directly. */
+		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+		size = bits & ~PAGE_MASK;
+		assert(size != 0);
+	} else {
+		/* Small: walk back to the run header and use its bin. */
+		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+		    (uintptr_t)((pgind - (bits >> PAGE_SHIFT)) <<
+		    PAGE_SHIFT));
+		assert(run->magic == ARENA_RUN_MAGIC);
+		assert(((uintptr_t)ptr - ((uintptr_t)run +
+		    (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size ==
+		    0);
+		size = run->bin->reg_size;
+	}
+
+	return (size);
+}
+
+#ifdef JEMALLOC_PROF
+/*
+ * Record the true (small) size class of a profiling-promoted page-size
+ * allocation in the chunk map's class bits, so arena_salloc_demote() can
+ * recover it later.
+ */
+void
+arena_prof_promoted(const void *ptr, size_t size)
+{
+	arena_chunk_t *chunk;
+	size_t pgind, binind;
+
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+	assert(isalloc(ptr) == PAGE_SIZE);
+
+	binind = small_size2bin[size];
+	assert(binind < nbins);
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pgind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+	chunk->map[pgind].bits = (chunk->map[pgind].bits &
+	    ~CHUNK_MAP_CLASS_MASK) | (binind << CHUNK_MAP_CLASS_SHIFT);
+}
+
+/*
+ * Like arena_salloc(), but for promoted page-size allocations report the
+ * original small size class recorded by arena_prof_promoted() instead of
+ * PAGE_SIZE.
+ */
+size_t
+arena_salloc_demote(const void *ptr)
+{
+	size_t ret;
+	arena_chunk_t *chunk;
+	size_t pageind, mapbits;
+
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+	mapbits = chunk->map[pageind].bits;
+	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+		    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+		    PAGE_SHIFT));
+		assert(run->magic == ARENA_RUN_MAGIC);
+		assert(((uintptr_t)ptr - ((uintptr_t)run +
+		    (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size ==
+		    0);
+		ret = run->bin->reg_size;
+	} else {
+		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+		ret = mapbits & ~PAGE_MASK;
+		/* All-ones class bits mean "not promoted". */
+		if (prof_promote && ret == PAGE_SIZE && (mapbits &
+		    CHUNK_MAP_CLASS_MASK) != CHUNK_MAP_CLASS_MASK) {
+			size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >>
+			    CHUNK_MAP_CLASS_SHIFT);
+			assert(binind < nbins);
+			ret = chunk->arena->bins[binind].reg_size;
+		}
+		assert(ret != 0);
+	}
+
+	return (ret);
+}
+
+/*
+ * Compute the region index of ptr within run, for a bin with region size
+ * "size".  Avoids a variable-divisor division on the hot path by using
+ * precomputed reciprocals for small non-power-of-two divisors.
+ */
+static inline unsigned
+arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
+    size_t size)
+{
+	unsigned shift, diff, regind;
+
+	assert(run->magic == ARENA_RUN_MAGIC);
+
+	/*
+	 * Avoid doing division with a variable divisor if possible.  Using
+	 * actual division here can reduce allocator throughput by over 20%!
+	 */
+	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
+
+	/* Rescale (factor powers of 2 out of the numerator and denominator). */
+	/*
+	 * NOTE(review): ffs() takes int; this presumably relies on bin region
+	 * sizes fitting in int range — confirm against the bin size limits.
+	 */
+	shift = ffs(size) - 1;
+	diff >>= shift;
+	size >>= shift;
+
+	if (size == 1) {
+		/* The divisor was a power of 2. */
+		regind = diff;
+	} else {
+		/*
+		 * To divide by a number D that is not a power of two we
+		 * multiply by (2^21 / D) and then right shift by 21 positions.
+		 *
+		 *   X / D
+		 *
+		 * becomes
+		 *
+		 *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
+		 *
+		 * We can omit the first three elements, because we never
+		 * divide by 0, and 1 and 2 are both powers of two, which are
+		 * handled above.
+		 */
+#define SIZE_INV_SHIFT 21
+#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
+		static const unsigned size_invs[] = {
+		    SIZE_INV(3),
+		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
+		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
+		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
+		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
+		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
+		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
+		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
+		};
+
+		/* Fall back to real division for divisors past the table. */
+		if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
+			regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
+		else
+			regind = diff / size;
+#undef SIZE_INV
+#undef SIZE_INV_SHIFT
+	}
+	assert(diff == regind * size);
+	assert(regind < bin->nregs);
+
+	return (regind);
+}
+
+/*
+ * Return the profiling counter associated with ptr.  For small allocations
+ * with prof_promote enabled, the sentinel (prof_thr_cnt_t *)1 is returned;
+ * otherwise the counter is read from the run's cnt0 table (small) or from
+ * the chunk map (large).
+ */
+prof_thr_cnt_t *
+arena_prof_cnt_get(const void *ptr)
+{
+	prof_thr_cnt_t *ret;
+	arena_chunk_t *chunk;
+	size_t pageind, mapbits;
+
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+	mapbits = chunk->map[pageind].bits;
+	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+		if (prof_promote)
+			ret = (prof_thr_cnt_t *)(uintptr_t)1U;
+		else {
+			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+			    PAGE_SHIFT));
+			arena_bin_t *bin = run->bin;
+			unsigned regind;
+
+			assert(run->magic == ARENA_RUN_MAGIC);
+			regind = arena_run_regind(run, bin, ptr, bin->reg_size);
+			ret = *(prof_thr_cnt_t **)((uintptr_t)run +
+			    bin->cnt0_offset + (regind *
+			    sizeof(prof_thr_cnt_t *)));
+		}
+	} else
+		ret = chunk->map[pageind].prof_cnt;
+
+	return (ret);
+}
+
+/*
+ * Associate profiling counter cnt with ptr: stored in the run's cnt0 table
+ * for small allocations (unless prof_promote, where only the sentinel value
+ * 1 is permitted), or in the chunk map for large allocations.
+ */
+void
+arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
+{
+	arena_chunk_t *chunk;
+	size_t pageind, mapbits;
+
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+	mapbits = chunk->map[pageind].bits;
+	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+		if (prof_promote == false) {
+			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+			    PAGE_SHIFT));
+			arena_bin_t *bin = run->bin;
+			unsigned regind;
+
+			assert(run->magic == ARENA_RUN_MAGIC);
+			regind = arena_run_regind(run, bin, ptr, bin->reg_size);
+
+			*((prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset
+			    + (regind * sizeof(prof_thr_cnt_t *)))) = cnt;
+		} else
+			assert((uintptr_t)cnt == (uintptr_t)1U);
+	} else
+		chunk->map[pageind].prof_cnt = cnt;
+}
+#endif
+
+/*
+ * Deallocate a completely-empty small run.  Called with bin->lock held; the
+ * lock is swapped for arena->lock around arena_run_dalloc() and re-acquired
+ * before returning.
+ */
+static void
+arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
+    arena_bin_t *bin)
+{
+	size_t npages, run_ind, past;
+
+	/* Dissociate run from bin. */
+	if (run == bin->runcur)
+		bin->runcur = NULL;
+	else if (bin->nregs != 1) {
+		size_t run_pageind = (((uintptr_t)run - (uintptr_t)chunk)) >>
+		    PAGE_SHIFT;
+		arena_chunk_map_t *run_mapelm = &chunk->map[run_pageind];
+		/*
+		 * This block's conditional is necessary because if the run
+		 * only contains one region, then it never gets inserted into
+		 * the non-full runs tree.
+		 */
+		arena_run_tree_remove(&bin->runs, run_mapelm);
+	}
+
+	malloc_mutex_unlock(&bin->lock);
+	/******************************/
+	npages = bin->run_size >> PAGE_SHIFT;
+	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
+	/* past = index one past the last page ever touched via run->next. */
+	past = (size_t)(((uintptr_t)run->next - (uintptr_t)1U -
+	    (uintptr_t)chunk) >> PAGE_SHIFT) + 1;
+	malloc_mutex_lock(&arena->lock);
+
+	/*
+	 * If the run was originally clean, and some pages were never touched,
+	 * trim the clean pages before deallocating the dirty portion of the
+	 * run.
+	 */
+	if ((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) == 0 && past - run_ind
+	    < npages) {
+		/*
+		 * Trim clean pages.  Convert to large run beforehand.  Set the
+		 * last map element first, in case this is a one-page run.
+		 */
+		chunk->map[run_ind+npages-1].bits = CHUNK_MAP_LARGE |
+		    (chunk->map[run_ind].bits & CHUNK_MAP_FLAGS_MASK);
+		chunk->map[run_ind].bits = bin->run_size | CHUNK_MAP_LARGE |
+		    (chunk->map[run_ind].bits & CHUNK_MAP_FLAGS_MASK);
+		arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT),
+		    ((npages - (past - run_ind)) << PAGE_SHIFT), false);
+		npages = past - run_ind;
+	}
+#ifdef JEMALLOC_DEBUG
+	run->magic = 0;
+#endif
+	arena_run_dalloc(arena, run, true);
+	malloc_mutex_unlock(&arena->lock);
+	/****************************/
+	malloc_mutex_lock(&bin->lock);
+#ifdef JEMALLOC_STATS
+	bin->stats.curruns--;
+#endif
+}
+
+/*
+ * Free the small region ptr, whose chunk-map element is mapelm.  Called
+ * with bin->lock held.  Maintains the invariant that bin->runcur is the
+ * lowest-addressed non-full run (when one exists), and deallocates the run
+ * when it becomes completely empty.
+ */
+void
+arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+    arena_chunk_map_t *mapelm)
+{
+	size_t pageind;
+	arena_run_t *run;
+	arena_bin_t *bin;
+#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
+	size_t size;
+#endif
+
+	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
+	    (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
+	assert(run->magic == ARENA_RUN_MAGIC);
+	bin = run->bin;
+#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
+	size = bin->reg_size;
+#endif
+
+#ifdef JEMALLOC_FILL
+	if (opt_junk)
+		memset(ptr, 0x5a, size);
+#endif
+
+	arena_run_reg_dalloc(run, ptr);
+
+	if (run->nfree == bin->nregs)
+		arena_dalloc_bin_run(arena, chunk, run, bin);
+	else if (run->nfree == 1 && run != bin->runcur) {
+		/*
+		 * Make sure that bin->runcur always refers to the lowest
+		 * non-full run, if one exists.
+		 */
+		if (bin->runcur == NULL)
+			bin->runcur = run;
+		else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
+			/* Switch runcur. */
+			if (bin->runcur->nfree > 0) {
+				arena_chunk_t *runcur_chunk =
+				    CHUNK_ADDR2BASE(bin->runcur);
+				size_t runcur_pageind =
+				    (((uintptr_t)bin->runcur -
+				    (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
+				arena_chunk_map_t *runcur_mapelm =
+				    &runcur_chunk->map[runcur_pageind];
+
+				/* Insert runcur. */
+				arena_run_tree_insert(&bin->runs,
+				    runcur_mapelm);
+			}
+			bin->runcur = run;
+		} else {
+			/* run is above runcur; keep it in the non-full tree. */
+			size_t run_pageind = (((uintptr_t)run -
+			    (uintptr_t)chunk)) >> PAGE_SHIFT;
+			arena_chunk_map_t *run_mapelm =
+			    &chunk->map[run_pageind];
+
+			assert(arena_run_tree_search(&bin->runs, run_mapelm) ==
+			    NULL);
+			arena_run_tree_insert(&bin->runs, run_mapelm);
+		}
+	}
+
+#ifdef JEMALLOC_STATS
+	bin->stats.allocated -= size;
+	bin->stats.ndalloc++;
+#endif
+}
+
+#ifdef JEMALLOC_STATS
+/*
+ * Accumulate arena's statistics into the caller-supplied totals.  Takes
+ * arena->lock for the arena-wide and large-class stats, then each bin's
+ * lock in turn for the per-bin stats, so the snapshot is per-lock
+ * consistent rather than globally atomic.
+ */
+void
+arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
+    arena_stats_t *astats, malloc_bin_stats_t *bstats,
+    malloc_large_stats_t *lstats)
+{
+	unsigned i;
+
+	malloc_mutex_lock(&arena->lock);
+	*nactive += arena->nactive;
+	*ndirty += arena->ndirty;
+
+	astats->mapped += arena->stats.mapped;
+	astats->npurge += arena->stats.npurge;
+	astats->nmadvise += arena->stats.nmadvise;
+	astats->purged += arena->stats.purged;
+	astats->allocated_large += arena->stats.allocated_large;
+	astats->nmalloc_large += arena->stats.nmalloc_large;
+	astats->ndalloc_large += arena->stats.ndalloc_large;
+	astats->nrequests_large += arena->stats.nrequests_large;
+
+	for (i = 0; i < nlclasses; i++) {
+		lstats[i].nmalloc += arena->stats.lstats[i].nmalloc;
+		lstats[i].ndalloc += arena->stats.lstats[i].ndalloc;
+		lstats[i].nrequests += arena->stats.lstats[i].nrequests;
+		lstats[i].highruns += arena->stats.lstats[i].highruns;
+		lstats[i].curruns += arena->stats.lstats[i].curruns;
+	}
+	malloc_mutex_unlock(&arena->lock);
+
+	for (i = 0; i < nbins; i++) {
+		arena_bin_t *bin = &arena->bins[i];
+
+		malloc_mutex_lock(&bin->lock);
+		bstats[i].allocated += bin->stats.allocated;
+		bstats[i].nmalloc += bin->stats.nmalloc;
+		bstats[i].ndalloc += bin->stats.ndalloc;
+		bstats[i].nrequests += bin->stats.nrequests;
+#ifdef JEMALLOC_TCACHE
+		bstats[i].nfills += bin->stats.nfills;
+		bstats[i].nflushes += bin->stats.nflushes;
+#endif
+		bstats[i].nruns += bin->stats.nruns;
+		bstats[i].reruns += bin->stats.reruns;
+		bstats[i].highruns += bin->stats.highruns;
+		bstats[i].curruns += bin->stats.curruns;
+		malloc_mutex_unlock(&bin->lock);
+	}
+}
+#endif
+
+/*
+ * Deallocate a large (page-run-backed) allocation.  The allocation's size is
+ * recovered from the chunk page map; when JEMALLOC_FILL is enabled and
+ * opt_junk is set, the region is junk-filled (0x5a) before the run is
+ * returned.  Caller must hold arena->lock (this routine manipulates run
+ * state via arena_run_dalloc()).
+ */
+void
+arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
+{
+
+	/* Large allocation. */
+#ifdef JEMALLOC_FILL
+# ifndef JEMALLOC_STATS
+	if (opt_junk)
+# endif
+#endif
+	{
+		/* size is needed for both junk fill and stats updates. */
+#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
+		size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
+		    PAGE_SHIFT;
+		size_t size = chunk->map[pageind].bits & ~PAGE_MASK;
+#endif
+
+#ifdef JEMALLOC_FILL
+# ifdef JEMALLOC_STATS
+		if (opt_junk)
+# endif
+			memset(ptr, 0x5a, size);
+#endif
+#ifdef JEMALLOC_STATS
+		arena->stats.ndalloc_large++;
+		arena->stats.allocated_large -= size;
+		/* Large stats are indexed by (page count - 1). */
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].ndalloc++;
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--;
+#endif
+	}
+
+	arena_run_dalloc(arena, (arena_run_t *)ptr, true);
+}
+
+/*
+ * Shrink a large allocation in place from oldsize to size (both page
+ * multiples), returning the trailing pages to the arena.  Stats are updated
+ * as a dealloc of the old size class plus a malloc of the new one.
+ */
+static void
+arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+    size_t size, size_t oldsize)
+{
+
+	assert(size < oldsize);
+
+	/*
+	 * Shrink the run, and make trailing pages available for other
+	 * allocations.
+	 */
+	malloc_mutex_lock(&arena->lock);
+	arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
+	    true);
+#ifdef JEMALLOC_STATS
+	arena->stats.ndalloc_large++;
+	arena->stats.allocated_large -= oldsize;
+	arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++;
+	arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
+
+	arena->stats.nmalloc_large++;
+	arena->stats.nrequests_large++;
+	arena->stats.allocated_large += size;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
+	if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
+	    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
+		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
+	}
+#endif
+	malloc_mutex_unlock(&arena->lock);
+}
+
+/*
+ * Try to grow a large allocation in place from oldsize to size by annexing
+ * the immediately following run, if it is free and large enough.  Returns
+ * false on success, true on failure (jemalloc's usual error convention).
+ */
+static bool
+arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+    size_t size, size_t oldsize)
+{
+	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	size_t npages = oldsize >> PAGE_SHIFT;
+
+	assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK));
+
+	/* Try to extend the run. */
+	assert(size > oldsize);
+	malloc_mutex_lock(&arena->lock);
+	if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
+	    & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
+	    ~PAGE_MASK) >= size - oldsize) {
+		/*
+		 * The next run is available and sufficiently large.  Split the
+		 * following run, then merge the first part with the existing
+		 * allocation.
+		 */
+		arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
+		    ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true,
+		    false);
+
+		/* Update the page map for the merged, larger run. */
+		chunk->map[pageind].bits = size | CHUNK_MAP_LARGE |
+		    CHUNK_MAP_ALLOCATED;
+		chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE |
+		    CHUNK_MAP_ALLOCATED;
+
+#ifdef JEMALLOC_STATS
+		arena->stats.ndalloc_large++;
+		arena->stats.allocated_large -= oldsize;
+		arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++;
+		arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
+
+		arena->stats.nmalloc_large++;
+		arena->stats.nrequests_large++;
+		arena->stats.allocated_large += size;
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
+		if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
+		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
+			arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
+			    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
+		}
+#endif
+		malloc_mutex_unlock(&arena->lock);
+		return (false);
+	}
+	malloc_mutex_unlock(&arena->lock);
+
+	return (true);
+}
+
+/*
+ * Try to resize a large allocation, in order to avoid copying.  This will
+ * always fail if growing an object, and the following run is already in use.
+ * Returns false if the allocation was resized (or left) in place, true if
+ * the caller must fall back to allocate-copy-free.
+ */
+static bool
+arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
+{
+	size_t psize;
+
+	psize = PAGE_CEILING(size);
+	if (psize == oldsize) {
+		/* Same size class. */
+#ifdef JEMALLOC_FILL
+		if (opt_junk && size < oldsize) {
+			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
+			    size);
+		}
+#endif
+		return (false);
+	} else {
+		arena_chunk_t *chunk;
+		arena_t *arena;
+
+		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+		arena = chunk->arena;
+		assert(arena->magic == ARENA_MAGIC);
+
+		if (psize < oldsize) {
+#ifdef JEMALLOC_FILL
+			/* Fill before shrinking in order to avoid a race. */
+			if (opt_junk) {
+				memset((void *)((uintptr_t)ptr + size), 0x5a,
+				    oldsize - size);
+			}
+#endif
+			arena_ralloc_large_shrink(arena, chunk, ptr, psize,
+			    oldsize);
+			return (false);
+		} else {
+			bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
+			    psize, oldsize);
+#ifdef JEMALLOC_FILL
+			/* Zero the newly annexed tail if growth succeeded. */
+			if (ret == false && opt_zero) {
+				memset((void *)((uintptr_t)ptr + oldsize), 0,
+				    size - oldsize);
+			}
+#endif
+			return (ret);
+		}
+	}
+}
+
+/*
+ * Resize an arena-backed allocation.  First attempts an in-place resize
+ * (same small bin, or large run grow/shrink); otherwise allocates new space,
+ * copies min(size, oldsize) bytes, and frees the old allocation.  Returns
+ * NULL only if the fallback allocation fails, in which case ptr remains
+ * valid.
+ */
+void *
+arena_ralloc(void *ptr, size_t size, size_t oldsize)
+{
+	void *ret;
+	size_t copysize;
+
+	/* Try to avoid moving the allocation. */
+	if (oldsize <= arena_maxclass) {
+		if (oldsize <= small_maxclass) {
+			/* Same small bin: no move needed. */
+			if (size <= small_maxclass && small_size2bin[size] ==
+			    small_size2bin[oldsize])
+				goto IN_PLACE;
+		} else {
+			assert(size <= arena_maxclass);
+			if (size > small_maxclass) {
+				if (arena_ralloc_large(ptr, size, oldsize) ==
+				    false)
+					return (ptr);
+			}
+		}
+	}
+
+	/*
+	 * If we get here, then size and oldsize are different enough that we
+	 * need to move the object.  In that case, fall back to allocating new
+	 * space and copying.
+	 */
+	ret = arena_malloc(size, false);
+	if (ret == NULL)
+		return (NULL);
+
+	/* Junk/zero-filling were already done by arena_malloc(). */
+	copysize = (size < oldsize) ? size : oldsize;
+	memcpy(ret, ptr, copysize);
+	idalloc(ptr);
+	return (ret);
+IN_PLACE:
+#ifdef JEMALLOC_FILL
+	if (opt_junk && size < oldsize)
+		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
+	else if (opt_zero && size > oldsize)
+		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
+#endif
+	return (ptr);
+}
+
+/*
+ * Initialize an arena at index ind: its lock, stats, chunk/run trees, and
+ * all four families of small bins (tiny, quantum-, cacheline-, and
+ * subpage-spaced).  Returns false on success, true on error (mutex init or
+ * base allocation failure).  Note that a partially initialized arena is not
+ * torn down on failure.
+ */
+bool
+arena_new(arena_t *arena, unsigned ind)
+{
+	unsigned i;
+	arena_bin_t *bin;
+	size_t prev_run_size;
+
+	arena->ind = ind;
+
+	if (malloc_mutex_init(&arena->lock))
+		return (true);
+
+#ifdef JEMALLOC_STATS
+	memset(&arena->stats, 0, sizeof(arena_stats_t));
+	/* Per-size-class large-allocation stats live in base memory. */
+	arena->stats.lstats = (malloc_large_stats_t *)base_alloc(nlclasses *
+	    sizeof(malloc_large_stats_t));
+	if (arena->stats.lstats == NULL)
+		return (true);
+	memset(arena->stats.lstats, 0, nlclasses *
+	    sizeof(malloc_large_stats_t));
+# ifdef JEMALLOC_TCACHE
+	ql_new(&arena->tcache_ql);
+# endif
+#endif
+
+#ifdef JEMALLOC_PROF
+	arena->prof_accumbytes = 0;
+#endif
+
+	/* Initialize chunks. */
+	ql_new(&arena->chunks_dirty);
+	arena->spare = NULL;
+
+	arena->nactive = 0;
+	arena->ndirty = 0;
+	arena->npurgatory = 0;
+
+	arena_avail_tree_new(&arena->runs_avail_clean);
+	arena_avail_tree_new(&arena->runs_avail_dirty);
+
+	/* Initialize bins. */
+	prev_run_size = PAGE_SIZE;
+
+	i = 0;
+#ifdef JEMALLOC_TINY
+	/* (2^n)-spaced tiny bins. */
+	for (; i < ntbins; i++) {
+		bin = &arena->bins[i];
+		if (malloc_mutex_init(&bin->lock))
+			return (true);
+		bin->runcur = NULL;
+		arena_run_tree_new(&bin->runs);
+
+		bin->reg_size = (1U << (LG_TINY_MIN + i));
+
+		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
+
+#ifdef JEMALLOC_STATS
+		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
+#endif
+	}
+#endif
+
+	/* Quantum-spaced bins. */
+	for (; i < ntbins + nqbins; i++) {
+		bin = &arena->bins[i];
+		if (malloc_mutex_init(&bin->lock))
+			return (true);
+		bin->runcur = NULL;
+		arena_run_tree_new(&bin->runs);
+
+		bin->reg_size = (i - ntbins + 1) << LG_QUANTUM;
+
+		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
+
+#ifdef JEMALLOC_STATS
+		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
+#endif
+	}
+
+	/* Cacheline-spaced bins. */
+	for (; i < ntbins + nqbins + ncbins; i++) {
+		bin = &arena->bins[i];
+		if (malloc_mutex_init(&bin->lock))
+			return (true);
+		bin->runcur = NULL;
+		arena_run_tree_new(&bin->runs);
+
+		bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
+		    LG_CACHELINE);
+
+		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
+
+#ifdef JEMALLOC_STATS
+		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
+#endif
+	}
+
+	/* Subpage-spaced bins. */
+	for (; i < nbins; i++) {
+		bin = &arena->bins[i];
+		if (malloc_mutex_init(&bin->lock))
+			return (true);
+		bin->runcur = NULL;
+		arena_run_tree_new(&bin->runs);
+
+		bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
+		    << LG_SUBPAGE);
+
+		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
+
+#ifdef JEMALLOC_STATS
+		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
+#endif
+	}
+
+#ifdef JEMALLOC_DEBUG
+	arena->magic = ARENA_MAGIC;
+#endif
+
+	return (false);
+}
+
+#ifdef JEMALLOC_TINY
+/*
+ * Compute the smallest power of 2 that is >= x, via the classic
+ * bit-smearing trick: propagate the highest set bit of (x-1) into all lower
+ * positions, then add 1.  Note that x == 0 yields 0.
+ */
+static size_t
+pow2_ceil(size_t x)
+{
+
+	x--;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+#if (SIZEOF_PTR == 8)
+	/* Extra smear step for 64-bit size_t. */
+	x |= x >> 32;
+#endif
+	x++;
+	return (x);
+}
+#endif
+
+#ifdef JEMALLOC_DEBUG
+/*
+ * Debug-only check: verify that every entry of the small_size2bin lookup
+ * table agrees with the analytic size->bin mapping for each size family
+ * (tiny, quantum-, cacheline-, and subpage-spaced).
+ */
+static void
+small_size2bin_validate(void)
+{
+	size_t i, size, binind;
+
+	/* Size 0 is unused; entry 0 holds the 0xff sentinel. */
+	assert(small_size2bin[0] == 0xffU);
+	i = 1;
+# ifdef JEMALLOC_TINY
+	/* Tiny. */
+	for (; i < (1U << LG_TINY_MIN); i++) {
+		size = pow2_ceil(1U << LG_TINY_MIN);
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		assert(small_size2bin[i] == binind);
+	}
+	for (; i < qspace_min; i++) {
+		size = pow2_ceil(i);
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		assert(small_size2bin[i] == binind);
+	}
+# endif
+	/* Quantum-spaced. */
+	for (; i <= qspace_max; i++) {
+		size = QUANTUM_CEILING(i);
+		binind = ntbins + (size >> LG_QUANTUM) - 1;
+		assert(small_size2bin[i] == binind);
+	}
+	/* Cacheline-spaced. */
+	for (; i <= cspace_max; i++) {
+		size = CACHELINE_CEILING(i);
+		binind = ntbins + nqbins + ((size - cspace_min) >>
+		    LG_CACHELINE);
+		assert(small_size2bin[i] == binind);
+	}
+	/* Sub-page. */
+	for (; i <= sspace_max; i++) {
+		size = SUBPAGE_CEILING(i);
+		binind = ntbins + nqbins + ncbins + ((size - sspace_min)
+		    >> LG_SUBPAGE);
+		assert(small_size2bin[i] == binind);
+	}
+}
+#endif
+
+/*
+ * Install the size->bin lookup table.  When the runtime options match the
+ * compile-time defaults, the precomputed const table is used directly;
+ * otherwise a custom table is built at runtime.  Returns false on success,
+ * true on allocation failure in the hard path.
+ */
+static bool
+small_size2bin_init(void)
+{
+
+	if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
+	    || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
+	    || sizeof(const_small_size2bin) != small_maxclass + 1)
+		return (small_size2bin_init_hard());
+
+	small_size2bin = const_small_size2bin;
+#ifdef JEMALLOC_DEBUG
+	assert(sizeof(const_small_size2bin) == small_maxclass + 1);
+	small_size2bin_validate();
+#endif
+	return (false);
+}
+
+/*
+ * Build the size->bin lookup table at runtime, for configurations where the
+ * precomputed const table does not apply.  The table is allocated from base
+ * memory (never freed) and covers sizes 1..small_maxclass; entry 0 holds the
+ * 0xff sentinel.  Returns false on success, true on allocation failure.
+ */
+static bool
+small_size2bin_init_hard(void)
+{
+	size_t i, size, binind;
+	uint8_t *custom_small_size2bin;
+
+	assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
+	    || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
+	    || sizeof(const_small_size2bin) != small_maxclass + 1);
+
+	custom_small_size2bin = (uint8_t *)base_alloc(small_maxclass + 1);
+	if (custom_small_size2bin == NULL)
+		return (true);
+
+	custom_small_size2bin[0] = 0xffU;
+	i = 1;
+#ifdef JEMALLOC_TINY
+	/* Tiny. */
+	for (; i < (1U << LG_TINY_MIN); i++) {
+		size = pow2_ceil(1U << LG_TINY_MIN);
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		custom_small_size2bin[i] = binind;
+	}
+	for (; i < qspace_min; i++) {
+		size = pow2_ceil(i);
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		custom_small_size2bin[i] = binind;
+	}
+#endif
+	/* Quantum-spaced. */
+	for (; i <= qspace_max; i++) {
+		size = QUANTUM_CEILING(i);
+		binind = ntbins + (size >> LG_QUANTUM) - 1;
+		custom_small_size2bin[i] = binind;
+	}
+	/* Cacheline-spaced. */
+	for (; i <= cspace_max; i++) {
+		size = CACHELINE_CEILING(i);
+		binind = ntbins + nqbins + ((size - cspace_min) >>
+		    LG_CACHELINE);
+		custom_small_size2bin[i] = binind;
+	}
+	/* Sub-page. */
+	for (; i <= sspace_max; i++) {
+		size = SUBPAGE_CEILING(i);
+		binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
+		    LG_SUBPAGE);
+		custom_small_size2bin[i] = binind;
+	}
+
+	small_size2bin = custom_small_size2bin;
+#ifdef JEMALLOC_DEBUG
+	small_size2bin_validate();
+#endif
+	return (false);
+}
+
+/*
+ * One-time arena subsystem bootstrap: derive the small size-class layout
+ * from the runtime options, sanity-check the number of bins against the
+ * 8-bit encodings that depend on it, build the size->bin table, and compute
+ * the chunk header size / maximum arena-backed size class.  Returns false
+ * on success, true on failure.
+ */
+bool
+arena_boot(void)
+{
+	size_t header_size;
+
+	/* Set variables according to the value of opt_lg_[qc]space_max. */
+	qspace_max = (1U << opt_lg_qspace_max);
+	cspace_min = CACHELINE_CEILING(qspace_max);
+	if (cspace_min == qspace_max)
+		cspace_min += CACHELINE;
+	cspace_max = (1U << opt_lg_cspace_max);
+	sspace_min = SUBPAGE_CEILING(cspace_max);
+	if (sspace_min == cspace_max)
+		sspace_min += SUBPAGE;
+	assert(sspace_min < PAGE_SIZE);
+	sspace_max = PAGE_SIZE - SUBPAGE;
+
+#ifdef JEMALLOC_TINY
+	assert(LG_QUANTUM >= LG_TINY_MIN);
+#endif
+	assert(ntbins <= LG_QUANTUM);
+	nqbins = qspace_max >> LG_QUANTUM;
+	ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1;
+	nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1;
+	nbins = ntbins + nqbins + ncbins + nsbins;
+
+	/*
+	 * The small_size2bin lookup table uses uint8_t to encode each bin
+	 * index, so we cannot support more than 256 small size classes.  This
+	 * limit is difficult to exceed (not even possible with 16B quantum and
+	 * 4KiB pages), and such configurations are impractical, but
+	 * nonetheless we need to protect against this case in order to avoid
+	 * undefined behavior.
+	 *
+	 * Further constrain nbins to 255 if prof_promote is true, since all
+	 * small size classes, plus a "not small" size class must be stored in
+	 * 8 bits of arena_chunk_map_t's bits field.
+	 */
+#ifdef JEMALLOC_PROF
+	if (opt_prof && prof_promote) {
+		if (nbins > 255) {
+			char line_buf[UMAX2S_BUFSIZE];
+			malloc_write("<jemalloc>: Too many small size classes (");
+			malloc_write(umax2s(nbins, 10, line_buf));
+			malloc_write(" > max 255)\n");
+			abort();
+		}
+	} else
+#endif
+	if (nbins > 256) {
+		char line_buf[UMAX2S_BUFSIZE];
+		malloc_write("<jemalloc>: Too many small size classes (");
+		malloc_write(umax2s(nbins, 10, line_buf));
+		malloc_write(" > max 256)\n");
+		abort();
+	}
+
+	if (small_size2bin_init())
+		return (true);
+
+	/*
+	 * Compute the header size such that it is large enough to contain the
+	 * page map.
+	 */
+	header_size = sizeof(arena_chunk_t) +
+	    (sizeof(arena_chunk_map_t) * (chunk_npages - 1));
+	arena_chunk_header_npages = (header_size >> PAGE_SHIFT) +
+	    ((header_size & PAGE_MASK) != 0);
+	arena_maxclass = chunksize - (arena_chunk_header_npages << PAGE_SHIFT);
+
+	return (false);
+}
diff --git a/dep/src/jmalloc/base.c b/dep/src/jmalloc/base.c
new file mode 100644
index 00000000000..605197eaced
--- /dev/null
+++ b/dep/src/jmalloc/base.c
@@ -0,0 +1,106 @@
+#define JEMALLOC_BASE_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+malloc_mutex_t base_mtx;
+
+/*
+ * Current pages that are being used for internal memory allocations. These
+ * pages are carved up in cacheline-size quanta, so that there is no chance of
+ * false cache line sharing.
+ */
+static void *base_pages;
+static void *base_next_addr;
+static void *base_past_addr; /* Addr immediately past base_pages. */
+static extent_node_t *base_nodes;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static bool base_pages_alloc(size_t minsize);
+
+/******************************************************************************/
+
+/*
+ * Replace the current base pages with a fresh chunk large enough to satisfy
+ * at least minsize bytes.  Any unused tail of the previous base pages is
+ * abandoned (base memory is never freed).  Caller must hold base_mtx.
+ * Returns false on success, true on chunk allocation failure.
+ */
+static bool
+base_pages_alloc(size_t minsize)
+{
+	size_t csize;
+	bool zero;
+
+	assert(minsize != 0);
+	csize = CHUNK_CEILING(minsize);
+	zero = false;
+	base_pages = chunk_alloc(csize, &zero);
+	if (base_pages == NULL)
+		return (true);
+	base_next_addr = base_pages;
+	base_past_addr = (void *)((uintptr_t)base_pages + csize);
+
+	return (false);
+}
+
+/*
+ * Bump-allocate internal (never-freed) memory from the base pages, rounded
+ * up to a cacheline multiple so no two allocations share a cache line.
+ * Thread-safe via base_mtx.  Returns NULL if a new base chunk cannot be
+ * obtained.
+ */
+void *
+base_alloc(size_t size)
+{
+	void *ret;
+	size_t csize;
+
+	/* Round size up to nearest multiple of the cacheline size. */
+	csize = CACHELINE_CEILING(size);
+
+	malloc_mutex_lock(&base_mtx);
+	/* Make sure there's enough space for the allocation. */
+	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
+		if (base_pages_alloc(csize)) {
+			malloc_mutex_unlock(&base_mtx);
+			return (NULL);
+		}
+	}
+	/* Allocate. */
+	ret = base_next_addr;
+	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
+	malloc_mutex_unlock(&base_mtx);
+
+	return (ret);
+}
+
+/*
+ * Allocate an extent node, preferring the free list of recycled nodes
+ * (threaded through the nodes themselves) before falling back to
+ * base_alloc().  The fallback is done outside base_mtx since base_alloc()
+ * takes that lock itself.
+ */
+extent_node_t *
+base_node_alloc(void)
+{
+	extent_node_t *ret;
+
+	malloc_mutex_lock(&base_mtx);
+	if (base_nodes != NULL) {
+		ret = base_nodes;
+		/* Pop: the node's first word links to the next free node. */
+		base_nodes = *(extent_node_t **)ret;
+		malloc_mutex_unlock(&base_mtx);
+	} else {
+		malloc_mutex_unlock(&base_mtx);
+		ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
+	}
+
+	return (ret);
+}
+
+/*
+ * Return an extent node to the free list for later reuse (base memory is
+ * never returned to the system).
+ */
+void
+base_node_dealloc(extent_node_t *node)
+{
+
+	malloc_mutex_lock(&base_mtx);
+	*(extent_node_t **)node = base_nodes;
+	base_nodes = node;
+	malloc_mutex_unlock(&base_mtx);
+}
+
+/*
+ * One-time initialization of the base allocator.  Returns false on success,
+ * true if the mutex cannot be initialized.
+ */
+bool
+base_boot(void)
+{
+
+	base_nodes = NULL;
+	if (malloc_mutex_init(&base_mtx))
+		return (true);
+
+	return (false);
+}
diff --git a/dep/src/jmalloc/chunk.c b/dep/src/jmalloc/chunk.c
new file mode 100644
index 00000000000..e6e3bcd195a
--- /dev/null
+++ b/dep/src/jmalloc/chunk.c
@@ -0,0 +1,150 @@
+#define JEMALLOC_CHUNK_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+size_t opt_lg_chunk = LG_CHUNK_DEFAULT;
+#ifdef JEMALLOC_SWAP
+bool opt_overcommit = true;
+#endif
+
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+malloc_mutex_t chunks_mtx;
+chunk_stats_t stats_chunks;
+#endif
+
+/* Various chunk-related settings. */
+size_t chunksize;
+size_t chunksize_mask; /* (chunksize - 1). */
+size_t chunk_npages;
+size_t arena_chunk_header_npages;
+size_t arena_maxclass; /* Max size class for arenas. */
+
+/******************************************************************************/
+
+/*
+ * If the caller specifies (*zero == false), it is still possible to receive
+ * zeroed memory, in which case *zero is toggled to true.  arena_chunk_alloc()
+ * takes advantage of this to avoid demanding zeroed chunks, but taking
+ * advantage of them if they are returned.
+ *
+ * Allocation strategy order: swap file (if enabled), then DSS (if compiled
+ * in, unless overcommit forbids it), then mmap.  On success, chunk-level
+ * stats are updated and a profiler dump may be triggered when the chunk
+ * high-water mark is exceeded.
+ */
+void *
+chunk_alloc(size_t size, bool *zero)
+{
+	void *ret;
+
+	assert(size != 0);
+	assert((size & chunksize_mask) == 0);
+
+#ifdef JEMALLOC_SWAP
+	if (swap_enabled) {
+		ret = chunk_alloc_swap(size, zero);
+		if (ret != NULL)
+			goto RETURN;
+	}
+
+	if (swap_enabled == false || opt_overcommit) {
+#endif
+#ifdef JEMALLOC_DSS
+	ret = chunk_alloc_dss(size, zero);
+	if (ret != NULL)
+		goto RETURN;
+#endif
+	ret = chunk_alloc_mmap(size);
+	if (ret != NULL) {
+		/* Fresh mmap()ed pages are always zeroed. */
+		*zero = true;
+		goto RETURN;
+	}
+#ifdef JEMALLOC_SWAP
+	}
+#endif
+
+	/* All strategies for allocation failed. */
+	ret = NULL;
+RETURN:
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+	if (ret != NULL) {
+# ifdef JEMALLOC_PROF
+		bool udump;
+# endif
+		malloc_mutex_lock(&chunks_mtx);
+# ifdef JEMALLOC_STATS
+		stats_chunks.nchunks += (size / chunksize);
+# endif
+		stats_chunks.curchunks += (size / chunksize);
+		if (stats_chunks.curchunks > stats_chunks.highchunks) {
+			stats_chunks.highchunks = stats_chunks.curchunks;
+# ifdef JEMALLOC_PROF
+			udump = true;
+# endif
+		}
+# ifdef JEMALLOC_PROF
+		else
+			udump = false;
+# endif
+		malloc_mutex_unlock(&chunks_mtx);
+# ifdef JEMALLOC_PROF
+		/* Dump outside chunks_mtx to avoid holding it during I/O. */
+		if (opt_prof && opt_prof_udump && udump)
+			prof_udump();
+# endif
+	}
+#endif
+
+	assert(CHUNK_ADDR2BASE(ret) == ret);
+	return (ret);
+}
+
+/*
+ * Return a chunk to whichever backend can take it, mirroring the allocation
+ * order: swap first, then DSS, then mmap as the unconditional fallback.
+ * The chunk must be chunk-aligned and size a chunksize multiple.
+ */
+void
+chunk_dealloc(void *chunk, size_t size)
+{
+
+	assert(chunk != NULL);
+	assert(CHUNK_ADDR2BASE(chunk) == chunk);
+	assert(size != 0);
+	assert((size & chunksize_mask) == 0);
+
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+	malloc_mutex_lock(&chunks_mtx);
+	stats_chunks.curchunks -= (size / chunksize);
+	malloc_mutex_unlock(&chunks_mtx);
+#endif
+
+#ifdef JEMALLOC_SWAP
+	/* false return means the backend accepted the chunk. */
+	if (swap_enabled && chunk_dealloc_swap(chunk, size) == false)
+		return;
+#endif
+#ifdef JEMALLOC_DSS
+	if (chunk_dealloc_dss(chunk, size) == false)
+		return;
+#endif
+	chunk_dealloc_mmap(chunk, size);
+}
+
+/*
+ * One-time chunk subsystem bootstrap: derive chunk geometry from
+ * opt_lg_chunk, initialize chunk statistics, and boot the optional swap and
+ * DSS backends.  Returns false on success, true on failure.
+ */
+bool
+chunk_boot(void)
+{
+
+	/* Set variables according to the value of opt_lg_chunk. */
+	chunksize = (1LU << opt_lg_chunk);
+	assert(chunksize >= PAGE_SIZE);
+	chunksize_mask = chunksize - 1;
+	chunk_npages = (chunksize >> PAGE_SHIFT);
+
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+	if (malloc_mutex_init(&chunks_mtx))
+		return (true);
+	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
+#endif
+
+#ifdef JEMALLOC_SWAP
+	if (chunk_swap_boot())
+		return (true);
+#endif
+#ifdef JEMALLOC_DSS
+	if (chunk_dss_boot())
+		return (true);
+#endif
+
+	return (false);
+}
diff --git a/dep/src/jmalloc/chunk_dss.c b/dep/src/jmalloc/chunk_dss.c
new file mode 100644
index 00000000000..d9bd63c3ac4
--- /dev/null
+++ b/dep/src/jmalloc/chunk_dss.c
@@ -0,0 +1,268 @@
+#define JEMALLOC_CHUNK_DSS_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifdef JEMALLOC_DSS
+/******************************************************************************/
+/* Data. */
+
+malloc_mutex_t dss_mtx;
+
+/* Base address of the DSS. */
+static void *dss_base;
+/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
+static void *dss_prev;
+/* Current upper limit on DSS addresses. */
+static void *dss_max;
+
+/*
+ * Trees of chunks that were previously allocated (trees differ only in node
+ * ordering). These are used when allocating chunks, in an attempt to re-use
+ * address space. Depending on function, different tree orderings are needed,
+ * which is why there are two trees with the same contents.
+ */
+static extent_tree_t dss_chunks_szad;
+static extent_tree_t dss_chunks_ad;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void *chunk_recycle_dss(size_t size, bool *zero);
+static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
+
+/******************************************************************************/
+
+/*
+ * Try to satisfy a chunk request from previously released DSS address space.
+ * A best-fit (size, address)-ordered search finds the smallest free extent
+ * >= size; an exact fit removes the node, otherwise the remainder is
+ * reinserted as a smaller extent.  Recycled memory is not zeroed, so it is
+ * cleared here when the caller requires zeroed pages.  Returns NULL if no
+ * suitable extent exists.
+ */
+static void *
+chunk_recycle_dss(size_t size, bool *zero)
+{
+	extent_node_t *node, key;
+
+	key.addr = NULL;
+	key.size = size;
+	malloc_mutex_lock(&dss_mtx);
+	node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
+	if (node != NULL) {
+		void *ret = node->addr;
+
+		/* Remove node from the tree. */
+		extent_tree_szad_remove(&dss_chunks_szad, node);
+		if (node->size == size) {
+			extent_tree_ad_remove(&dss_chunks_ad, node);
+			base_node_dealloc(node);
+		} else {
+			/*
+			 * Insert the remainder of node's address range as a
+			 * smaller chunk.  Its position within dss_chunks_ad
+			 * does not change.
+			 */
+			assert(node->size > size);
+			node->addr = (void *)((uintptr_t)node->addr + size);
+			node->size -= size;
+			extent_tree_szad_insert(&dss_chunks_szad, node);
+		}
+		malloc_mutex_unlock(&dss_mtx);
+
+		if (*zero)
+			memset(ret, 0, size);
+		return (ret);
+	}
+	malloc_mutex_unlock(&dss_mtx);
+
+	return (NULL);
+}
+
+/*
+ * Allocate a chunk by extending the DSS (sbrk), first trying to recycle
+ * previously released DSS space.  The break is padded as needed so the
+ * returned address is chunk-aligned.  Freshly extended DSS pages are zeroed
+ * by the kernel, so *zero is set true on that path.  Returns NULL if the
+ * DSS is exhausted or the request would overflow sbrk's signed increment.
+ */
+void *
+chunk_alloc_dss(size_t size, bool *zero)
+{
+	void *ret;
+
+	ret = chunk_recycle_dss(size, zero);
+	if (ret != NULL)
+		return (ret);
+
+	/*
+	 * sbrk() uses a signed increment argument, so take care not to
+	 * interpret a huge allocation request as a negative increment.
+	 */
+	if ((intptr_t)size < 0)
+		return (NULL);
+
+	malloc_mutex_lock(&dss_mtx);
+	if (dss_prev != (void *)-1) {
+		intptr_t incr;
+
+		/*
+		 * The loop is necessary to recover from races with other
+		 * threads that are using the DSS for something other than
+		 * malloc.
+		 */
+		do {
+			/* Get the current end of the DSS. */
+			dss_max = sbrk(0);
+
+			/*
+			 * Calculate how much padding is necessary to
+			 * chunk-align the end of the DSS.
+			 */
+			incr = (intptr_t)size
+			    - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
+			if (incr == (intptr_t)size)
+				ret = dss_max;
+			else {
+				ret = (void *)((intptr_t)dss_max + incr);
+				incr += size;
+			}
+
+			dss_prev = sbrk(incr);
+			if (dss_prev == dss_max) {
+				/* Success. */
+				dss_max = (void *)((intptr_t)dss_prev + incr);
+				malloc_mutex_unlock(&dss_mtx);
+				*zero = true;
+				return (ret);
+			}
+		} while (dss_prev != (void *)-1);
+	}
+	malloc_mutex_unlock(&dss_mtx);
+
+	return (NULL);
+}
+
+/*
+ * Record a released DSS chunk in the free-extent trees, coalescing with the
+ * adjacent extents on either side when possible.  Called with dss_mtx held;
+ * the lock is dropped temporarily while allocating a fresh node (see the
+ * deadlock note below).  Returns the resulting (possibly coalesced) node,
+ * or NULL if node allocation failed.
+ */
+static extent_node_t *
+chunk_dealloc_dss_record(void *chunk, size_t size)
+{
+	extent_node_t *xnode, *node, *prev, key;
+
+	xnode = NULL;
+	while (true) {
+		key.addr = (void *)((uintptr_t)chunk + size);
+		node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
+		/* Try to coalesce forward. */
+		if (node != NULL && node->addr == key.addr) {
+			/*
+			 * Coalesce chunk with the following address range.
+			 * This does not change the position within
+			 * dss_chunks_ad, so only remove/insert from/into
+			 * dss_chunks_szad.
+			 */
+			extent_tree_szad_remove(&dss_chunks_szad, node);
+			node->addr = chunk;
+			node->size += size;
+			extent_tree_szad_insert(&dss_chunks_szad, node);
+			break;
+		} else if (xnode == NULL) {
+			/*
+			 * It is possible that base_node_alloc() will cause a
+			 * new base chunk to be allocated, so take care not to
+			 * deadlock on dss_mtx, and recover if another thread
+			 * deallocates an adjacent chunk while this one is busy
+			 * allocating xnode.
+			 */
+			malloc_mutex_unlock(&dss_mtx);
+			xnode = base_node_alloc();
+			malloc_mutex_lock(&dss_mtx);
+			if (xnode == NULL)
+				return (NULL);
+		} else {
+			/* Coalescing forward failed, so insert a new node. */
+			node = xnode;
+			xnode = NULL;
+			node->addr = chunk;
+			node->size = size;
+			extent_tree_ad_insert(&dss_chunks_ad, node);
+			extent_tree_szad_insert(&dss_chunks_szad, node);
+			break;
+		}
+	}
+	/* Discard xnode if it ended up unused due to a race. */
+	if (xnode != NULL)
+		base_node_dealloc(xnode);
+
+	/* Try to coalesce backward. */
+	prev = extent_tree_ad_prev(&dss_chunks_ad, node);
+	if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
+	    chunk) {
+		/*
+		 * Coalesce chunk with the previous address range.  This does
+		 * not change the position within dss_chunks_ad, so only
+		 * remove/insert node from/into dss_chunks_szad.
+		 */
+		extent_tree_szad_remove(&dss_chunks_szad, prev);
+		extent_tree_ad_remove(&dss_chunks_ad, prev);
+
+		extent_tree_szad_remove(&dss_chunks_szad, node);
+		node->addr = prev->addr;
+		node->size += prev->size;
+		extent_tree_szad_insert(&dss_chunks_szad, node);
+
+		base_node_dealloc(prev);
+	}
+
+	return (node);
+}
+
+/*
+ * Release a chunk that came from the DSS.  If the (possibly coalesced)
+ * extent abuts the current break, shrink the DSS; otherwise hand the pages
+ * back to the kernel with madvise(MADV_DONTNEED) while keeping the address
+ * range for recycling.  Returns false if the chunk belonged to the DSS
+ * (handled here), true if it did not (caller must try another backend).
+ */
+bool
+chunk_dealloc_dss(void *chunk, size_t size)
+{
+	bool ret;
+
+	malloc_mutex_lock(&dss_mtx);
+	if ((uintptr_t)chunk >= (uintptr_t)dss_base
+	    && (uintptr_t)chunk < (uintptr_t)dss_max) {
+		extent_node_t *node;
+
+		/* Try to coalesce with other unused chunks. */
+		node = chunk_dealloc_dss_record(chunk, size);
+		if (node != NULL) {
+			chunk = node->addr;
+			size = node->size;
+		}
+
+		/* Get the current end of the DSS. */
+		dss_max = sbrk(0);
+
+		/*
+		 * Try to shrink the DSS if this chunk is at the end of the
+		 * DSS.  The sbrk() call here is subject to a race condition
+		 * with threads that use brk(2) or sbrk(2) directly, but the
+		 * alternative would be to leak memory for the sake of poorly
+		 * designed multi-threaded programs.
+		 */
+		if ((void *)((uintptr_t)chunk + size) == dss_max
+		    && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
+			/* Success. */
+			dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
+
+			if (node != NULL) {
+				/* Extent left the DSS; drop its record. */
+				extent_tree_szad_remove(&dss_chunks_szad, node);
+				extent_tree_ad_remove(&dss_chunks_ad, node);
+				base_node_dealloc(node);
+			}
+		} else
+			madvise(chunk, size, MADV_DONTNEED);
+
+		ret = false;
+		goto RETURN;
+	}
+
+	ret = true;
+RETURN:
+	malloc_mutex_unlock(&dss_mtx);
+	return (ret);
+}
+
+/*
+ * One-time DSS backend bootstrap: record the initial break and initialize
+ * the free-extent trees.  Returns false on success, true if the mutex
+ * cannot be initialized.
+ */
+bool
+chunk_dss_boot(void)
+{
+
+	if (malloc_mutex_init(&dss_mtx))
+		return (true);
+	dss_base = sbrk(0);
+	dss_prev = dss_base;
+	dss_max = dss_base;
+	extent_tree_szad_new(&dss_chunks_szad);
+	extent_tree_ad_new(&dss_chunks_ad);
+
+	return (false);
+}
+
+/******************************************************************************/
+#endif /* JEMALLOC_DSS */
diff --git a/dep/src/jmalloc/chunk_mmap.c b/dep/src/jmalloc/chunk_mmap.c
new file mode 100644
index 00000000000..8f0711384e3
--- /dev/null
+++ b/dep/src/jmalloc/chunk_mmap.c
@@ -0,0 +1,201 @@
+#define JEMALLOC_CHUNK_MMAP_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+/*
+ * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and
+ * potentially avoid some system calls. We can get away without TLS here,
+ * since the state of mmap_unaligned only affects performance, rather than
+ * correct function.
+ */
+static
+#ifndef NO_TLS
+ __thread
+#endif
+ bool mmap_unaligned
+#ifndef NO_TLS
+ JEMALLOC_ATTR(tls_model("initial-exec"))
+#endif
+ ;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void *pages_map(void *addr, size_t size);
+static void pages_unmap(void *addr, size_t size);
+static void *chunk_alloc_mmap_slow(size_t size, bool unaligned);
+
+/******************************************************************************/
+
+/*
+ * mmap() anonymous read/write pages, preferably at addr.  Returns NULL on
+ * failure, or if the kernel placed the mapping somewhere other than the
+ * requested addr (in which case the stray mapping is unmapped first).
+ */
+static void *
+pages_map(void *addr, size_t size)
+{
+	void *ret;
+
+	/*
+	 * We don't use MAP_FIXED here, because it can cause the *replacement*
+	 * of existing mappings, and we only want to create new mappings.
+	 */
+	ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+	    -1, 0);
+	assert(ret != NULL);
+
+	if (ret == MAP_FAILED)
+		ret = NULL;
+	else if (addr != NULL && ret != addr) {
+		/*
+		 * We succeeded in mapping memory, but not in the right place.
+		 */
+		if (munmap(ret, size) == -1) {
+			char buf[STRERROR_BUF];
+
+			strerror_r(errno, buf, sizeof(buf));
+			malloc_write("<jemalloc>: Error in munmap(): ");
+			malloc_write(buf);
+			malloc_write("\n");
+			if (opt_abort)
+				abort();
+		}
+		ret = NULL;
+	}
+
+	assert(ret == NULL || (addr == NULL && ret != addr)
+	    || (addr != NULL && ret == addr));
+	return (ret);
+}
+
+/*
+ * munmap() wrapper that reports failures via malloc_write() and honors
+ * opt_abort, since a failed unmap indicates serious internal corruption.
+ */
+static void
+pages_unmap(void *addr, size_t size)
+{
+
+	if (munmap(addr, size) == -1) {
+		char buf[STRERROR_BUF];
+
+		strerror_r(errno, buf, sizeof(buf));
+		malloc_write("<jemalloc>: Error in munmap(): ");
+		malloc_write(buf);
+		malloc_write("\n");
+		if (opt_abort)
+			abort();
+	}
+}
+
+/*
+ * Reliable-but-expensive chunk allocation: map size + chunksize bytes, then
+ * trim the leading and/or trailing excess so the result is chunk-aligned.
+ * "unaligned" reports whether the caller's fast-path attempt was unaligned;
+ * if this over-sized mapping happens to come back aligned, mmap_unaligned
+ * is reset so the next allocation retries the fast path.
+ */
+static void *
+chunk_alloc_mmap_slow(size_t size, bool unaligned)
+{
+	void *ret;
+	size_t offset;
+
+	/* Beware size_t wrap-around. */
+	if (size + chunksize <= size)
+		return (NULL);
+
+	ret = pages_map(NULL, size + chunksize);
+	if (ret == NULL)
+		return (NULL);
+
+	/* Clean up unneeded leading/trailing space. */
+	offset = CHUNK_ADDR2OFFSET(ret);
+	if (offset != 0) {
+		/* Note that mmap() returned an unaligned mapping. */
+		unaligned = true;
+
+		/* Leading space. */
+		pages_unmap(ret, chunksize - offset);
+
+		ret = (void *)((uintptr_t)ret +
+		    (chunksize - offset));
+
+		/* Trailing space. */
+		pages_unmap((void *)((uintptr_t)ret + size),
+		    offset);
+	} else {
+		/* Trailing space only. */
+		pages_unmap((void *)((uintptr_t)ret + size),
+		    chunksize);
+	}
+
+	/*
+	 * If mmap() returned an aligned mapping, reset mmap_unaligned so that
+	 * the next chunk_alloc_mmap() execution tries the fast allocation
+	 * method.
+	 */
+	if (unaligned == false)
+		mmap_unaligned = false;
+
+	return (ret);
+}
+
+/*
+ * Allocate a chunk-aligned mapping of exactly size bytes (size must be a
+ * chunksize multiple).  Uses an optimistic exact-size mapping when recent
+ * history (mmap_unaligned) suggests the kernel returns aligned addresses,
+ * extending or falling back to chunk_alloc_mmap_slow() otherwise.  Returns
+ * NULL on failure.
+ */
+void *
+chunk_alloc_mmap(size_t size)
+{
+	void *ret;
+
+	/*
+	 * Ideally, there would be a way to specify alignment to mmap() (like
+	 * NetBSD has), but in the absence of such a feature, we have to work
+	 * hard to efficiently create aligned mappings.  The reliable, but
+	 * slow method is to create a mapping that is over-sized, then trim the
+	 * excess.  However, that always results in at least one call to
+	 * pages_unmap().
+	 *
+	 * A more optimistic approach is to try mapping precisely the right
+	 * amount, then try to append another mapping if alignment is off.  In
+	 * practice, this works out well as long as the application is not
+	 * interleaving mappings via direct mmap() calls.  If we do run into a
+	 * situation where there is an interleaved mapping and we are unable to
+	 * extend an unaligned mapping, our best option is to switch to the
+	 * slow method until mmap() returns another aligned mapping.  This will
+	 * tend to leave a gap in the memory map that is too small to cause
+	 * later problems for the optimistic method.
+	 *
+	 * Another possible confounding factor is address space layout
+	 * randomization (ASLR), which causes mmap(2) to disregard the
+	 * requested address.  mmap_unaligned tracks whether the previous
+	 * chunk_alloc_mmap() execution received any unaligned or relocated
+	 * mappings, and if so, the current execution will immediately fall
+	 * back to the slow method.  However, we keep track of whether the fast
+	 * method would have succeeded, and if so, we make a note to try the
+	 * fast method next time.
+	 */
+
+	if (mmap_unaligned == false) {
+		size_t offset;
+
+		ret = pages_map(NULL, size);
+		if (ret == NULL)
+			return (NULL);
+
+		offset = CHUNK_ADDR2OFFSET(ret);
+		if (offset != 0) {
+			mmap_unaligned = true;
+			/* Try to extend chunk boundary. */
+			if (pages_map((void *)((uintptr_t)ret + size),
+			    chunksize - offset) == NULL) {
+				/*
+				 * Extension failed.  Clean up, then revert to
+				 * the reliable-but-expensive method.
+				 */
+				pages_unmap(ret, size);
+				ret = chunk_alloc_mmap_slow(size, true);
+			} else {
+				/* Clean up unneeded leading space. */
+				pages_unmap(ret, chunksize - offset);
+				ret = (void *)((uintptr_t)ret + (chunksize -
+				    offset));
+			}
+		}
+	} else
+		ret = chunk_alloc_mmap_slow(size, false);
+
+	return (ret);
+}
+
+/* Return an mmap()ed chunk to the kernel. */
+void
+chunk_dealloc_mmap(void *chunk, size_t size)
+{
+
+	pages_unmap(chunk, size);
+}
diff --git a/dep/src/jmalloc/chunk_swap.c b/dep/src/jmalloc/chunk_swap.c
new file mode 100644
index 00000000000..b8c880f0a17
--- /dev/null
+++ b/dep/src/jmalloc/chunk_swap.c
@@ -0,0 +1,383 @@
+#define JEMALLOC_CHUNK_SWAP_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifdef JEMALLOC_SWAP
+/******************************************************************************/
+/* Data. */
+
+malloc_mutex_t swap_mtx;
+bool swap_enabled;
+bool swap_prezeroed;
+size_t swap_nfds;
+int *swap_fds;
+#ifdef JEMALLOC_STATS
+size_t swap_avail;
+#endif
+
+/* Base address of the mmap()ed file(s). */
+static void *swap_base;
+/* Current end of the space in use (<= swap_max). */
+static void *swap_end;
+/* Absolute upper limit on file-backed addresses. */
+static void *swap_max;
+
+/*
+ * Trees of chunks that were previously allocated (trees differ only in node
+ * ordering). These are used when allocating chunks, in an attempt to re-use
+ * address space. Depending on function, different tree orderings are needed,
+ * which is why there are two trees with the same contents.
+ */
+static extent_tree_t swap_chunks_szad;
+static extent_tree_t swap_chunks_ad;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void *chunk_recycle_swap(size_t size, bool *zero);
+static extent_node_t *chunk_dealloc_swap_record(void *chunk, size_t size);
+
+/******************************************************************************/
+
+/*
+ * Try to satisfy a size-byte chunk request from the trees of previously
+ * deallocated swap extents.  On success the cached extent is removed (or
+ * trimmed) and its base address returned; returns NULL when no cached extent
+ * is large enough.  *zero is read-only here: recycled memory is never assumed
+ * zeroed, so it is memset only when the caller requests zeroed memory.
+ */
+static void *
+chunk_recycle_swap(size_t size, bool *zero)
+{
+	extent_node_t *node, key;
+
+	/* nsearch finds the smallest extent with size >= key.size. */
+	key.addr = NULL;
+	key.size = size;
+	malloc_mutex_lock(&swap_mtx);
+	node = extent_tree_szad_nsearch(&swap_chunks_szad, &key);
+	if (node != NULL) {
+		void *ret = node->addr;
+
+		/* Remove node from the tree. */
+		extent_tree_szad_remove(&swap_chunks_szad, node);
+		if (node->size == size) {
+			/* Exact fit; retire the node entirely. */
+			extent_tree_ad_remove(&swap_chunks_ad, node);
+			base_node_dealloc(node);
+		} else {
+			/*
+			 * Insert the remainder of node's address range as a
+			 * smaller chunk.  Its position within swap_chunks_ad
+			 * does not change.
+			 */
+			assert(node->size > size);
+			node->addr = (void *)((uintptr_t)node->addr + size);
+			node->size -= size;
+			extent_tree_szad_insert(&swap_chunks_szad, node);
+		}
+#ifdef JEMALLOC_STATS
+		swap_avail -= size;
+#endif
+		malloc_mutex_unlock(&swap_mtx);
+
+		/* Zeroing happens outside the critical section. */
+		if (*zero)
+			memset(ret, 0, size);
+		return (ret);
+	}
+	malloc_mutex_unlock(&swap_mtx);
+
+	return (NULL);
+}
+
+/*
+ * Allocate a size-byte chunk from the memory-mapped swap files.  First tries
+ * to recycle a previously freed extent, then bump-allocates from swap_end.
+ * *zero is in/out: it is set to true when the memory is known to be zeroed
+ * (prezeroed files); otherwise the memory is zeroed only if requested.
+ * Returns NULL when the swap region is exhausted.
+ */
+void *
+chunk_alloc_swap(size_t size, bool *zero)
+{
+	void *ret;
+
+	assert(swap_enabled);
+
+	ret = chunk_recycle_swap(size, zero);
+	if (ret != NULL)
+		return (ret);
+
+	/* Bump-allocate from the end of the in-use region. */
+	malloc_mutex_lock(&swap_mtx);
+	if ((uintptr_t)swap_end + size <= (uintptr_t)swap_max) {
+		ret = swap_end;
+		swap_end = (void *)((uintptr_t)swap_end + size);
+#ifdef JEMALLOC_STATS
+		swap_avail -= size;
+#endif
+		malloc_mutex_unlock(&swap_mtx);
+
+		if (swap_prezeroed)
+			*zero = true;
+		else if (*zero)
+			memset(ret, 0, size);
+	} else {
+		malloc_mutex_unlock(&swap_mtx);
+		return (NULL);
+	}
+
+	return (ret);
+}
+
+/*
+ * Record a deallocated swap chunk in the extent trees, coalescing it with an
+ * adjacent free extent on either side when possible.  Returns the resulting
+ * node, or NULL if a tree node could not be allocated (the deallocation is
+ * then simply not recorded).  Called with swap_mtx held; the lock may be
+ * dropped temporarily while allocating a node.
+ */
+static extent_node_t *
+chunk_dealloc_swap_record(void *chunk, size_t size)
+{
+	extent_node_t *xnode, *node, *prev, key;
+
+	xnode = NULL;
+	while (true) {
+		key.addr = (void *)((uintptr_t)chunk + size);
+		node = extent_tree_ad_nsearch(&swap_chunks_ad, &key);
+		/* Try to coalesce forward. */
+		if (node != NULL && node->addr == key.addr) {
+			/*
+			 * Coalesce chunk with the following address range.
+			 * This does not change the position within
+			 * swap_chunks_ad, so only remove/insert from/into
+			 * swap_chunks_szad.
+			 */
+			extent_tree_szad_remove(&swap_chunks_szad, node);
+			node->addr = chunk;
+			node->size += size;
+			extent_tree_szad_insert(&swap_chunks_szad, node);
+			break;
+		} else if (xnode == NULL) {
+			/*
+			 * It is possible that base_node_alloc() will cause a
+			 * new base chunk to be allocated, so take care not to
+			 * deadlock on swap_mtx, and recover if another thread
+			 * deallocates an adjacent chunk while this one is busy
+			 * allocating xnode.
+			 */
+			malloc_mutex_unlock(&swap_mtx);
+			xnode = base_node_alloc();
+			malloc_mutex_lock(&swap_mtx);
+			if (xnode == NULL)
+				return (NULL);
+		} else {
+			/* Coalescing forward failed, so insert a new node. */
+			node = xnode;
+			xnode = NULL;
+			node->addr = chunk;
+			node->size = size;
+			extent_tree_ad_insert(&swap_chunks_ad, node);
+			extent_tree_szad_insert(&swap_chunks_szad, node);
+			break;
+		}
+	}
+	/* Discard xnode if it ended up unused due to a race. */
+	if (xnode != NULL)
+		base_node_dealloc(xnode);
+
+	/* Try to coalesce backward. */
+	prev = extent_tree_ad_prev(&swap_chunks_ad, node);
+	if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
+	    chunk) {
+		/*
+		 * Coalesce chunk with the previous address range.  This does
+		 * not change the position within swap_chunks_ad, so only
+		 * remove/insert node from/into swap_chunks_szad.
+		 */
+		extent_tree_szad_remove(&swap_chunks_szad, prev);
+		extent_tree_ad_remove(&swap_chunks_ad, prev);
+
+		extent_tree_szad_remove(&swap_chunks_szad, node);
+		node->addr = prev->addr;
+		node->size += prev->size;
+		extent_tree_szad_insert(&swap_chunks_szad, node);
+
+		base_node_dealloc(prev);
+	}
+
+	return (node);
+}
+
+/*
+ * Return a chunk to the swap allocator.  Returns false if the chunk belonged
+ * to the file-backed swap region and was recorded/coalesced; true if the
+ * chunk is not managed by this allocator (caller must dispose of it by other
+ * means).
+ *
+ * Fix: swap_avail was previously incremented after the RETURN label, so it
+ * was credited even for chunks outside the swap region, and it used the
+ * possibly-coalesced size, double-counting neighbors that were already
+ * credited when they were freed.
+ */
+bool
+chunk_dealloc_swap(void *chunk, size_t size)
+{
+	bool ret;
+
+	assert(swap_enabled);
+
+	malloc_mutex_lock(&swap_mtx);
+	if ((uintptr_t)chunk >= (uintptr_t)swap_base
+	    && (uintptr_t)chunk < (uintptr_t)swap_max) {
+		extent_node_t *node;
+#ifdef JEMALLOC_STATS
+		size_t dsize = size;	/* Bytes actually freed by this call. */
+#endif
+
+		/* Try to coalesce with other unused chunks. */
+		node = chunk_dealloc_swap_record(chunk, size);
+		if (node != NULL) {
+			chunk = node->addr;
+			size = node->size;
+		}
+
+		/*
+		 * Try to shrink the in-use memory if this chunk is at the end
+		 * of the in-use memory.
+		 */
+		if ((void *)((uintptr_t)chunk + size) == swap_end) {
+			swap_end = (void *)((uintptr_t)swap_end - size);
+
+			if (node != NULL) {
+				extent_tree_szad_remove(&swap_chunks_szad,
+				    node);
+				extent_tree_ad_remove(&swap_chunks_ad, node);
+				base_node_dealloc(node);
+			}
+		} else
+			madvise(chunk, size, MADV_DONTNEED);
+
+#ifdef JEMALLOC_STATS
+		/*
+		 * Credit only the bytes passed in by the caller, and only for
+		 * chunks that actually belong to the swap region.
+		 */
+		swap_avail += dsize;
+#endif
+		ret = false;
+		goto RETURN;
+	}
+
+	ret = true;
+RETURN:
+	malloc_mutex_unlock(&swap_mtx);
+	return (ret);
+}
+
+/*
+ * Enable file-backed ("swap") chunk allocation over the given file
+ * descriptors.  Each file is truncated down to a page multiple, the
+ * cumulative size is rounded down to a chunk multiple, and the files are
+ * mmap()ed contiguously over a chunk-aligned anonymous region.  prezeroed
+ * tells the allocator it may skip zeroing freshly allocated chunks.
+ * Returns false on success, true on error.
+ */
+bool
+chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed)
+{
+	bool ret;
+	unsigned i;
+	off_t off;
+	void *vaddr;
+	size_t cumsize, voff;
+	/* NOTE(review): VLA sized by caller-supplied nfds; assumes nfds is small. */
+	size_t sizes[nfds];
+
+	malloc_mutex_lock(&swap_mtx);
+
+	/* Get file sizes. */
+	for (i = 0, cumsize = 0; i < nfds; i++) {
+		off = lseek(fds[i], 0, SEEK_END);
+		if (off == ((off_t)-1)) {
+			ret = true;
+			goto RETURN;
+		}
+		if (PAGE_CEILING(off) != off) {
+			/* Truncate to a multiple of the page size. */
+			off &= ~PAGE_MASK;
+			if (ftruncate(fds[i], off) != 0) {
+				ret = true;
+				goto RETURN;
+			}
+		}
+		sizes[i] = off;
+		if (cumsize + off < cumsize) {
+			/*
+			 * Cumulative file size is greater than the total
+			 * address space.  Bail out while it's still obvious
+			 * what the problem is.
+			 */
+			ret = true;
+			goto RETURN;
+		}
+		cumsize += off;
+	}
+
+	/* Round down to a multiple of the chunk size. */
+	cumsize &= ~chunksize_mask;
+	if (cumsize == 0) {
+		ret = true;
+		goto RETURN;
+	}
+
+	/*
+	 * Allocate a chunk-aligned region of anonymous memory, which will
+	 * be the final location for the memory-mapped files.
+	 */
+	vaddr = chunk_alloc_mmap(cumsize);
+	if (vaddr == NULL) {
+		ret = true;
+		goto RETURN;
+	}
+
+	/* Overlay the files onto the anonymous mapping. */
+	for (i = 0, voff = 0; i < nfds; i++) {
+		void *addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i],
+		    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0);
+		if (addr == MAP_FAILED) {
+			char buf[STRERROR_BUF];
+
+			strerror_r(errno, buf, sizeof(buf));
+			malloc_write(
+			    "<jemalloc>: Error in mmap(..., MAP_FIXED, ...): ");
+			malloc_write(buf);
+			malloc_write("\n");
+			if (opt_abort)
+				abort();
+			/* Undo the file overlays mapped so far. */
+			if (munmap(vaddr, voff) == -1) {
+				strerror_r(errno, buf, sizeof(buf));
+				malloc_write("<jemalloc>: Error in munmap(): ");
+				malloc_write(buf);
+				malloc_write("\n");
+			}
+			ret = true;
+			goto RETURN;
+		}
+		assert(addr == (void *)((uintptr_t)vaddr + voff));
+
+		/*
+		 * Tell the kernel that the mapping will be accessed randomly,
+		 * and that it should not gratuitously sync pages to the
+		 * filesystem.
+		 */
+#ifdef MADV_RANDOM
+		madvise(addr, sizes[i], MADV_RANDOM);
+#endif
+#ifdef MADV_NOSYNC
+		madvise(addr, sizes[i], MADV_NOSYNC);
+#endif
+
+		voff += sizes[i];
+	}
+
+	swap_prezeroed = prezeroed;
+	swap_base = vaddr;
+	swap_end = swap_base;
+	swap_max = (void *)((uintptr_t)vaddr + cumsize);
+
+	/* Copy the fds array for mallctl purposes. */
+	swap_fds = (int *)base_alloc(nfds * sizeof(int));
+	if (swap_fds == NULL) {
+		/*
+		 * NOTE(review): the vaddr mapping is not torn down on this
+		 * error path — confirm whether leaking it is acceptable.
+		 */
+		ret = true;
+		goto RETURN;
+	}
+	memcpy(swap_fds, fds, nfds * sizeof(int));
+	swap_nfds = nfds;
+
+#ifdef JEMALLOC_STATS
+	swap_avail = cumsize;
+#endif
+
+	swap_enabled = true;
+
+	ret = false;
+RETURN:
+	malloc_mutex_unlock(&swap_mtx);
+	return (ret);
+}
+
+/*
+ * One-time initialization of the swap chunk allocator state.  Returns false
+ * on success, true if the mutex could not be initialized.
+ */
+bool
+chunk_swap_boot(void)
+{
+
+	if (malloc_mutex_init(&swap_mtx))
+		return (true);
+
+	swap_enabled = false;
+	swap_prezeroed = false; /* swap.* mallctl's depend on this. */
+	swap_nfds = 0;
+	swap_fds = NULL;
+#ifdef JEMALLOC_STATS
+	swap_avail = 0;
+#endif
+	swap_base = NULL;
+	swap_end = NULL;
+	swap_max = NULL;
+
+	extent_tree_szad_new(&swap_chunks_szad);
+	extent_tree_ad_new(&swap_chunks_ad);
+
+	return (false);
+}
+
+/******************************************************************************/
+#endif /* JEMALLOC_SWAP */
diff --git a/dep/src/jmalloc/ckh.c b/dep/src/jmalloc/ckh.c
new file mode 100644
index 00000000000..a0c4162aa19
--- /dev/null
+++ b/dep/src/jmalloc/ckh.c
@@ -0,0 +1,601 @@
+/*
+ *******************************************************************************
+ * Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each
+ * hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash
+ * functions are employed. The original cuckoo hashing algorithm was described
+ * in:
+ *
+ * Pagh, R., F.F. Rodler (2004) Cuckoo Hashing. Journal of Algorithms
+ * 51(2):122-144.
+ *
+ * Generalization of cuckoo hashing was discussed in:
+ *
+ * Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical
+ * alternative to traditional hash tables. In Proceedings of the 7th
+ * Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA,
+ * January 2006.
+ *
+ * This implementation uses precisely two hash functions because that is the
+ * fewest that can work, and supporting multiple hashes is an implementation
+ * burden. Here is a reproduction of Figure 1 from Erlingsson et al. (2006)
+ * that shows approximate expected maximum load factors for various
+ * configurations:
+ *
+ * | #cells/bucket |
+ * #hashes | 1 | 2 | 4 | 8 |
+ * --------+-------+-------+-------+-------+
+ * 1 | 0.006 | 0.006 | 0.03 | 0.12 |
+ * 2 | 0.49 | 0.86 |>0.93< |>0.96< |
+ * 3 | 0.91 | 0.97 | 0.98 | 0.999 |
+ * 4 | 0.97 | 0.99 | 0.999 | |
+ *
+ * The number of cells per bucket is chosen such that a bucket fits in one cache
+ * line. So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing,
+ * respectively.
+ *
+ ******************************************************************************/
+#define CKH_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static bool ckh_grow(ckh_t *ckh);
+static void ckh_shrink(ckh_t *ckh);
+
+/******************************************************************************/
+
+/*
+ * Search bucket for key and return the cell number if found; SIZE_T_MAX
+ * otherwise.  Equality is decided by the table's keycomp callback, never by
+ * pointer identity.
+ */
+JEMALLOC_INLINE size_t
+ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
+{
+	ckhc_t *cell;
+	unsigned i;
+
+	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
+		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
+		if (cell->key != NULL && ckh->keycomp(key, cell->key))
+			return ((bucket << LG_CKH_BUCKET_CELLS) + i);
+	}
+
+	return (SIZE_T_MAX);
+}
+
+/*
+ * Search table for key and return cell number if found; SIZE_T_MAX otherwise.
+ */
+JEMALLOC_INLINE size_t
+ckh_isearch(ckh_t *ckh, const void *key)
+{
+	size_t hash1, hash2, bucket, cell;
+
+	assert(ckh != NULL);
+	/* Fix: was "=", which assigned magic instead of checking it. */
+	assert(ckh->magic == CKH_MAGIG);
+
+	ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
+
+	/* Search primary bucket. */
+	bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	cell = ckh_bucket_search(ckh, bucket, key);
+	if (cell != SIZE_T_MAX)
+		return (cell);
+
+	/* Search secondary bucket. */
+	bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	cell = ckh_bucket_search(ckh, bucket, key);
+	return (cell);
+}
+
+/*
+ * Try to place (key, data) into an empty cell of the given bucket.  Returns
+ * false on success, true if every cell in the bucket is occupied.
+ */
+JEMALLOC_INLINE bool
+ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
+    const void *data)
+{
+	ckhc_t *cell;
+	unsigned offset, i;
+
+	/*
+	 * Cycle through the cells in the bucket, starting at a random position.
+	 * The randomness avoids worst-case search overhead as buckets fill up.
+	 */
+	prn32(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
+	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
+		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
+		    ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
+		if (cell->key == NULL) {
+			cell->key = key;
+			cell->data = data;
+			ckh->count++;
+			return (false);
+		}
+	}
+
+	return (true);
+}
+
+/*
+ * No space is available in bucket.  Randomly evict an item, then try to find an
+ * alternate location for that item.  Iteratively repeat this
+ * eviction/relocation procedure until either success or detection of an
+ * eviction/relocation bucket cycle.
+ *
+ * Returns false on success; on cycle detection returns true, with the pair
+ * that ended up evicted handed back via *argkey/*argdata so the caller can
+ * retry after growing the table.
+ */
+JEMALLOC_INLINE bool
+ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
+    void const **argdata)
+{
+	const void *key, *data, *tkey, *tdata;
+	ckhc_t *cell;
+	size_t hash1, hash2, bucket, tbucket;
+	unsigned i;
+
+	bucket = argbucket;
+	key = *argkey;
+	data = *argdata;
+	while (true) {
+		/*
+		 * Choose a random item within the bucket to evict.  This is
+		 * critical to correct function, because without (eventually)
+		 * evicting all items within a bucket during iteration, it
+		 * would be possible to get stuck in an infinite loop if there
+		 * were an item for which both hashes indicated the same
+		 * bucket.
+		 */
+		prn32(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
+		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
+		assert(cell->key != NULL);
+
+		/* Swap cell->{key,data} and {key,data} (evict). */
+		tkey = cell->key; tdata = cell->data;
+		cell->key = key; cell->data = data;
+		key = tkey; data = tdata;
+
+#ifdef CKH_COUNT
+		ckh->nrelocs++;
+#endif
+
+		/* Find the alternate bucket for the evicted item. */
+		ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
+		tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+		if (tbucket == bucket) {
+			tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+			/*
+			 * It may be that (tbucket == bucket) still, if the
+			 * item's hashes both indicate this bucket.  However,
+			 * we are guaranteed to eventually escape this bucket
+			 * during iteration, assuming pseudo-random item
+			 * selection (true randomness would make infinite
+			 * looping a remote possibility).  The reason we can
+			 * never get trapped forever is that there are two
+			 * cases:
+			 *
+			 * 1) This bucket == argbucket, so we will quickly
+			 *    detect an eviction cycle and terminate.
+			 * 2) An item was evicted to this bucket from another,
+			 *    which means that at least one item in this bucket
+			 *    has hashes that indicate distinct buckets.
+			 */
+		}
+		/* Check for a cycle. */
+		if (tbucket == argbucket) {
+			*argkey = key;
+			*argdata = data;
+			return (true);
+		}
+
+		bucket = tbucket;
+		if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
+			return (false);
+	}
+}
+
+/*
+ * Try to insert (key, data) into either of its two candidate buckets, falling
+ * back to eviction/relocation.  Returns false on success, true when the table
+ * must grow before the insertion can succeed.
+ */
+JEMALLOC_INLINE bool
+ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
+{
+	size_t hash1, hash2, bucket;
+	const void *key = *argkey;
+	const void *data = *argdata;
+
+	ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
+
+	/* Try to insert in primary bucket. */
+	bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
+		return (false);
+
+	/* Try to insert in secondary bucket. */
+	bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
+		return (false);
+
+	/*
+	 * Try to find a place for this item via iterative eviction/relocation.
+	 */
+	return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata));
+}
+
+/*
+ * Try to rebuild the hash table from scratch by inserting all items from the
+ * old table into the new.  Returns false on success; on failure returns true
+ * with ckh->count restored so the caller can back out the new table.
+ */
+JEMALLOC_INLINE bool
+ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
+{
+	size_t count, i, nins;
+	const void *key, *data;
+
+	count = ckh->count;
+	ckh->count = 0;
+	/* Stop as soon as all count occupied cells have been reinserted. */
+	for (i = nins = 0; nins < count; i++) {
+		if (aTab[i].key != NULL) {
+			key = aTab[i].key;
+			data = aTab[i].data;
+			if (ckh_try_insert(ckh, &key, &data)) {
+				ckh->count = count;
+				return (true);
+			}
+			nins++;
+		}
+	}
+
+	return (false);
+}
+
+/*
+ * Double the table (repeatedly, if necessary) and rehash all items into it.
+ * Returns false on success, true on OOM.
+ */
+static bool
+ckh_grow(ckh_t *ckh)
+{
+	bool ret;
+	ckhc_t *tab, *ttab;
+	size_t lg_curcells;
+	unsigned lg_prevbuckets;
+
+#ifdef CKH_COUNT
+	ckh->ngrows++;
+#endif
+
+	/*
+	 * It is possible (though unlikely, given well behaved hashes) that the
+	 * table will have to be doubled more than once in order to create a
+	 * usable table.
+	 */
+	lg_prevbuckets = ckh->lg_curbuckets;
+	lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS;
+	while (true) {
+		lg_curcells++;
+		/* Cache-line-aligned allocation keeps each bucket in one line. */
+		tab = (ckhc_t *) ipalloc((ZU(1) << LG_CACHELINE),
+		    sizeof(ckhc_t) << lg_curcells);
+		if (tab == NULL) {
+			ret = true;
+			goto RETURN;
+		}
+		memset(tab, 0, sizeof(ckhc_t) << lg_curcells);
+		/* Swap in new table. */
+		ttab = ckh->tab;
+		ckh->tab = tab;
+		tab = ttab;
+		ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
+
+		/* On success, free the old table and stop. */
+		if (ckh_rebuild(ckh, tab) == false) {
+			idalloc(tab);
+			break;
+		}
+
+		/* Rebuilding failed, so back out partially rebuilt table. */
+		idalloc(ckh->tab);
+		ckh->tab = tab;
+		ckh->lg_curbuckets = lg_prevbuckets;
+	}
+
+	ret = false;
+RETURN:
+	return (ret);
+}
+
+/*
+ * Halve the table and rehash all items into it.  Failure (OOM or a rebuild
+ * that does not fit) is silently tolerated: the original table is restored.
+ */
+static void
+ckh_shrink(ckh_t *ckh)
+{
+	ckhc_t *tab, *ttab;
+	size_t lg_curcells;
+	unsigned lg_prevbuckets;
+
+	/*
+	 * It is possible (though unlikely, given well behaved hashes) that the
+	 * table rebuild will fail.
+	 */
+	lg_prevbuckets = ckh->lg_curbuckets;
+	lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
+	tab = (ckhc_t *)ipalloc((ZU(1) << LG_CACHELINE),
+	    sizeof(ckhc_t) << lg_curcells);
+	if (tab == NULL) {
+		/*
+		 * An OOM error isn't worth propagating, since it doesn't
+		 * prevent this or future operations from proceeding.
+		 */
+		return;
+	}
+	memset(tab, 0, sizeof(ckhc_t) << lg_curcells);
+	/* Swap in new table. */
+	ttab = ckh->tab;
+	ckh->tab = tab;
+	tab = ttab;
+	ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
+
+	/* On success, free the old table. */
+	if (ckh_rebuild(ckh, tab) == false) {
+		idalloc(tab);
+#ifdef CKH_COUNT
+		ckh->nshrinks++;
+#endif
+		return;
+	}
+
+	/* Rebuilding failed, so back out partially rebuilt table. */
+	idalloc(ckh->tab);
+	ckh->tab = tab;
+	ckh->lg_curbuckets = lg_prevbuckets;
+#ifdef CKH_COUNT
+	ckh->nshrinkfails++;
+#endif
+}
+
+/*
+ * Initialize a cuckoo hash table sized to hold at least minitems items
+ * without growing.  hash and keycomp supply the hashing and key-equality
+ * callbacks.  Returns false on success, true on error (OOM).
+ */
+bool
+ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
+{
+	bool ret;
+	size_t mincells;
+	unsigned lg_mincells;
+
+	assert(minitems > 0);
+	assert(hash != NULL);
+	assert(keycomp != NULL);
+
+#ifdef CKH_COUNT
+	ckh->ngrows = 0;
+	ckh->nshrinks = 0;
+	ckh->nshrinkfails = 0;
+	ckh->ninserts = 0;
+	ckh->nrelocs = 0;
+#endif
+	ckh->prn_state = 42; /* Value doesn't really matter. */
+	ckh->count = 0;
+
+	/*
+	 * Find the minimum power of 2 that is large enough to fit minitems
+	 * entries.  We are using (2+,2) cuckoo hashing, which has an expected
+	 * maximum load factor of at least ~0.86, so 0.75 is a conservative
+	 * load factor that will typically allow minitems to fit without ever
+	 * growing the table.
+	 */
+	assert(LG_CKH_BUCKET_CELLS > 0);
+	/* mincells ~= ceil(minitems / 0.75). */
+	mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
+	for (lg_mincells = LG_CKH_BUCKET_CELLS;
+	    (ZU(1) << lg_mincells) < mincells;
+	    lg_mincells++)
+		; /* Do nothing. */
+	ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
+	ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
+	ckh->hash = hash;
+	ckh->keycomp = keycomp;
+
+	/* Cache-line-aligned allocation keeps each bucket in one line. */
+	ckh->tab = (ckhc_t *)ipalloc((ZU(1) << LG_CACHELINE),
+	    sizeof(ckhc_t) << lg_mincells);
+	if (ckh->tab == NULL) {
+		ret = true;
+		goto RETURN;
+	}
+	memset(ckh->tab, 0, sizeof(ckhc_t) << lg_mincells);
+
+#ifdef JEMALLOC_DEBUG
+	ckh->magic = CKH_MAGIG;
+#endif
+
+	ret = false;
+RETURN:
+	return (ret);
+}
+
+/*
+ * Destroy a table created via ckh_new(), releasing its cell array.  The ckh
+ * structure itself is owned by the caller.
+ */
+void
+ckh_delete(ckh_t *ckh)
+{
+
+	assert(ckh != NULL);
+	/* Fix: was "=", which assigned magic instead of checking it. */
+	assert(ckh->magic == CKH_MAGIG);
+
+#ifdef CKH_VERBOSE
+	/*
+	 * NOTE(review): arguments are cast to unsigned long long while the
+	 * format uses PRIu64 — confirm the widths match on all targets.
+	 */
+	malloc_printf(
+	    "%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64","
+	    " nshrinkfails: %"PRIu64", ninserts: %"PRIu64","
+	    " nrelocs: %"PRIu64"\n", __func__, ckh,
+	    (unsigned long long)ckh->ngrows,
+	    (unsigned long long)ckh->nshrinks,
+	    (unsigned long long)ckh->nshrinkfails,
+	    (unsigned long long)ckh->ninserts,
+	    (unsigned long long)ckh->nrelocs);
+#endif
+
+	idalloc(ckh->tab);
+#ifdef JEMALLOC_DEBUG
+	/* Poison the structure to catch use-after-delete. */
+	memset(ckh, 0x5a, sizeof(ckh_t));
+#endif
+}
+
+/* Return the number of items currently stored in the table. */
+size_t
+ckh_count(ckh_t *ckh)
+{
+
+	assert(ckh != NULL);
+	/* Fix: was "=", which assigned magic instead of checking it. */
+	assert(ckh->magic == CKH_MAGIG);
+
+	return (ckh->count);
+}
+
+/*
+ * Iterate over the table.  *tabind is the cursor (start at 0); each call
+ * returns the next occupied cell's key/data via *key/*data (either may be
+ * NULL to skip) and returns false.  Returns true when iteration is complete.
+ */
+bool
+ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data)
+{
+	size_t i, ncells;
+
+	for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets +
+	    LG_CKH_BUCKET_CELLS)); i < ncells; i++) {
+		if (ckh->tab[i].key != NULL) {
+			if (key != NULL)
+				*key = (void *)ckh->tab[i].key;
+			if (data != NULL)
+				*data = (void *)ckh->tab[i].data;
+			*tabind = i + 1;
+			return (false);
+		}
+	}
+
+	return (true);
+}
+
+/*
+ * Insert (key, data), growing the table as necessary.  The key must not
+ * already be present.  Returns false on success, true on error (OOM while
+ * growing).
+ */
+bool
+ckh_insert(ckh_t *ckh, const void *key, const void *data)
+{
+	bool ret;
+
+	assert(ckh != NULL);
+	/* Fix: was "=", which assigned magic instead of checking it. */
+	assert(ckh->magic == CKH_MAGIG);
+	/* ckh_search() returns true on miss, so this asserts absence. */
+	assert(ckh_search(ckh, key, NULL, NULL));
+
+#ifdef CKH_COUNT
+	ckh->ninserts++;
+#endif
+
+	while (ckh_try_insert(ckh, &key, &data)) {
+		if (ckh_grow(ckh)) {
+			ret = true;
+			goto RETURN;
+		}
+	}
+
+	ret = false;
+RETURN:
+	return (ret);
+}
+
+/*
+ * Remove searchkey from the table, returning the stored key/data via *key and
+ * *data when non-NULL.  Returns false on success, true if the key was not
+ * found.  Shrinks the table opportunistically when it falls below 1/4 full.
+ */
+bool
+ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
+{
+	size_t cell;
+
+	assert(ckh != NULL);
+	/* Fix: was "=", which assigned magic instead of checking it. */
+	assert(ckh->magic == CKH_MAGIG);
+
+	cell = ckh_isearch(ckh, searchkey);
+	if (cell != SIZE_T_MAX) {
+		if (key != NULL)
+			*key = (void *)ckh->tab[cell].key;
+		if (data != NULL)
+			*data = (void *)ckh->tab[cell].data;
+		ckh->tab[cell].key = NULL;
+		ckh->tab[cell].data = NULL; /* Not necessary. */
+
+		ckh->count--;
+		/* Try to halve the table if it is less than 1/4 full. */
+		if (ckh->count < (ZU(1) << (ckh->lg_curbuckets
+		    + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
+		    > ckh->lg_minbuckets) {
+			/* Ignore error due to OOM. */
+			ckh_shrink(ckh);
+		}
+
+		return (false);
+	}
+
+	return (true);
+}
+
+/*
+ * Look up searchkey, returning the stored key/data via *key and *data when
+ * non-NULL.  Returns false if found, true otherwise.
+ */
+bool
+ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
+{
+	size_t cell;
+
+	assert(ckh != NULL);
+	/* Fix: was "=", which assigned magic instead of checking it. */
+	assert(ckh->magic == CKH_MAGIG);
+
+	cell = ckh_isearch(ckh, searchkey);
+	if (cell != SIZE_T_MAX) {
+		if (key != NULL)
+			*key = (void *)ckh->tab[cell].key;
+		if (data != NULL)
+			*data = (void *)ckh->tab[cell].data;
+		return (false);
+	}
+
+	return (true);
+}
+
+/*
+ * Hash a NUL-terminated string key into the two bucket hashes used by the
+ * cuckoo table.  When minbits <= 32 both hashes are carved from a single
+ * 64-bit hash; otherwise a second hash with a different seed is computed.
+ */
+void
+ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
+{
+	size_t ret1, ret2;
+	uint64_t h;
+
+	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
+	assert(hash1 != NULL);
+	assert(hash2 != NULL);
+
+	h = hash(key, strlen((const char *)key), 0x94122f335b332aeaLLU);
+	if (minbits <= 32) {
+		/*
+		 * Avoid doing multiple hashes, since a single hash provides
+		 * enough bits.
+		 */
+		ret1 = h & ZU(0xffffffffU);
+		ret2 = h >> 32;
+	} else {
+		ret1 = h;
+		ret2 = hash(key, strlen((const char *)key),
+		    0x8432a476666bbc13U);
+	}
+
+	*hash1 = ret1;
+	*hash2 = ret2;
+}
+
+/* Key-equality callback for string keys: true iff the strings match. */
+bool
+ckh_string_keycomp(const void *k1, const void *k2)
+{
+
+	assert(k1 != NULL);
+	assert(k2 != NULL);
+
+	return (strcmp((char *)k1, (char *)k2) ? false : true);
+}
+
+/*
+ * Hash a pointer key into the two bucket hashes.  Note that the pointer
+ * *value* is hashed (&key, sizeof(void *)), not the pointed-to bytes.
+ */
+void
+ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
+    size_t *hash2)
+{
+	size_t ret1, ret2;
+	uint64_t h;
+
+	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
+	assert(hash1 != NULL);
+	assert(hash2 != NULL);
+
+	h = hash(&key, sizeof(void *), 0xd983396e68886082LLU);
+	if (minbits <= 32) {
+		/*
+		 * Avoid doing multiple hashes, since a single hash provides
+		 * enough bits.
+		 */
+		ret1 = h & ZU(0xffffffffU);
+		ret2 = h >> 32;
+	} else {
+		assert(SIZEOF_PTR == 8);
+		ret1 = h;
+		ret2 = hash(&key, sizeof(void *), 0x5e2be9aff8709a5dLLU);
+	}
+
+	*hash1 = ret1;
+	*hash2 = ret2;
+}
+
+/* Key-equality callback for pointer keys: identity comparison. */
+bool
+ckh_pointer_keycomp(const void *k1, const void *k2)
+{
+
+	return ((k1 == k2) ? true : false);
+}
diff --git a/dep/src/jmalloc/ctl.c b/dep/src/jmalloc/ctl.c
new file mode 100644
index 00000000000..ffb732d5bef
--- /dev/null
+++ b/dep/src/jmalloc/ctl.c
@@ -0,0 +1,1482 @@
+#define JEMALLOC_CTL_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+static malloc_mutex_t ctl_mtx;
+static bool ctl_initialized;
+static uint64_t ctl_epoch;
+static ctl_stats_t ctl_stats;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+#define CTL_PROTO(n) \
+static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \
+ size_t *oldlenp, void *newp, size_t newlen);
+
+#define INDEX_PROTO(n) \
+const ctl_node_t *n##_index(const size_t *mib, size_t miblen, \
+ size_t i);
+
+#ifdef JEMALLOC_STATS
+static bool ctl_arena_init(ctl_arena_stats_t *astats);
+#endif
+static void ctl_arena_clear(ctl_arena_stats_t *astats);
+#ifdef JEMALLOC_STATS
+static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats,
+ arena_t *arena);
+static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats,
+ ctl_arena_stats_t *astats);
+#endif
+static void ctl_arena_refresh(arena_t *arena, unsigned i);
+static void ctl_refresh(void);
+static bool ctl_init(void);
+static int ctl_lookup(const char *name, ctl_node_t const **nodesp,
+ size_t *mibp, size_t *depthp);
+
+CTL_PROTO(version)
+CTL_PROTO(epoch)
+#ifdef JEMALLOC_TCACHE
+CTL_PROTO(tcache_flush)
+#endif
+CTL_PROTO(config_debug)
+CTL_PROTO(config_dss)
+CTL_PROTO(config_dynamic_page_shift)
+CTL_PROTO(config_fill)
+CTL_PROTO(config_lazy_lock)
+CTL_PROTO(config_prof)
+CTL_PROTO(config_prof_libgcc)
+CTL_PROTO(config_prof_libunwind)
+CTL_PROTO(config_stats)
+CTL_PROTO(config_swap)
+CTL_PROTO(config_sysv)
+CTL_PROTO(config_tcache)
+CTL_PROTO(config_tiny)
+CTL_PROTO(config_tls)
+CTL_PROTO(config_xmalloc)
+CTL_PROTO(opt_abort)
+#ifdef JEMALLOC_FILL
+CTL_PROTO(opt_junk)
+#endif
+#ifdef JEMALLOC_SYSV
+CTL_PROTO(opt_sysv)
+#endif
+#ifdef JEMALLOC_XMALLOC
+CTL_PROTO(opt_xmalloc)
+#endif
+#ifdef JEMALLOC_ZERO
+CTL_PROTO(opt_zero)
+#endif
+#ifdef JEMALLOC_TCACHE
+CTL_PROTO(opt_tcache)
+CTL_PROTO(opt_lg_tcache_gc_sweep)
+#endif
+#ifdef JEMALLOC_PROF
+CTL_PROTO(opt_prof)
+CTL_PROTO(opt_prof_active)
+CTL_PROTO(opt_lg_prof_bt_max)
+CTL_PROTO(opt_lg_prof_sample)
+CTL_PROTO(opt_lg_prof_interval)
+CTL_PROTO(opt_prof_udump)
+CTL_PROTO(opt_prof_leak)
+#endif
+CTL_PROTO(opt_stats_print)
+CTL_PROTO(opt_lg_qspace_max)
+CTL_PROTO(opt_lg_cspace_max)
+CTL_PROTO(opt_lg_dirty_mult)
+CTL_PROTO(opt_lg_chunk)
+#ifdef JEMALLOC_SWAP
+CTL_PROTO(opt_overcommit)
+#endif
+CTL_PROTO(arenas_bin_i_size)
+CTL_PROTO(arenas_bin_i_nregs)
+CTL_PROTO(arenas_bin_i_run_size)
+INDEX_PROTO(arenas_bin_i)
+CTL_PROTO(arenas_lrun_i_size)
+INDEX_PROTO(arenas_lrun_i)
+CTL_PROTO(arenas_narenas)
+CTL_PROTO(arenas_initialized)
+CTL_PROTO(arenas_quantum)
+CTL_PROTO(arenas_cacheline)
+CTL_PROTO(arenas_subpage)
+CTL_PROTO(arenas_pagesize)
+CTL_PROTO(arenas_chunksize)
+#ifdef JEMALLOC_TINY
+CTL_PROTO(arenas_tspace_min)
+CTL_PROTO(arenas_tspace_max)
+#endif
+CTL_PROTO(arenas_qspace_min)
+CTL_PROTO(arenas_qspace_max)
+CTL_PROTO(arenas_cspace_min)
+CTL_PROTO(arenas_cspace_max)
+CTL_PROTO(arenas_sspace_min)
+CTL_PROTO(arenas_sspace_max)
+#ifdef JEMALLOC_TCACHE
+CTL_PROTO(arenas_tcache_max)
+#endif
+CTL_PROTO(arenas_ntbins)
+CTL_PROTO(arenas_nqbins)
+CTL_PROTO(arenas_ncbins)
+CTL_PROTO(arenas_nsbins)
+CTL_PROTO(arenas_nbins)
+#ifdef JEMALLOC_TCACHE
+CTL_PROTO(arenas_nhbins)
+#endif
+CTL_PROTO(arenas_nlruns)
+#ifdef JEMALLOC_PROF
+CTL_PROTO(prof_active)
+CTL_PROTO(prof_dump)
+CTL_PROTO(prof_interval)
+#endif
+#ifdef JEMALLOC_STATS
+CTL_PROTO(stats_chunks_current)
+CTL_PROTO(stats_chunks_total)
+CTL_PROTO(stats_chunks_high)
+CTL_PROTO(stats_huge_allocated)
+CTL_PROTO(stats_huge_nmalloc)
+CTL_PROTO(stats_huge_ndalloc)
+CTL_PROTO(stats_arenas_i_small_allocated)
+CTL_PROTO(stats_arenas_i_small_nmalloc)
+CTL_PROTO(stats_arenas_i_small_ndalloc)
+CTL_PROTO(stats_arenas_i_small_nrequests)
+CTL_PROTO(stats_arenas_i_large_allocated)
+CTL_PROTO(stats_arenas_i_large_nmalloc)
+CTL_PROTO(stats_arenas_i_large_ndalloc)
+CTL_PROTO(stats_arenas_i_large_nrequests)
+CTL_PROTO(stats_arenas_i_bins_j_allocated)
+CTL_PROTO(stats_arenas_i_bins_j_nmalloc)
+CTL_PROTO(stats_arenas_i_bins_j_ndalloc)
+CTL_PROTO(stats_arenas_i_bins_j_nrequests)
+#ifdef JEMALLOC_TCACHE
+CTL_PROTO(stats_arenas_i_bins_j_nfills)
+CTL_PROTO(stats_arenas_i_bins_j_nflushes)
+#endif
+CTL_PROTO(stats_arenas_i_bins_j_nruns)
+CTL_PROTO(stats_arenas_i_bins_j_nreruns)
+CTL_PROTO(stats_arenas_i_bins_j_highruns)
+CTL_PROTO(stats_arenas_i_bins_j_curruns)
+INDEX_PROTO(stats_arenas_i_bins_j)
+CTL_PROTO(stats_arenas_i_lruns_j_nmalloc)
+CTL_PROTO(stats_arenas_i_lruns_j_ndalloc)
+CTL_PROTO(stats_arenas_i_lruns_j_nrequests)
+CTL_PROTO(stats_arenas_i_lruns_j_highruns)
+CTL_PROTO(stats_arenas_i_lruns_j_curruns)
+INDEX_PROTO(stats_arenas_i_lruns_j)
+#endif
+CTL_PROTO(stats_arenas_i_pactive)
+CTL_PROTO(stats_arenas_i_pdirty)
+#ifdef JEMALLOC_STATS
+CTL_PROTO(stats_arenas_i_mapped)
+CTL_PROTO(stats_arenas_i_npurge)
+CTL_PROTO(stats_arenas_i_nmadvise)
+CTL_PROTO(stats_arenas_i_purged)
+#endif
+INDEX_PROTO(stats_arenas_i)
+#ifdef JEMALLOC_STATS
+CTL_PROTO(stats_allocated)
+CTL_PROTO(stats_active)
+CTL_PROTO(stats_mapped)
+#endif
+#ifdef JEMALLOC_SWAP
+# ifdef JEMALLOC_STATS
+CTL_PROTO(swap_avail)
+# endif
+CTL_PROTO(swap_prezeroed)
+CTL_PROTO(swap_nfds)
+CTL_PROTO(swap_fds)
+#endif
+
+/******************************************************************************/
+/* mallctl tree. */
+
+/* Maximum tree depth. */
+#define CTL_MAX_DEPTH 6
+
+/*
+ * Each ctl_node_t initializer is assembled from NAME() paired with either
+ * CHILD() or CTL(): the braces opened by NAME() are intentionally closed by
+ * whichever second macro follows it in the table entry.
+ */
+#define NAME(n) true, {.named = {n
+#define CHILD(c) sizeof(c##_node) / sizeof(ctl_node_t), c##_node}}, NULL
+#define CTL(c) 0, NULL}}, c##_ctl
+
+/*
+ * Only handles internal indexed nodes, since there are currently no external
+ * ones.
+ */
+#define INDEX(i) false, {.indexed = {i##_index}}, NULL
+
+#ifdef JEMALLOC_TCACHE
+static const ctl_node_t tcache_node[] = {
+ {NAME("flush"), CTL(tcache_flush)}
+};
+#endif
+
+/* config.* : compile-time configuration flags (constant true/false leaves). */
+static const ctl_node_t config_node[] = {
+ {NAME("debug"), CTL(config_debug)},
+ {NAME("dss"), CTL(config_dss)},
+ {NAME("dynamic_page_shift"), CTL(config_dynamic_page_shift)},
+ {NAME("fill"), CTL(config_fill)},
+ {NAME("lazy_lock"), CTL(config_lazy_lock)},
+ {NAME("prof"), CTL(config_prof)},
+ {NAME("prof_libgcc"), CTL(config_prof_libgcc)},
+ {NAME("prof_libunwind"), CTL(config_prof_libunwind)},
+ {NAME("stats"), CTL(config_stats)},
+ {NAME("swap"), CTL(config_swap)},
+ {NAME("sysv"), CTL(config_sysv)},
+ {NAME("tcache"), CTL(config_tcache)},
+ {NAME("tiny"), CTL(config_tiny)},
+ {NAME("tls"), CTL(config_tls)},
+ {NAME("xmalloc"), CTL(config_xmalloc)}
+};
+
+/* opt.* : read-only views of the option values parsed at startup. */
+static const ctl_node_t opt_node[] = {
+ {NAME("abort"), CTL(opt_abort)},
+#ifdef JEMALLOC_FILL
+ {NAME("junk"), CTL(opt_junk)},
+#endif
+#ifdef JEMALLOC_SYSV
+ {NAME("sysv"), CTL(opt_sysv)},
+#endif
+#ifdef JEMALLOC_XMALLOC
+ {NAME("xmalloc"), CTL(opt_xmalloc)},
+#endif
+#ifdef JEMALLOC_ZERO
+ {NAME("zero"), CTL(opt_zero)},
+#endif
+#ifdef JEMALLOC_TCACHE
+ {NAME("tcache"), CTL(opt_tcache)},
+ {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)},
+#endif
+#ifdef JEMALLOC_PROF
+ {NAME("prof"), CTL(opt_prof)},
+ {NAME("prof_active"), CTL(opt_prof_active)},
+ {NAME("lg_prof_bt_max"), CTL(opt_lg_prof_bt_max)},
+ {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)},
+ {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)},
+ {NAME("prof_udump"), CTL(opt_prof_udump)},
+ {NAME("prof_leak"), CTL(opt_prof_leak)},
+#endif
+ {NAME("stats_print"), CTL(opt_stats_print)},
+ {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)},
+ {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)},
+ {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)},
+ {NAME("lg_chunk"), CTL(opt_lg_chunk)}
+/* Conditional final entry: comma emitted inside the #ifdef to stay valid. */
+#ifdef JEMALLOC_SWAP
+ ,
+ {NAME("overcommit"), CTL(opt_overcommit)}
+#endif
+};
+
+/* arenas.bin.<i>.* : geometry of the i'th small size class. */
+static const ctl_node_t arenas_bin_i_node[] = {
+ {NAME("size"), CTL(arenas_bin_i_size)},
+ {NAME("nregs"), CTL(arenas_bin_i_nregs)},
+ {NAME("run_size"), CTL(arenas_bin_i_run_size)}
+};
+/* Anonymous wrapper returned by arenas_bin_i_index() for indexed lookup. */
+static const ctl_node_t super_arenas_bin_i_node[] = {
+ {NAME(""), CHILD(arenas_bin_i)}
+};
+
+static const ctl_node_t arenas_bin_node[] = {
+ {INDEX(arenas_bin_i)}
+};
+
+/* arenas.lrun.<i>.* : geometry of the i'th large run size class. */
+static const ctl_node_t arenas_lrun_i_node[] = {
+ {NAME("size"), CTL(arenas_lrun_i_size)}
+};
+static const ctl_node_t super_arenas_lrun_i_node[] = {
+ {NAME(""), CHILD(arenas_lrun_i)}
+};
+
+static const ctl_node_t arenas_lrun_node[] = {
+ {INDEX(arenas_lrun_i)}
+};
+
+/* arenas.* : global size-class/arena layout parameters. */
+static const ctl_node_t arenas_node[] = {
+ {NAME("narenas"), CTL(arenas_narenas)},
+ {NAME("initialized"), CTL(arenas_initialized)},
+ {NAME("quantum"), CTL(arenas_quantum)},
+ {NAME("cacheline"), CTL(arenas_cacheline)},
+ {NAME("subpage"), CTL(arenas_subpage)},
+ {NAME("pagesize"), CTL(arenas_pagesize)},
+ {NAME("chunksize"), CTL(arenas_chunksize)},
+#ifdef JEMALLOC_TINY
+ {NAME("tspace_min"), CTL(arenas_tspace_min)},
+ {NAME("tspace_max"), CTL(arenas_tspace_max)},
+#endif
+ {NAME("qspace_min"), CTL(arenas_qspace_min)},
+ {NAME("qspace_max"), CTL(arenas_qspace_max)},
+ {NAME("cspace_min"), CTL(arenas_cspace_min)},
+ {NAME("cspace_max"), CTL(arenas_cspace_max)},
+ {NAME("sspace_min"), CTL(arenas_sspace_min)},
+ {NAME("sspace_max"), CTL(arenas_sspace_max)},
+#ifdef JEMALLOC_TCACHE
+ {NAME("tcache_max"), CTL(arenas_tcache_max)},
+#endif
+ {NAME("ntbins"), CTL(arenas_ntbins)},
+ {NAME("nqbins"), CTL(arenas_nqbins)},
+ {NAME("ncbins"), CTL(arenas_ncbins)},
+ {NAME("nsbins"), CTL(arenas_nsbins)},
+ {NAME("nbins"), CTL(arenas_nbins)},
+#ifdef JEMALLOC_TCACHE
+ {NAME("nhbins"), CTL(arenas_nhbins)},
+#endif
+ {NAME("bin"), CHILD(arenas_bin)},
+ {NAME("nlruns"), CTL(arenas_nlruns)},
+ {NAME("lrun"), CHILD(arenas_lrun)}
+};
+
+#ifdef JEMALLOC_PROF
+/* prof.* : heap-profiling control and status. */
+static const ctl_node_t prof_node[] = {
+ {NAME("active"), CTL(prof_active)},
+ {NAME("dump"), CTL(prof_dump)},
+ {NAME("interval"), CTL(prof_interval)}
+};
+#endif
+
+#ifdef JEMALLOC_STATS
+static const ctl_node_t stats_chunks_node[] = {
+ {NAME("current"), CTL(stats_chunks_current)},
+ {NAME("total"), CTL(stats_chunks_total)},
+ {NAME("high"), CTL(stats_chunks_high)}
+};
+
+static const ctl_node_t stats_huge_node[] = {
+ {NAME("allocated"), CTL(stats_huge_allocated)},
+ {NAME("nmalloc"), CTL(stats_huge_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_huge_ndalloc)}
+};
+
+static const ctl_node_t stats_arenas_i_small_node[] = {
+ {NAME("allocated"), CTL(stats_arenas_i_small_allocated)},
+ {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}
+};
+
+static const ctl_node_t stats_arenas_i_large_node[] = {
+ {NAME("allocated"), CTL(stats_arenas_i_large_allocated)},
+ {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}
+};
+
+/* stats.arenas.<i>.bins.<j>.* : per-bin statistics. */
+static const ctl_node_t stats_arenas_i_bins_j_node[] = {
+ {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)},
+ {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)},
+#ifdef JEMALLOC_TCACHE
+ {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)},
+ {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)},
+#endif
+ {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)},
+ {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)},
+ {NAME("highruns"), CTL(stats_arenas_i_bins_j_highruns)},
+ {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)}
+};
+static const ctl_node_t super_stats_arenas_i_bins_j_node[] = {
+ {NAME(""), CHILD(stats_arenas_i_bins_j)}
+};
+
+static const ctl_node_t stats_arenas_i_bins_node[] = {
+ {INDEX(stats_arenas_i_bins_j)}
+};
+
+/* stats.arenas.<i>.lruns.<j>.* : per-large-run-class statistics. */
+static const ctl_node_t stats_arenas_i_lruns_j_node[] = {
+ {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)},
+ {NAME("highruns"), CTL(stats_arenas_i_lruns_j_highruns)},
+ {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)}
+};
+static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = {
+ {NAME(""), CHILD(stats_arenas_i_lruns_j)}
+};
+
+static const ctl_node_t stats_arenas_i_lruns_node[] = {
+ {INDEX(stats_arenas_i_lruns_j)}
+};
+#endif
+
+/*
+ * Deliberately outside the JEMALLOC_STATS guard: pactive/pdirty are always
+ * available; the remaining entries are appended only with stats enabled.
+ */
+static const ctl_node_t stats_arenas_i_node[] = {
+ {NAME("pactive"), CTL(stats_arenas_i_pactive)},
+ {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}
+#ifdef JEMALLOC_STATS
+ ,
+ {NAME("mapped"), CTL(stats_arenas_i_mapped)},
+ {NAME("npurge"), CTL(stats_arenas_i_npurge)},
+ {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)},
+ {NAME("purged"), CTL(stats_arenas_i_purged)},
+ {NAME("small"), CHILD(stats_arenas_i_small)},
+ {NAME("large"), CHILD(stats_arenas_i_large)},
+ {NAME("bins"), CHILD(stats_arenas_i_bins)},
+ {NAME("lruns"), CHILD(stats_arenas_i_lruns)}
+#endif
+};
+static const ctl_node_t super_stats_arenas_i_node[] = {
+ {NAME(""), CHILD(stats_arenas_i)}
+};
+
+static const ctl_node_t stats_arenas_node[] = {
+ {INDEX(stats_arenas_i)}
+};
+
+static const ctl_node_t stats_node[] = {
+#ifdef JEMALLOC_STATS
+ {NAME("allocated"), CTL(stats_allocated)},
+ {NAME("active"), CTL(stats_active)},
+ {NAME("mapped"), CTL(stats_mapped)},
+#endif
+ {NAME("arenas"), CHILD(stats_arenas)}
+};
+
+#ifdef JEMALLOC_SWAP
+/* swap.* : swap-file backing configuration. */
+static const ctl_node_t swap_node[] = {
+# ifdef JEMALLOC_STATS
+ {NAME("avail"), CTL(swap_avail)},
+# endif
+ {NAME("prezeroed"), CTL(swap_prezeroed)},
+ {NAME("nfds"), CTL(swap_nfds)},
+ {NAME("fds"), CTL(swap_fds)}
+};
+#endif
+
+/* Root of the mallctl namespace. */
+static const ctl_node_t root_node[] = {
+ {NAME("version"), CTL(version)},
+ {NAME("epoch"), CTL(epoch)},
+#ifdef JEMALLOC_TCACHE
+ {NAME("tcache"), CHILD(tcache)},
+#endif
+ {NAME("config"), CHILD(config)},
+ {NAME("opt"), CHILD(opt)},
+ {NAME("arenas"), CHILD(arenas)},
+#ifdef JEMALLOC_PROF
+ {NAME("prof"), CHILD(prof)},
+#endif
+ {NAME("stats"), CHILD(stats)}
+#ifdef JEMALLOC_SWAP
+ ,
+ {NAME("swap"), CHILD(swap)}
+#endif
+};
+/* Single-entry wrapper so traversal always starts from a named node. */
+static const ctl_node_t super_root_node[] = {
+ {NAME(""), CHILD(root)}
+};
+
+#undef NAME
+#undef CHILD
+#undef CTL
+#undef INDEX
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_STATS
+/*
+ * Lazily allocate the per-size-class stats arrays (nbins bin entries,
+ * nlclasses large-run entries) for one arena stats slot.  Memory comes from
+ * base_alloc() and is retained for the life of the process.  Returns true on
+ * allocation failure, false on success (jemalloc's bool error convention).
+ */
+static bool
+ctl_arena_init(ctl_arena_stats_t *astats)
+{
+
+ if (astats->bstats == NULL) {
+ astats->bstats = (malloc_bin_stats_t *)base_alloc(nbins *
+ sizeof(malloc_bin_stats_t));
+ if (astats->bstats == NULL)
+ return (true);
+ }
+ if (astats->lstats == NULL) {
+ astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses *
+ sizeof(malloc_large_stats_t));
+ if (astats->lstats == NULL)
+ return (true);
+ }
+
+ return (false);
+}
+#endif
+
+/*
+ * Reset one arena stats slot to all-zero before re-merging fresh counters
+ * into it.  The bstats/lstats arrays themselves are preserved (they were
+ * allocated by ctl_arena_init()); only their contents are zeroed.
+ */
+static void
+ctl_arena_clear(ctl_arena_stats_t *astats)
+{
+
+ astats->pactive = 0;
+ astats->pdirty = 0;
+#ifdef JEMALLOC_STATS
+ memset(&astats->astats, 0, sizeof(arena_stats_t));
+ astats->allocated_small = 0;
+ astats->nmalloc_small = 0;
+ astats->ndalloc_small = 0;
+ astats->nrequests_small = 0;
+ memset(astats->bstats, 0, nbins * sizeof(malloc_bin_stats_t));
+ memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t));
+#endif
+}
+
+#ifdef JEMALLOC_STATS
+/*
+ * Pull current counters out of an arena into cstats, then derive the
+ * aggregate small-allocation totals by summing across all bins.
+ */
+static void
+ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena)
+{
+ unsigned i;
+
+ arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty,
+ &cstats->astats, cstats->bstats, cstats->lstats);
+
+ for (i = 0; i < nbins; i++) {
+ cstats->allocated_small += cstats->bstats[i].allocated;
+ cstats->nmalloc_small += cstats->bstats[i].nmalloc;
+ cstats->ndalloc_small += cstats->bstats[i].ndalloc;
+ cstats->nrequests_small += cstats->bstats[i].nrequests;
+ }
+}
+
+/*
+ * Accumulate one arena's merged stats (astats) into the cross-arena summary
+ * slot (sstats, the extra element at ctl_stats.arenas[narenas]).  Field-by-
+ * field addition; no locking here — callers snapshot under ctl_mtx.
+ */
+static void
+ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats)
+{
+ unsigned i;
+
+ sstats->pactive += astats->pactive;
+ sstats->pdirty += astats->pdirty;
+
+ sstats->astats.mapped += astats->astats.mapped;
+ sstats->astats.npurge += astats->astats.npurge;
+ sstats->astats.nmadvise += astats->astats.nmadvise;
+ sstats->astats.purged += astats->astats.purged;
+
+ sstats->allocated_small += astats->allocated_small;
+ sstats->nmalloc_small += astats->nmalloc_small;
+ sstats->ndalloc_small += astats->ndalloc_small;
+ sstats->nrequests_small += astats->nrequests_small;
+
+ sstats->astats.allocated_large += astats->astats.allocated_large;
+ sstats->astats.nmalloc_large += astats->astats.nmalloc_large;
+ sstats->astats.ndalloc_large += astats->astats.ndalloc_large;
+ sstats->astats.nrequests_large += astats->astats.nrequests_large;
+
+ for (i = 0; i < nlclasses; i++) {
+ sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc;
+ sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc;
+ sstats->lstats[i].nrequests += astats->lstats[i].nrequests;
+ sstats->lstats[i].highruns += astats->lstats[i].highruns;
+ sstats->lstats[i].curruns += astats->lstats[i].curruns;
+ }
+
+ for (i = 0; i < nbins; i++) {
+ sstats->bstats[i].allocated += astats->bstats[i].allocated;
+ sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc;
+ sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc;
+ sstats->bstats[i].nrequests += astats->bstats[i].nrequests;
+#ifdef JEMALLOC_TCACHE
+ sstats->bstats[i].nfills += astats->bstats[i].nfills;
+ sstats->bstats[i].nflushes += astats->bstats[i].nflushes;
+#endif
+ sstats->bstats[i].nruns += astats->bstats[i].nruns;
+ sstats->bstats[i].reruns += astats->bstats[i].reruns;
+ sstats->bstats[i].highruns += astats->bstats[i].highruns;
+ sstats->bstats[i].curruns += astats->bstats[i].curruns;
+ }
+}
+#endif
+
+/*
+ * Refresh the cached stats for arena i: clear its slot, re-merge from the
+ * live arena, and fold the result into the summary slot at index narenas.
+ * Without JEMALLOC_STATS only page counts (nactive/ndirty) are tracked.
+ */
+static void
+ctl_arena_refresh(arena_t *arena, unsigned i)
+{
+ ctl_arena_stats_t *astats = &ctl_stats.arenas[i];
+ ctl_arena_stats_t *sstats = &ctl_stats.arenas[narenas];
+
+ ctl_arena_clear(astats);
+
+#ifdef JEMALLOC_STATS
+ ctl_arena_stats_amerge(astats, arena);
+ /* Merge into sum stats as well. */
+ ctl_arena_stats_smerge(sstats, astats);
+#else
+ astats->pactive += arena->nactive;
+ astats->pdirty += arena->ndirty;
+ /* Merge into sum stats as well. */
+ sstats->pactive += arena->nactive;
+ sstats->pdirty += arena->ndirty;
+#endif
+}
+
+/*
+ * Rebuild the entire ctl_stats snapshot: chunk and huge counters are copied
+ * under their respective locks, then every initialized arena is re-merged
+ * and global allocated/active/mapped totals recomputed.  Bumps ctl_epoch so
+ * readers can detect refreshes.  Caller must hold ctl_mtx.
+ */
+static void
+ctl_refresh(void)
+{
+ unsigned i;
+ /* VLA snapshot of arena pointers; narenas is fixed after boot. */
+ arena_t *tarenas[narenas];
+
+#ifdef JEMALLOC_STATS
+ malloc_mutex_lock(&chunks_mtx);
+ ctl_stats.chunks.current = stats_chunks.curchunks;
+ ctl_stats.chunks.total = stats_chunks.nchunks;
+ ctl_stats.chunks.high = stats_chunks.highchunks;
+ malloc_mutex_unlock(&chunks_mtx);
+
+ malloc_mutex_lock(&huge_mtx);
+ ctl_stats.huge.allocated = huge_allocated;
+ ctl_stats.huge.nmalloc = huge_nmalloc;
+ ctl_stats.huge.ndalloc = huge_ndalloc;
+ malloc_mutex_unlock(&huge_mtx);
+#endif
+
+ /*
+ * Clear sum stats, since they will be merged into by
+ * ctl_arena_refresh().
+ */
+ ctl_arena_clear(&ctl_stats.arenas[narenas]);
+
+ /* Copy the pointer array under the lock; merge outside it. */
+ malloc_mutex_lock(&arenas_lock);
+ memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
+ malloc_mutex_unlock(&arenas_lock);
+ for (i = 0; i < narenas; i++) {
+ bool initialized = (tarenas[i] != NULL);
+
+ ctl_stats.arenas[i].initialized = initialized;
+ if (initialized)
+ ctl_arena_refresh(tarenas[i], i);
+ }
+
+#ifdef JEMALLOC_STATS
+ ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small
+ + ctl_stats.arenas[narenas].astats.allocated_large
+ + ctl_stats.huge.allocated;
+ ctl_stats.active = (ctl_stats.arenas[narenas].pactive << PAGE_SHIFT)
+ + ctl_stats.huge.allocated;
+ ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk);
+
+# ifdef JEMALLOC_SWAP
+ malloc_mutex_lock(&swap_mtx);
+ ctl_stats.swap_avail = swap_avail;
+ malloc_mutex_unlock(&swap_mtx);
+# endif
+#endif
+
+ ctl_epoch++;
+}
+
+/*
+ * One-time lazy initialization of the ctl stats machinery: allocates
+ * narenas+1 stats slots (the extra one holds cross-arena sums), eagerly
+ * initializes every slot, and performs an initial refresh at epoch 0.
+ * Returns true on allocation failure.  Caller must hold ctl_mtx.
+ */
+static bool
+ctl_init(void)
+{
+
+ if (ctl_initialized == false) {
+#ifdef JEMALLOC_STATS
+ unsigned i;
+#endif
+
+ /*
+ * Allocate space for one extra arena stats element, which
+ * contains summed stats across all arenas.
+ */
+ ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc(
+ (narenas + 1) * sizeof(ctl_arena_stats_t));
+ if (ctl_stats.arenas == NULL)
+ return (true);
+ memset(ctl_stats.arenas, 0, (narenas + 1) *
+ sizeof(ctl_arena_stats_t));
+
+ /*
+ * Initialize all stats structures, regardless of whether they
+ * ever get used. Lazy initialization would allow errors to
+ * cause inconsistent state to be viewable by the application.
+ */
+#ifdef JEMALLOC_STATS
+ for (i = 0; i <= narenas; i++) {
+ if (ctl_arena_init(&ctl_stats.arenas[i]))
+ return (true);
+ }
+#endif
+ /* The summary slot is always considered initialized. */
+ ctl_stats.arenas[narenas].initialized = true;
+
+ ctl_epoch = 0;
+ ctl_refresh();
+ ctl_initialized = true;
+ }
+
+ return (false);
+}
+
+/*
+ * Resolve a dotted name (e.g. "stats.arenas.0.pdirty") against the ctl tree.
+ * On entry *depthp is the capacity of nodesp/mibp; on success it is set to
+ * the actual depth reached.  nodesp (optional) receives the node at each
+ * level and mibp the per-level index.  Returns 0 on success, ENOENT if the
+ * name does not match a path through the tree.
+ */
+static int
+ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
+ size_t *depthp)
+{
+ int ret;
+ const char *elm, *tdot, *dot;
+ size_t elen, i, j;
+ const ctl_node_t *node;
+
+ elm = name;
+ /* Equivalent to strchrnul(). */
+ dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : strchr(elm, '\0');
+ elen = (size_t)((uintptr_t)dot - (uintptr_t)elm);
+ if (elen == 0) {
+ ret = ENOENT;
+ goto RETURN;
+ }
+ node = super_root_node;
+ for (i = 0; i < *depthp; i++) {
+ assert(node->named);
+ assert(node->u.named.nchildren > 0);
+ if (node->u.named.children[0].named) {
+ const ctl_node_t *pnode = node;
+
+ /* Children are named. */
+ for (j = 0; j < node->u.named.nchildren; j++) {
+ const ctl_node_t *child =
+ &node->u.named.children[j];
+ if (strlen(child->u.named.name) == elen
+ && strncmp(elm, child->u.named.name,
+ elen) == 0) {
+ node = child;
+ if (nodesp != NULL)
+ nodesp[i] = node;
+ mibp[i] = j;
+ break;
+ }
+ }
+ /* node unchanged means no child matched. */
+ if (node == pnode) {
+ ret = ENOENT;
+ goto RETURN;
+ }
+ } else {
+ unsigned long index;
+ const ctl_node_t *inode;
+
+ /*
+ * Children are indexed.
+ * NOTE(review): only ULONG_MAX (overflow) is treated
+ * as a parse failure; a non-numeric element makes
+ * strtoul() return 0, which can resolve as index 0 —
+ * confirm this is acceptable for this internal API.
+ */
+ index = strtoul(elm, NULL, 10);
+ if (index == ULONG_MAX) {
+ ret = ENOENT;
+ goto RETURN;
+ }
+
+ inode = &node->u.named.children[0];
+ node = inode->u.indexed.index(mibp, *depthp,
+ index);
+ if (node == NULL) {
+ ret = ENOENT;
+ goto RETURN;
+ }
+
+ if (nodesp != NULL)
+ nodesp[i] = node;
+ mibp[i] = (size_t)index;
+ }
+
+ if (node->ctl != NULL) {
+ /* Terminal node. */
+ if (*dot != '\0') {
+ /*
+ * The name contains more elements than are
+ * in this path through the tree.
+ */
+ ret = ENOENT;
+ goto RETURN;
+ }
+ /* Complete lookup successful. */
+ *depthp = i + 1;
+ break;
+ }
+
+ /* Update elm. */
+ if (*dot == '\0') {
+ /* No more elements. */
+ ret = ENOENT;
+ goto RETURN;
+ }
+ elm = &dot[1];
+ dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot :
+ strchr(elm, '\0');
+ elen = (size_t)((uintptr_t)dot - (uintptr_t)elm);
+ }
+
+ ret = 0;
+RETURN:
+ return (ret);
+}
+
+/*
+ * Public mallctl-by-name entry point: look up the dotted name and invoke the
+ * terminal node's callback with oldp/newp.  Returns EAGAIN if lazy init
+ * fails, ENOENT for an unresolvable or non-terminal name, else the
+ * callback's result.  Serialized by ctl_mtx.
+ */
+int
+ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
+ size_t newlen)
+{
+ int ret;
+ size_t depth;
+ ctl_node_t const *nodes[CTL_MAX_DEPTH];
+ size_t mib[CTL_MAX_DEPTH];
+
+ malloc_mutex_lock(&ctl_mtx);
+ if (ctl_init()) {
+ ret = EAGAIN;
+ goto RETURN;
+ }
+
+ depth = CTL_MAX_DEPTH;
+ ret = ctl_lookup(name, nodes, mib, &depth);
+ if (ret != 0)
+ goto RETURN;
+
+ if (nodes[depth-1]->ctl == NULL) {
+ /* The name refers to a partial path through the ctl tree. */
+ ret = ENOENT;
+ goto RETURN;
+ }
+ ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen);
+
+RETURN:
+ malloc_mutex_unlock(&ctl_mtx);
+ return(ret);
+}
+
+/*
+ * Translate a dotted name into a MIB (array of indices) for later use with
+ * ctl_bymib().  *miblenp is in/out: capacity on entry, actual depth on
+ * success.  Returns EAGAIN on init failure, else ctl_lookup()'s result.
+ */
+int
+ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp)
+{
+ int ret;
+
+ malloc_mutex_lock(&ctl_mtx);
+ if (ctl_init()) {
+ ret = EAGAIN;
+ goto RETURN;
+ }
+
+ ret = ctl_lookup(name, NULL, mibp, miblenp);
+
+RETURN:
+ malloc_mutex_unlock(&ctl_mtx);
+ return(ret);
+}
+
+/*
+ * Invoke a ctl callback via a pre-translated MIB.  Walks the tree using the
+ * numeric indices in mib[]; named levels are bounds-checked against
+ * nchildren, indexed levels are validated by the node's index() hook.
+ * Returns ENOENT for an invalid or partial MIB.  Serialized by ctl_mtx.
+ */
+int
+ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ int ret;
+ const ctl_node_t *node;
+ size_t i;
+
+ malloc_mutex_lock(&ctl_mtx);
+ if (ctl_init()) {
+ ret = EAGAIN;
+ goto RETURN;
+ }
+
+ /* Iterate down the tree. */
+ node = super_root_node;
+ for (i = 0; i < miblen; i++) {
+ if (node->u.named.children[0].named) {
+ /* Children are named. */
+ if (node->u.named.nchildren <= mib[i]) {
+ ret = ENOENT;
+ goto RETURN;
+ }
+ node = &node->u.named.children[mib[i]];
+ } else {
+ const ctl_node_t *inode;
+
+ /* Indexed element. */
+ inode = &node->u.named.children[0];
+ node = inode->u.indexed.index(mib, miblen, mib[i]);
+ if (node == NULL) {
+ ret = ENOENT;
+ goto RETURN;
+ }
+ }
+ }
+
+ /* Call the ctl function. */
+ if (node->ctl == NULL) {
+ /* Partial MIB. */
+ ret = ENOENT;
+ goto RETURN;
+ }
+ ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen);
+
+RETURN:
+ malloc_mutex_unlock(&ctl_mtx);
+ return(ret);
+}
+
+/*
+ * Early boot hook: create ctl_mtx and mark the subsystem uninitialized so
+ * the first ctl_* call performs lazy setup.  Returns true if the mutex
+ * cannot be initialized.
+ */
+bool
+ctl_boot(void)
+{
+
+ if (malloc_mutex_init(&ctl_mtx))
+ return (true);
+
+ ctl_initialized = false;
+
+ return (false);
+}
+
+/******************************************************************************/
+/* *_ctl() functions. */
+
+/*
+ * Helper macros for *_ctl() bodies.  All of them assume local `int ret` and
+ * a `RETURN:` label in the enclosing function, and the standard parameter
+ * names (oldp, oldlenp, newp, newlen).
+ */
+
+/* Reject any write attempt (newp/newlen supplied) with EPERM. */
+#define READONLY() do { \
+ if (newp != NULL || newlen != 0) { \
+ ret = EPERM; \
+ goto RETURN; \
+ } \
+} while (0)
+
+/* Reject any read attempt (oldp/oldlenp supplied) with EPERM. */
+#define WRITEONLY() do { \
+ if (oldp != NULL || oldlenp != NULL) { \
+ ret = EPERM; \
+ goto RETURN; \
+ } \
+} while (0)
+
+/* Action-only node: neither reads nor writes are allowed. */
+#define VOID() do { \
+ READONLY(); \
+ WRITEONLY(); \
+} while (0)
+
+/*
+ * Copy v out to oldp.  On a size mismatch, copy as many bytes as fit and
+ * return EINVAL — a deliberate partial-read so callers can discover sizes.
+ */
+#define READ(v, t) do { \
+ if (oldp != NULL && oldlenp != NULL) { \
+ if (*oldlenp != sizeof(t)) { \
+ size_t copylen = (sizeof(t) <= *oldlenp) \
+ ? sizeof(t) : *oldlenp; \
+ memcpy(oldp, (void *)&v, copylen); \
+ ret = EINVAL; \
+ goto RETURN; \
+ } else \
+ *(t *)oldp = v; \
+ } \
+} while (0)
+
+/* Copy newp into v; sizes must match exactly (EINVAL otherwise). */
+#define WRITE(v, t) do { \
+ if (newp != NULL) { \
+ if (newlen != sizeof(t)) { \
+ ret = EINVAL; \
+ goto RETURN; \
+ } \
+ v = *(t *)newp; \
+ } \
+} while (0)
+
+/* Generate a read-only leaf callback n##_ctl that exposes expression v. */
+#define CTL_RO_GEN(n, v, t) \
+static int \
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
+ void *newp, size_t newlen) \
+{ \
+ int ret; \
+ t oldval; \
+ \
+ READONLY(); \
+ oldval = v; \
+ READ(oldval, t); \
+ \
+ ret = 0; \
+RETURN: \
+ return (ret); \
+}
+
+/* Generate a leaf that always reads as boolean true (config_* enabled). */
+#define CTL_RO_TRUE_GEN(n) \
+static int \
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
+ void *newp, size_t newlen) \
+{ \
+ int ret; \
+ bool oldval; \
+ \
+ READONLY(); \
+ oldval = true; \
+ READ(oldval, bool); \
+ \
+ ret = 0; \
+RETURN: \
+ return (ret); \
+}
+
+/* Generate a leaf that always reads as boolean false (config_* disabled). */
+#define CTL_RO_FALSE_GEN(n) \
+static int \
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
+ void *newp, size_t newlen) \
+{ \
+ int ret; \
+ bool oldval; \
+ \
+ READONLY(); \
+ oldval = false; \
+ READ(oldval, bool); \
+ \
+ ret = 0; \
+RETURN: \
+ return (ret); \
+}
+
+/* "version": the jemalloc version string. */
+CTL_RO_GEN(version, JEMALLOC_VERSION, const char *)
+
+/*
+ * "epoch": writing any non-zero value triggers a full stats refresh
+ * (ctl_refresh()); reading returns the current epoch counter.
+ */
+static int
+epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ int ret;
+ uint64_t newval;
+
+ newval = 0;
+ WRITE(newval, uint64_t);
+ if (newval != 0)
+ ctl_refresh();
+ READ(ctl_epoch, uint64_t);
+
+ ret = 0;
+RETURN:
+ return (ret);
+}
+
+#ifdef JEMALLOC_TCACHE
+/*
+ * "tcache.flush": destroy the calling thread's cache, returning its cached
+ * objects to the arenas.  Action-only (VOID); a no-op if the thread has no
+ * tcache.
+ */
+static int
+tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ int ret;
+ tcache_t *tcache;
+
+ VOID();
+
+ tcache = tcache_tls;
+ if (tcache == NULL) {
+ ret = 0;
+ goto RETURN;
+ }
+ tcache_destroy(tcache);
+ tcache_tls = NULL;
+
+ ret = 0;
+RETURN:
+ return (ret);
+}
+#endif
+
+/******************************************************************************/
+
+/* config.* leaves: constant true/false depending on compile-time defines. */
+#ifdef JEMALLOC_DEBUG
+CTL_RO_TRUE_GEN(config_debug)
+#else
+CTL_RO_FALSE_GEN(config_debug)
+#endif
+
+#ifdef JEMALLOC_DSS
+CTL_RO_TRUE_GEN(config_dss)
+#else
+CTL_RO_FALSE_GEN(config_dss)
+#endif
+
+#ifdef JEMALLOC_DYNAMIC_PAGE_SHIFT
+CTL_RO_TRUE_GEN(config_dynamic_page_shift)
+#else
+CTL_RO_FALSE_GEN(config_dynamic_page_shift)
+#endif
+
+#ifdef JEMALLOC_FILL
+CTL_RO_TRUE_GEN(config_fill)
+#else
+CTL_RO_FALSE_GEN(config_fill)
+#endif
+
+#ifdef JEMALLOC_LAZY_LOCK
+CTL_RO_TRUE_GEN(config_lazy_lock)
+#else
+CTL_RO_FALSE_GEN(config_lazy_lock)
+#endif
+
+#ifdef JEMALLOC_PROF
+CTL_RO_TRUE_GEN(config_prof)
+#else
+CTL_RO_FALSE_GEN(config_prof)
+#endif
+
+#ifdef JEMALLOC_PROF_LIBGCC
+CTL_RO_TRUE_GEN(config_prof_libgcc)
+#else
+CTL_RO_FALSE_GEN(config_prof_libgcc)
+#endif
+
+#ifdef JEMALLOC_PROF_LIBUNWIND
+CTL_RO_TRUE_GEN(config_prof_libunwind)
+#else
+CTL_RO_FALSE_GEN(config_prof_libunwind)
+#endif
+
+#ifdef JEMALLOC_STATS
+CTL_RO_TRUE_GEN(config_stats)
+#else
+CTL_RO_FALSE_GEN(config_stats)
+#endif
+
+#ifdef JEMALLOC_SWAP
+CTL_RO_TRUE_GEN(config_swap)
+#else
+CTL_RO_FALSE_GEN(config_swap)
+#endif
+
+#ifdef JEMALLOC_SYSV
+CTL_RO_TRUE_GEN(config_sysv)
+#else
+CTL_RO_FALSE_GEN(config_sysv)
+#endif
+
+#ifdef JEMALLOC_TCACHE
+CTL_RO_TRUE_GEN(config_tcache)
+#else
+CTL_RO_FALSE_GEN(config_tcache)
+#endif
+
+#ifdef JEMALLOC_TINY
+CTL_RO_TRUE_GEN(config_tiny)
+#else
+CTL_RO_FALSE_GEN(config_tiny)
+#endif
+
+#ifdef JEMALLOC_TLS
+CTL_RO_TRUE_GEN(config_tls)
+#else
+CTL_RO_FALSE_GEN(config_tls)
+#endif
+
+#ifdef JEMALLOC_XMALLOC
+CTL_RO_TRUE_GEN(config_xmalloc)
+#else
+CTL_RO_FALSE_GEN(config_xmalloc)
+#endif
+
+/******************************************************************************/
+
+/* opt.* leaves: read-only views of the startup option variables. */
+CTL_RO_GEN(opt_abort, opt_abort, bool)
+#ifdef JEMALLOC_FILL
+CTL_RO_GEN(opt_junk, opt_junk, bool)
+#endif
+#ifdef JEMALLOC_SYSV
+CTL_RO_GEN(opt_sysv, opt_sysv, bool)
+#endif
+#ifdef JEMALLOC_XMALLOC
+CTL_RO_GEN(opt_xmalloc, opt_xmalloc, bool)
+#endif
+#ifdef JEMALLOC_ZERO
+CTL_RO_GEN(opt_zero, opt_zero, bool)
+#endif
+#ifdef JEMALLOC_TCACHE
+CTL_RO_GEN(opt_tcache, opt_tcache, bool)
+CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
+#endif
+#ifdef JEMALLOC_PROF
+CTL_RO_GEN(opt_prof, opt_prof, bool)
+CTL_RO_GEN(opt_prof_active, opt_prof_active, bool)
+CTL_RO_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t)
+CTL_RO_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t)
+CTL_RO_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
+CTL_RO_GEN(opt_prof_udump, opt_prof_udump, bool)
+CTL_RO_GEN(opt_prof_leak, opt_prof_leak, bool)
+#endif
+CTL_RO_GEN(opt_stats_print, opt_stats_print, bool)
+CTL_RO_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t)
+CTL_RO_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t)
+CTL_RO_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t)
+CTL_RO_GEN(opt_lg_chunk, opt_lg_chunk, size_t)
+#ifdef JEMALLOC_SWAP
+CTL_RO_GEN(opt_overcommit, opt_overcommit, bool)
+#endif
+
+/******************************************************************************/
+
+/* arenas.bin.<i> leaves: read bin geometry from arena 0. */
+CTL_RO_GEN(arenas_bin_i_size, arenas[0]->bins[mib[2]].reg_size, size_t)
+CTL_RO_GEN(arenas_bin_i_nregs, arenas[0]->bins[mib[2]].nregs, uint32_t)
+CTL_RO_GEN(arenas_bin_i_run_size, arenas[0]->bins[mib[2]].run_size, size_t)
+/*
+ * Dynamic lookup for arenas.bin.<i>.  Valid bin indices are 0..nbins-1 (the
+ * leaves above index arenas[0]->bins[i]), so i == nbins must be rejected
+ * too: the previous "i > nbins" test let an out-of-bounds index through.
+ */
+const ctl_node_t *
+arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i)
+{
+
+ if (i >= nbins)
+ return (NULL);
+ return (super_arenas_bin_i_node);
+}
+
+/* arenas.lrun.<i>.size: large-run class i spans (i+1) pages. */
+CTL_RO_GEN(arenas_lrun_i_size, ((mib[2]+1) << PAGE_SHIFT), size_t)
+/*
+ * Dynamic lookup for arenas.lrun.<i>.  Valid large-run classes are
+ * 0..nlclasses-1 (stats arrays are allocated with exactly nlclasses
+ * elements), so reject i == nlclasses as well; the previous "i > nlclasses"
+ * test was off by one.
+ */
+const ctl_node_t *
+arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i)
+{
+
+ if (i >= nlclasses)
+ return (NULL);
+ return (super_arenas_lrun_i_node);
+}
+
+CTL_RO_GEN(arenas_narenas, narenas, unsigned)
+
+/*
+ * "arenas.initialized": read-only array of narenas booleans.  On a length
+ * mismatch, copies as many entries as fit and returns EINVAL (same partial-
+ * read convention as the READ macro).
+ *
+ * NOTE(review): unlike READ(), this dereferences *oldlenp and writes oldp
+ * without NULL checks — callers must always supply both; confirm that is
+ * the intended contract.
+ */
+static int
+arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen)
+{
+ int ret;
+ unsigned nread, i;
+
+ READONLY();
+ if (*oldlenp != narenas * sizeof(bool)) {
+ ret = EINVAL;
+ nread = (*oldlenp < narenas * sizeof(bool))
+ ? (*oldlenp / sizeof(bool)) : narenas;
+ } else {
+ ret = 0;
+ nread = narenas;
+ }
+
+ for (i = 0; i < nread; i++)
+ ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized;
+
+RETURN:
+ return (ret);
+}
+
+/* arenas.* constant leaves: size-class boundaries and bin counts. */
+CTL_RO_GEN(arenas_quantum, QUANTUM, size_t)
+CTL_RO_GEN(arenas_cacheline, CACHELINE, size_t)
+CTL_RO_GEN(arenas_subpage, SUBPAGE, size_t)
+CTL_RO_GEN(arenas_pagesize, PAGE_SIZE, size_t)
+CTL_RO_GEN(arenas_chunksize, chunksize, size_t)
+#ifdef JEMALLOC_TINY
+CTL_RO_GEN(arenas_tspace_min, (1U << LG_TINY_MIN), size_t)
+CTL_RO_GEN(arenas_tspace_max, (qspace_min >> 1), size_t)
+#endif
+CTL_RO_GEN(arenas_qspace_min, qspace_min, size_t)
+CTL_RO_GEN(arenas_qspace_max, qspace_max, size_t)
+CTL_RO_GEN(arenas_cspace_min, cspace_min, size_t)
+CTL_RO_GEN(arenas_cspace_max, cspace_max, size_t)
+CTL_RO_GEN(arenas_sspace_min, sspace_min, size_t)
+CTL_RO_GEN(arenas_sspace_max, sspace_max, size_t)
+#ifdef JEMALLOC_TCACHE
+CTL_RO_GEN(arenas_tcache_max, tcache_maxclass, size_t)
+#endif
+CTL_RO_GEN(arenas_ntbins, ntbins, unsigned)
+CTL_RO_GEN(arenas_nqbins, nqbins, unsigned)
+CTL_RO_GEN(arenas_ncbins, ncbins, unsigned)
+CTL_RO_GEN(arenas_nsbins, nsbins, unsigned)
+CTL_RO_GEN(arenas_nbins, nbins, unsigned)
+#ifdef JEMALLOC_TCACHE
+CTL_RO_GEN(arenas_nhbins, nhbins, unsigned)
+#endif
+CTL_RO_GEN(arenas_nlruns, nlclasses, size_t)
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_PROF
+/*
+ * "prof.active": read/write toggle for profiling sampling.  Returns the
+ * previous value on read; a write updates opt_prof_active bracketed by
+ * memory barriers.
+ */
+static int
+prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ int ret;
+ bool oldval;
+
+ oldval = opt_prof_active;
+ if (newp != NULL) {
+ /*
+ * The memory barriers will tend to make opt_prof_active
+ * propagate faster on systems with weak memory ordering.
+ */
+ mb_write();
+ WRITE(opt_prof_active, bool);
+ mb_write();
+ }
+ READ(oldval, bool);
+
+ ret = 0;
+RETURN:
+ return (ret);
+}
+
+/*
+ * "prof.dump": write-only trigger that dumps a heap profile via
+ * prof_mdump().  The written value is an optional filename (NULL selects
+ * the default naming).  Returns EFAULT if the dump fails.
+ */
+static int
+prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ int ret;
+ const char *filename = NULL;
+
+ WRITEONLY();
+ WRITE(filename, const char *);
+
+ if (prof_mdump(filename)) {
+ ret = EFAULT;
+ goto RETURN;
+ }
+
+ ret = 0;
+RETURN:
+ return (ret);
+}
+
+/* "prof.interval": current profile dump interval. */
+CTL_RO_GEN(prof_interval, prof_interval, uint64_t)
+#endif
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_STATS
+/*
+ * stats.* leaves: read cached values from the ctl_stats snapshot (refreshed
+ * by writing "epoch").  mib[2] selects the arena, mib[4] the bin/lrun class.
+ */
+CTL_RO_GEN(stats_chunks_current, ctl_stats.chunks.current, size_t)
+CTL_RO_GEN(stats_chunks_total, ctl_stats.chunks.total, uint64_t)
+CTL_RO_GEN(stats_chunks_high, ctl_stats.chunks.high, size_t)
+CTL_RO_GEN(stats_huge_allocated, huge_allocated, size_t)
+CTL_RO_GEN(stats_huge_nmalloc, huge_nmalloc, uint64_t)
+CTL_RO_GEN(stats_huge_ndalloc, huge_ndalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_small_allocated,
+ ctl_stats.arenas[mib[2]].allocated_small, size_t)
+CTL_RO_GEN(stats_arenas_i_small_nmalloc,
+ ctl_stats.arenas[mib[2]].nmalloc_small, uint64_t)
+CTL_RO_GEN(stats_arenas_i_small_ndalloc,
+ ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t)
+CTL_RO_GEN(stats_arenas_i_small_nrequests,
+ ctl_stats.arenas[mib[2]].nrequests_small, uint64_t)
+CTL_RO_GEN(stats_arenas_i_large_allocated,
+ ctl_stats.arenas[mib[2]].astats.allocated_large, size_t)
+CTL_RO_GEN(stats_arenas_i_large_nmalloc,
+ ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t)
+CTL_RO_GEN(stats_arenas_i_large_ndalloc,
+ ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t)
+CTL_RO_GEN(stats_arenas_i_large_nrequests,
+ ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t)
+
+CTL_RO_GEN(stats_arenas_i_bins_j_allocated,
+ ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_nmalloc,
+ ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_ndalloc,
+ ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_nrequests,
+ ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t)
+#ifdef JEMALLOC_TCACHE
+CTL_RO_GEN(stats_arenas_i_bins_j_nfills,
+ ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_nflushes,
+ ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t)
+#endif
+CTL_RO_GEN(stats_arenas_i_bins_j_nruns,
+ ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_nreruns,
+ ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_highruns,
+ ctl_stats.arenas[mib[2]].bstats[mib[4]].highruns, size_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_curruns,
+ ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t)
+
+/*
+ * Dynamic lookup for stats.arenas.<i>.bins.<j>.  The bstats arrays hold
+ * exactly nbins elements (see ctl_arena_init()), so j == nbins is out of
+ * bounds; the previous "j > nbins" test was off by one.
+ */
+const ctl_node_t *
+stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j)
+{
+
+ if (j >= nbins)
+ return (NULL);
+ return (super_stats_arenas_i_bins_j_node);
+}
+
+/* stats.arenas.<i>.lruns.<j>.* : cached per-large-run-class counters. */
+CTL_RO_GEN(stats_arenas_i_lruns_j_nmalloc,
+ ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_lruns_j_ndalloc,
+ ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_lruns_j_nrequests,
+ ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t)
+CTL_RO_GEN(stats_arenas_i_lruns_j_curruns,
+ ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t)
+CTL_RO_GEN(stats_arenas_i_lruns_j_highruns,
+ ctl_stats.arenas[mib[2]].lstats[mib[4]].highruns, size_t)
+
+/*
+ * Dynamic lookup for stats.arenas.<i>.lruns.<j>.  The lstats arrays hold
+ * exactly nlclasses elements (see ctl_arena_init()), so j == nlclasses is
+ * out of bounds; the previous "j > nlclasses" test was off by one.
+ */
+const ctl_node_t *
+stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
+{
+
+ if (j >= nlclasses)
+ return (NULL);
+ return (super_stats_arenas_i_lruns_j_node);
+}
+
+#endif
+/* Per-arena page counts; available regardless of JEMALLOC_STATS. */
+CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
+CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
+#ifdef JEMALLOC_STATS
+CTL_RO_GEN(stats_arenas_i_mapped, ctl_stats.arenas[mib[2]].astats.mapped,
+ size_t)
+CTL_RO_GEN(stats_arenas_i_npurge, ctl_stats.arenas[mib[2]].astats.npurge,
+ uint64_t)
+CTL_RO_GEN(stats_arenas_i_nmadvise, ctl_stats.arenas[mib[2]].astats.nmadvise,
+ uint64_t)
+CTL_RO_GEN(stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged,
+ uint64_t)
+#endif
+
+/*
+ * Dynamic lookup for stats.arenas.<i>.  ctl_stats.arenas holds narenas + 1
+ * elements (the extra slot at index narenas is the merged-totals slot that
+ * ctl_init() marks initialized), so i == narenas is legal, but anything
+ * larger must be rejected before the initialized test reads out of bounds —
+ * the original performed no upper-bound check at all.
+ */
+const ctl_node_t *
+stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i)
+{
+
+ if (i > narenas || ctl_stats.arenas[i].initialized == false)
+ return (NULL);
+ return (super_stats_arenas_i_node);
+}
+
+#ifdef JEMALLOC_STATS
+/* Global totals computed by ctl_refresh(). */
+CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t)
+CTL_RO_GEN(stats_active, ctl_stats.active, size_t)
+CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t)
+#endif
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_SWAP
+# ifdef JEMALLOC_STATS
+CTL_RO_GEN(swap_avail, ctl_stats.swap_avail, size_t)
+# endif
+
+/*
+ * "swap.prezeroed": read-only once swap is enabled; before that, writable
+ * so the value can be staged for the later swap.fds write (see comment
+ * below).
+ */
+static int
+swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen)
+{
+ int ret;
+
+ if (swap_enabled) {
+ READONLY();
+ } else {
+ /*
+ * swap_prezeroed isn't actually used by the swap code until it
+ * is set during a successful chunk_swap_enabled() call. We
+ * use it here to store the value that we'll pass to
+ * chunk_swap_enable() in a swap.fds mallctl(). This is not
+ * very clean, but the obvious alternatives are even worse.
+ */
+ WRITE(swap_prezeroed, bool);
+ }
+
+ READ(swap_prezeroed, bool);
+
+ ret = 0;
+RETURN:
+ return (ret);
+}
+
+CTL_RO_GEN(swap_nfds, swap_nfds, size_t)
+
+/*
+ * "swap.fds": writing an array of file descriptors enables swap-file
+ * backing via chunk_swap_enable(); reading returns the currently configured
+ * descriptors (partial copy + EINVAL on length mismatch, as with READ()).
+ */
+static int
+swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ int ret;
+
+ if (swap_enabled) {
+ READONLY();
+ } else if (newp != NULL) {
+ /*
+ * NOTE(review): newlen that is not a multiple of sizeof(int)
+ * is silently truncated, and fds[] is a VLA sized directly
+ * from caller-supplied newlen — a very large write request
+ * means an unbounded stack allocation; confirm callers are
+ * trusted.
+ */
+ size_t nfds = newlen / sizeof(int);
+
+ {
+ int fds[nfds];
+
+ memcpy(fds, newp, nfds * sizeof(int));
+ if (chunk_swap_enable(fds, nfds, swap_prezeroed)) {
+ ret = EFAULT;
+ goto RETURN;
+ }
+ }
+ }
+
+ if (oldp != NULL && oldlenp != NULL) {
+ if (*oldlenp != swap_nfds * sizeof(int)) {
+ size_t copylen = (swap_nfds * sizeof(int) <= *oldlenp)
+ ? swap_nfds * sizeof(int) : *oldlenp;
+
+ memcpy(oldp, swap_fds, copylen);
+ ret = EINVAL;
+ goto RETURN;
+ } else
+ memcpy(oldp, swap_fds, *oldlenp);
+ }
+
+ ret = 0;
+RETURN:
+ return (ret);
+}
+#endif
diff --git a/dep/src/jmalloc/extent.c b/dep/src/jmalloc/extent.c
new file mode 100644
index 00000000000..3c04d3aa5d1
--- /dev/null
+++ b/dep/src/jmalloc/extent.c
@@ -0,0 +1,41 @@
+#define JEMALLOC_EXTENT_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+
#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
/*
 * Total order on extents: primarily by size, with base address as the tie
 * breaker.  Returns negative, zero, or positive, qsort(3)-style.
 */
static inline int
extent_szad_comp(extent_node_t *a, extent_node_t *b)
{
	size_t sz_a = a->size;
	size_t sz_b = b->size;

	if (sz_a != sz_b)
		return ((sz_a > sz_b) ? 1 : -1);

	{
		uintptr_t ad_a = (uintptr_t)a->addr;
		uintptr_t ad_b = (uintptr_t)b->addr;

		return ((ad_a > ad_b) - (ad_a < ad_b));
	}
}

/* Generate red-black tree functions. */
rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad,
    extent_szad_comp)
#endif
+
+static inline int
+extent_ad_comp(extent_node_t *a, extent_node_t *b)
+{
+ uintptr_t a_addr = (uintptr_t)a->addr;
+ uintptr_t b_addr = (uintptr_t)b->addr;
+
+ return ((a_addr > b_addr) - (a_addr < b_addr));
+}
+
+/* Generate red-black tree functions. */
+rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
+ extent_ad_comp)
diff --git a/dep/src/jmalloc/hash.c b/dep/src/jmalloc/hash.c
new file mode 100644
index 00000000000..6a13d7a03c0
--- /dev/null
+++ b/dep/src/jmalloc/hash.c
@@ -0,0 +1,2 @@
+#define HASH_C_
+#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/dep/src/jmalloc/huge.c b/dep/src/jmalloc/huge.c
new file mode 100644
index 00000000000..d35aa5cdd00
--- /dev/null
+++ b/dep/src/jmalloc/huge.c
@@ -0,0 +1,298 @@
+#define JEMALLOC_HUGE_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+#ifdef JEMALLOC_STATS
+uint64_t huge_nmalloc;
+uint64_t huge_ndalloc;
+size_t huge_allocated;
+#endif
+
+malloc_mutex_t huge_mtx;
+
+/******************************************************************************/
+
+/* Tree of chunks that are stand-alone huge allocations. */
+static extent_tree_t huge;
+
+void *
+huge_malloc(size_t size, bool zero)
+{
+ void *ret;
+ size_t csize;
+ extent_node_t *node;
+
+ /* Allocate one or more contiguous chunks for this request. */
+
+ csize = CHUNK_CEILING(size);
+ if (csize == 0) {
+ /* size is large enough to cause size_t wrap-around. */
+ return (NULL);
+ }
+
+ /* Allocate an extent node with which to track the chunk. */
+ node = base_node_alloc();
+ if (node == NULL)
+ return (NULL);
+
+ ret = chunk_alloc(csize, &zero);
+ if (ret == NULL) {
+ base_node_dealloc(node);
+ return (NULL);
+ }
+
+ /* Insert node into huge. */
+ node->addr = ret;
+ node->size = csize;
+
+ malloc_mutex_lock(&huge_mtx);
+ extent_tree_ad_insert(&huge, node);
+#ifdef JEMALLOC_STATS
+ huge_nmalloc++;
+ huge_allocated += csize;
+#endif
+ malloc_mutex_unlock(&huge_mtx);
+
+#ifdef JEMALLOC_FILL
+ if (zero == false) {
+ if (opt_junk)
+ memset(ret, 0xa5, csize);
+ else if (opt_zero)
+ memset(ret, 0, csize);
+ }
+#endif
+
+ return (ret);
+}
+
+/* Only handles large allocations that require more than chunk alignment. */
+void *
+huge_palloc(size_t alignment, size_t size)
+{
+ void *ret;
+ size_t alloc_size, chunk_size, offset;
+ extent_node_t *node;
+ bool zero;
+
+ /*
+ * This allocation requires alignment that is even larger than chunk
+ * alignment. This means that huge_malloc() isn't good enough.
+ *
+ * Allocate almost twice as many chunks as are demanded by the size or
+ * alignment, in order to assure the alignment can be achieved, then
+ * unmap leading and trailing chunks.
+ */
+ assert(alignment >= chunksize);
+
+ chunk_size = CHUNK_CEILING(size);
+
+ if (size >= alignment)
+ alloc_size = chunk_size + alignment - chunksize;
+ else
+ alloc_size = (alignment << 1) - chunksize;
+
+ /* Allocate an extent node with which to track the chunk. */
+ node = base_node_alloc();
+ if (node == NULL)
+ return (NULL);
+
+ zero = false;
+ ret = chunk_alloc(alloc_size, &zero);
+ if (ret == NULL) {
+ base_node_dealloc(node);
+ return (NULL);
+ }
+
+ offset = (uintptr_t)ret & (alignment - 1);
+ assert((offset & chunksize_mask) == 0);
+ assert(offset < alloc_size);
+ if (offset == 0) {
+ /* Trim trailing space. */
+ chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
+ - chunk_size);
+ } else {
+ size_t trailsize;
+
+ /* Trim leading space. */
+ chunk_dealloc(ret, alignment - offset);
+
+ ret = (void *)((uintptr_t)ret + (alignment - offset));
+
+ trailsize = alloc_size - (alignment - offset) - chunk_size;
+ if (trailsize != 0) {
+ /* Trim trailing space. */
+ assert(trailsize < alloc_size);
+ chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
+ trailsize);
+ }
+ }
+
+ /* Insert node into huge. */
+ node->addr = ret;
+ node->size = chunk_size;
+
+ malloc_mutex_lock(&huge_mtx);
+ extent_tree_ad_insert(&huge, node);
+#ifdef JEMALLOC_STATS
+ huge_nmalloc++;
+ huge_allocated += chunk_size;
+#endif
+ malloc_mutex_unlock(&huge_mtx);
+
+#ifdef JEMALLOC_FILL
+ if (opt_junk)
+ memset(ret, 0xa5, chunk_size);
+ else if (opt_zero)
+ memset(ret, 0, chunk_size);
+#endif
+
+ return (ret);
+}
+
+void *
+huge_ralloc(void *ptr, size_t size, size_t oldsize)
+{
+ void *ret;
+ size_t copysize;
+
+ /* Avoid moving the allocation if the size class would not change. */
+ if (oldsize > arena_maxclass &&
+ CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
+#ifdef JEMALLOC_FILL
+ if (opt_junk && size < oldsize) {
+ memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
+ - size);
+ } else if (opt_zero && size > oldsize) {
+ memset((void *)((uintptr_t)ptr + oldsize), 0, size
+ - oldsize);
+ }
+#endif
+ return (ptr);
+ }
+
+ /*
+ * If we get here, then size and oldsize are different enough that we
+ * need to use a different size class. In that case, fall back to
+ * allocating new space and copying.
+ */
+ ret = huge_malloc(size, false);
+ if (ret == NULL)
+ return (NULL);
+
+ copysize = (size < oldsize) ? size : oldsize;
+ memcpy(ret, ptr, copysize);
+ idalloc(ptr);
+ return (ret);
+}
+
/*
 * Free the huge allocation at ptr: remove its extent node from the huge
 * tree, return the chunk(s) to the chunk layer, and release the node.
 * ptr must be the base address of an extant huge allocation.
 */
void
huge_dalloc(void *ptr)
{
	extent_node_t *node, key;

	malloc_mutex_lock(&huge_mtx);

	/* Extract from tree of huge allocations. */
	key.addr = ptr;
	node = extent_tree_ad_search(&huge, &key);
	assert(node != NULL);
	assert(node->addr == ptr);
	extent_tree_ad_remove(&huge, node);

#ifdef JEMALLOC_STATS
	huge_ndalloc++;
	huge_allocated -= node->size;
#endif

	/* node stays valid after unlock; only the tree was protected. */
	malloc_mutex_unlock(&huge_mtx);

	/* Unmap chunk. */
#ifdef JEMALLOC_FILL
#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
	/* Junk-fill only when pages may be recycled rather than unmapped. */
	if (opt_junk)
		memset(node->addr, 0x5a, node->size);
#endif
#endif
	chunk_dealloc(node->addr, node->size);

	base_node_dealloc(node);
}
+
+size_t
+huge_salloc(const void *ptr)
+{
+ size_t ret;
+ extent_node_t *node, key;
+
+ malloc_mutex_lock(&huge_mtx);
+
+ /* Extract from tree of huge allocations. */
+ key.addr = __DECONST(void *, ptr);
+ node = extent_tree_ad_search(&huge, &key);
+ assert(node != NULL);
+
+ ret = node->size;
+
+ malloc_mutex_unlock(&huge_mtx);
+
+ return (ret);
+}
+
#ifdef JEMALLOC_PROF
/*
 * Return the profiling counter associated with the huge allocation at ptr.
 * ptr must be the base address of an extant huge allocation.
 */
prof_thr_cnt_t *
huge_prof_cnt_get(const void *ptr)
{
	prof_thr_cnt_t *ret;
	extent_node_t *node, key;

	malloc_mutex_lock(&huge_mtx);

	/* Extract from tree of huge allocations. */
	key.addr = __DECONST(void *, ptr);
	node = extent_tree_ad_search(&huge, &key);
	assert(node != NULL);

	ret = node->prof_cnt;

	malloc_mutex_unlock(&huge_mtx);

	return (ret);
}

/*
 * Associate profiling counter cnt with the huge allocation at ptr.
 * ptr must be the base address of an extant huge allocation.
 */
void
huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
{
	extent_node_t *node, key;

	malloc_mutex_lock(&huge_mtx);

	/* Extract from tree of huge allocations. */
	key.addr = __DECONST(void *, ptr);
	node = extent_tree_ad_search(&huge, &key);
	assert(node != NULL);

	node->prof_cnt = cnt;

	malloc_mutex_unlock(&huge_mtx);
}
#endif
+
+bool
+huge_boot(void)
+{
+
+ /* Initialize chunks data. */
+ if (malloc_mutex_init(&huge_mtx))
+ return (true);
+ extent_tree_ad_new(&huge);
+
+#ifdef JEMALLOC_STATS
+ huge_nmalloc = 0;
+ huge_ndalloc = 0;
+ huge_allocated = 0;
+#endif
+
+ return (false);
+}
diff --git a/dep/src/jmalloc/jemalloc.c b/dep/src/jmalloc/jemalloc.c
new file mode 100644
index 00000000000..e01de0d5066
--- /dev/null
+++ b/dep/src/jmalloc/jemalloc.c
@@ -0,0 +1,1349 @@
+/*-
+ * This allocator implementation is designed to provide scalable performance
+ * for multi-threaded programs on multi-processor systems. The following
+ * features are included for this purpose:
+ *
+ * + Multiple arenas are used if there are multiple CPUs, which reduces lock
+ * contention and cache sloshing.
+ *
+ * + Thread-specific caching is used if there are multiple threads, which
+ * reduces the amount of locking.
+ *
+ * + Cache line sharing between arenas is avoided for internal data
+ * structures.
+ *
+ * + Memory is managed in chunks and runs (chunks can be split into runs),
+ * rather than as individual pages. This provides a constant-time
+ * mechanism for associating allocations with particular arenas.
+ *
+ * Allocation requests are rounded up to the nearest size class, and no record
+ * of the original request size is maintained. Allocations are broken into
+ * categories according to size class. Assuming 1 MiB chunks, 4 KiB pages and
+ * a 16 byte quantum on a 32-bit system, the size classes in each category are
+ * as follows:
+ *
+ * |========================================|
+ * | Category | Subcategory | Size |
+ * |========================================|
+ * | Small | Tiny | 2 |
+ * | | | 4 |
+ * | | | 8 |
+ * | |------------------+----------|
+ * | | Quantum-spaced | 16 |
+ * | | | 32 |
+ * | | | 48 |
+ * | | | ... |
+ * | | | 96 |
+ * | | | 112 |
+ * | | | 128 |
+ * | |------------------+----------|
+ * | | Cacheline-spaced | 192 |
+ * | | | 256 |
+ * | | | 320 |
+ * | | | 384 |
+ * | | | 448 |
+ * | | | 512 |
+ * | |------------------+----------|
+ * | | Sub-page | 760 |
+ * | | | 1024 |
+ * | | | 1280 |
+ * | | | ... |
+ * | | | 3328 |
+ * | | | 3584 |
+ * | | | 3840 |
+ * |========================================|
+ * | Large | 4 KiB |
+ * | | 8 KiB |
+ * | | 12 KiB |
+ * | | ... |
+ * | | 1012 KiB |
+ * | | 1016 KiB |
+ * | | 1020 KiB |
+ * |========================================|
+ * | Huge | 1 MiB |
+ * | | 2 MiB |
+ * | | 3 MiB |
+ * | | ... |
+ * |========================================|
+ *
+ * Different mechanisms are used accoding to category:
+ *
+ * Small: Each size class is segregated into its own set of runs. Each run
+ * maintains a bitmap of which regions are free/allocated.
+ *
+ * Large : Each allocation is backed by a dedicated run. Metadata are stored
+ * in the associated arena chunk header maps.
+ *
+ * Huge : Each allocation is backed by a dedicated contiguous set of chunks.
+ * Metadata are stored in a separate red-black tree.
+ *
+ *******************************************************************************
+ */
+
+#define JEMALLOC_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+malloc_mutex_t arenas_lock;
+arena_t **arenas;
+unsigned narenas;
+#ifndef NO_TLS
+static unsigned next_arena;
+#endif
+
+#ifndef NO_TLS
+__thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec"));
+#endif
+
+/* Set to true once the allocator has been initialized. */
+static bool malloc_initialized = false;
+
+/* Used to let the initializing thread recursively allocate. */
+static pthread_t malloc_initializer = (unsigned long)0;
+
+/* Used to avoid initialization races. */
+static malloc_mutex_t init_lock = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP;
+
+#ifdef DYNAMIC_PAGE_SHIFT
+size_t pagesize;
+size_t pagesize_mask;
+size_t lg_pagesize;
+#endif
+
+unsigned ncpus;
+
+/* Runtime configuration options. */
+const char *JEMALLOC_P(malloc_options)
+ JEMALLOC_ATTR(visibility("default"));
+#ifdef JEMALLOC_DEBUG
+bool opt_abort = true;
+# ifdef JEMALLOC_FILL
+bool opt_junk = true;
+# endif
+#else
+bool opt_abort = false;
+# ifdef JEMALLOC_FILL
+bool opt_junk = false;
+# endif
+#endif
+#ifdef JEMALLOC_SYSV
+bool opt_sysv = false;
+#endif
+#ifdef JEMALLOC_XMALLOC
+bool opt_xmalloc = false;
+#endif
+#ifdef JEMALLOC_FILL
+bool opt_zero = false;
+#endif
+static int opt_narenas_lshift = 0;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void wrtmessage(void *cbopaque, const char *s);
+static void stats_print_atexit(void);
+static unsigned malloc_ncpus(void);
+static bool malloc_init_hard(void);
+static void jemalloc_prefork(void);
+static void jemalloc_postfork(void);
+
+/******************************************************************************/
+/* malloc_message() setup. */
+
#ifdef JEMALLOC_HAVE_ATTR
JEMALLOC_ATTR(visibility("hidden"))
#else
static
#endif
/*
 * Default malloc_message sink: best-effort write of s to stderr.
 * cbopaque is unused (part of the malloc_message callback signature).
 */
void
wrtmessage(void *cbopaque, const char *s)
{
	size_t off, len;

	/*
	 * Fix: the previous code ignored write(2)'s return value, so a short
	 * write or EINTR silently dropped part of the message.  Loop until
	 * everything is written; on a hard error there is nothing sane to do
	 * for a diagnostic channel, so just give up.
	 */
	len = strlen(s);
	for (off = 0; off < len;) {
		ssize_t n = write(STDERR_FILENO, &s[off], len - off);

		if (n == -1) {
			if (errno == EINTR)
				continue;
			break;
		}
		off += (size_t)n;
	}
}
+
+void (*JEMALLOC_P(malloc_message))(void *, const char *s)
+ JEMALLOC_ATTR(visibility("default")) = wrtmessage;
+
+/******************************************************************************/
+/*
+ * Begin miscellaneous support functions.
+ */
+
/* Create a new arena and insert it into the arenas array at index ind. */
arena_t *
arenas_extend(unsigned ind)
{
	arena_t *ret;

	/* Allocate enough space for trailing bins. */
	ret = (arena_t *)base_alloc(sizeof(arena_t)
	    + (sizeof(arena_bin_t) * (nbins - 1)));
	if (ret != NULL && arena_new(ret, ind) == false) {
		arenas[ind] = ret;
		return (ret);
	}
	/* Only reached if there is an OOM error. */

	/*
	 * OOM here is quite inconvenient to propagate, since dealing with it
	 * would require a check for failure in the fast path.  Instead, punt
	 * by using arenas[0].  In practice, this is an extremely unlikely
	 * failure.
	 */
	malloc_write("<jemalloc>: Error initializing arena\n");
	if (opt_abort)
		abort();

	/* Relies on arena 0 having been initialized during bootstrap. */
	return (arenas[0]);
}
+
#ifndef NO_TLS
/*
 * Choose an arena based on a per-thread value (slow-path code only, called
 * only by choose_arena()).
 */
arena_t *
choose_arena_hard(void)
{
	arena_t *ret;

	if (narenas > 1) {
		malloc_mutex_lock(&arenas_lock);
		/* Round-robin assignment; create the arena lazily. */
		if ((ret = arenas[next_arena]) == NULL)
			ret = arenas_extend(next_arena);
		next_arena = (next_arena + 1) % narenas;
		malloc_mutex_unlock(&arenas_lock);
	} else
		ret = arenas[0];

	/* Cache the choice in this thread's TLS slot for the fast path. */
	arenas_map = ret;

	return (ret);
}
#endif
+
/*
 * atexit(3) hook registered when opt_stats_print is set: merge per-thread
 * tcache statistics into their owning arenas, then print allocator stats.
 */
static void
stats_print_atexit(void)
{

#if (defined(JEMALLOC_TCACHE) && defined(JEMALLOC_STATS))
	unsigned i;

	/*
	 * Merge stats from extant threads.  This is racy, since individual
	 * threads do not lock when recording tcache stats events.  As a
	 * consequence, the final stats may be slightly out of date by the time
	 * they are reported, if other threads continue to allocate.
	 */
	for (i = 0; i < narenas; i++) {
		arena_t *arena = arenas[i];
		if (arena != NULL) {
			tcache_t *tcache;

			/*
			 * tcache_stats_merge() locks bins, so if any code is
			 * introduced that acquires both arena and bin locks in
			 * the opposite order, deadlocks may result.
			 */
			malloc_mutex_lock(&arena->lock);
			ql_foreach(tcache, &arena->tcache_ql, link) {
				tcache_stats_merge(tcache, arena);
			}
			malloc_mutex_unlock(&arena->lock);
		}
	}
#endif
	JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL);
}
+
+/*
+ * End miscellaneous support functions.
+ */
+/******************************************************************************/
+/*
+ * Begin initialization functions.
+ */
+
/*
 * Query the number of online CPUs via sysconf(3).  Always returns at least
 * 1, falling back to 1 when the query fails.
 */
static unsigned
malloc_ncpus(void)
{
	long result;

	result = sysconf(_SC_NPROCESSORS_ONLN);
	/*
	 * Fix: the previous code set ret = 1 on error but then
	 * unconditionally overwrote it with (unsigned)result, yielding
	 * UINT_MAX when sysconf() returned -1.  Guard against both the error
	 * value and a nonsensical non-positive result.
	 */
	if (result <= 0)
		return (1);

	return ((unsigned)result);
}
+
+/*
+ * FreeBSD's pthreads implementation calls malloc(3), so the malloc
+ * implementation has to take pains to avoid infinite recursion during
+ * initialization.
+ */
+static inline bool
+malloc_init(void)
+{
+
+ if (malloc_initialized == false)
+ return (malloc_init_hard());
+
+ return (false);
+}
+
/*
 * Slow path of malloc_init(): perform one-time allocator bootstrap under
 * init_lock.  Returns true on unrecoverable error, false on success (or if
 * another thread already initialized, or this thread is recursing).
 *
 * Bootstrap order matters: ctl, chunk, base, prof, arena, tcache, huge must
 * all be booted before the first arena is created.
 */
static bool
malloc_init_hard(void)
{
	unsigned i;
	int linklen;
	char buf[PATH_MAX + 1];
	const char *opts;
	arena_t *init_arenas[1];

	malloc_mutex_lock(&init_lock);
	if (malloc_initialized || malloc_initializer == pthread_self()) {
		/*
		 * Another thread initialized the allocator before this one
		 * acquired init_lock, or this thread is the initializing
		 * thread, and it is recursively allocating.
		 */
		malloc_mutex_unlock(&init_lock);
		return (false);
	}
	if (malloc_initializer != (unsigned long)0) {
		/* Busy-wait until the initializing thread completes. */
		do {
			malloc_mutex_unlock(&init_lock);
			CPU_SPINWAIT;
			malloc_mutex_lock(&init_lock);
		} while (malloc_initialized == false);
		return (false);
	}

#ifdef DYNAMIC_PAGE_SHIFT
	/* Get page size. */
	{
		long result;

		result = sysconf(_SC_PAGESIZE);
		assert(result != -1);
		pagesize = (unsigned)result;

		/*
		 * We assume that pagesize is a power of 2 when calculating
		 * pagesize_mask and lg_pagesize.
		 */
		assert(((result - 1) & result) == 0);
		pagesize_mask = result - 1;
		lg_pagesize = ffs((int)result) - 1;
	}
#endif

	/*
	 * Option sources, in increasing priority: /etc/jemalloc.conf
	 * symlink name, JEMALLOC_OPTIONS environment variable, compiled-in
	 * malloc_options string.
	 */
	for (i = 0; i < 3; i++) {
		unsigned j;

		/* Get runtime configuration. */
		switch (i) {
		case 0:
			if ((linklen = readlink("/etc/jemalloc.conf", buf,
			    sizeof(buf) - 1)) != -1) {
				/*
				 * Use the contents of the "/etc/jemalloc.conf"
				 * symbolic link's name.
				 */
				buf[linklen] = '\0';
				opts = buf;
			} else {
				/* No configuration specified. */
				buf[0] = '\0';
				opts = buf;
			}
			break;
		case 1:
			if ((opts = getenv("JEMALLOC_OPTIONS")) != NULL) {
				/*
				 * Do nothing; opts is already initialized to
				 * the value of the JEMALLOC_OPTIONS
				 * environment variable.
				 */
			} else {
				/* No configuration specified. */
				buf[0] = '\0';
				opts = buf;
			}
			break;
		case 2:
			if (JEMALLOC_P(malloc_options) != NULL) {
				/*
				 * Use options that were compiled into the
				 * program.
				 */
				opts = JEMALLOC_P(malloc_options);
			} else {
				/* No configuration specified. */
				buf[0] = '\0';
				opts = buf;
			}
			break;
		default:
			/* NOTREACHED */
			assert(false);
			buf[0] = '\0';
			opts = buf;
		}

		/* Each flag may be prefixed with a decimal repeat count. */
		for (j = 0; opts[j] != '\0'; j++) {
			unsigned k, nreps;
			bool nseen;

			/* Parse repetition count, if any. */
			for (nreps = 0, nseen = false;; j++, nseen = true) {
				switch (opts[j]) {
				case '0': case '1': case '2': case '3':
				case '4': case '5': case '6': case '7':
				case '8': case '9':
					nreps *= 10;
					nreps += opts[j] - '0';
					break;
				default:
					goto MALLOC_OUT;
				}
			}
MALLOC_OUT:
			if (nseen == false)
				nreps = 1;

			/* Lowercase decreases/disables, uppercase the reverse. */
			for (k = 0; k < nreps; k++) {
				switch (opts[j]) {
				case 'a':
					opt_abort = false;
					break;
				case 'A':
					opt_abort = true;
					break;
#ifdef JEMALLOC_PROF
				case 'b':
					if (opt_lg_prof_bt_max > 0)
						opt_lg_prof_bt_max--;
					break;
				case 'B':
					if (opt_lg_prof_bt_max < LG_PROF_BT_MAX)
						opt_lg_prof_bt_max++;
					break;
#endif
				case 'c':
					if (opt_lg_cspace_max - 1 >
					    opt_lg_qspace_max &&
					    opt_lg_cspace_max >
					    LG_CACHELINE)
						opt_lg_cspace_max--;
					break;
				case 'C':
					if (opt_lg_cspace_max < PAGE_SHIFT
					    - 1)
						opt_lg_cspace_max++;
					break;
				case 'd':
					if (opt_lg_dirty_mult + 1 <
					    (sizeof(size_t) << 3))
						opt_lg_dirty_mult++;
					break;
				case 'D':
					if (opt_lg_dirty_mult >= 0)
						opt_lg_dirty_mult--;
					break;
#ifdef JEMALLOC_PROF
				case 'e':
					opt_prof_active = false;
					break;
				case 'E':
					opt_prof_active = true;
					break;
				case 'f':
					opt_prof = false;
					break;
				case 'F':
					opt_prof = true;
					break;
#endif
#ifdef JEMALLOC_TCACHE
				case 'g':
					if (opt_lg_tcache_gc_sweep >= 0)
						opt_lg_tcache_gc_sweep--;
					break;
				case 'G':
					if (opt_lg_tcache_gc_sweep + 1 <
					    (sizeof(size_t) << 3))
						opt_lg_tcache_gc_sweep++;
					break;
				case 'h':
					opt_tcache = false;
					break;
				case 'H':
					opt_tcache = true;
					break;
#endif
#ifdef JEMALLOC_PROF
				case 'i':
					if (opt_lg_prof_interval >= 0)
						opt_lg_prof_interval--;
					break;
				case 'I':
					if (opt_lg_prof_interval + 1 <
					    (sizeof(uint64_t) << 3))
						opt_lg_prof_interval++;
					break;
#endif
#ifdef JEMALLOC_FILL
				case 'j':
					opt_junk = false;
					break;
				case 'J':
					opt_junk = true;
					break;
#endif
				case 'k':
					/*
					 * Chunks always require at least one
					 * header page, plus one data page.
					 */
					if ((1U << (opt_lg_chunk - 1)) >=
					    (2U << PAGE_SHIFT))
						opt_lg_chunk--;
					break;
				case 'K':
					if (opt_lg_chunk + 1 <
					    (sizeof(size_t) << 3))
						opt_lg_chunk++;
					break;
#ifdef JEMALLOC_PROF
				case 'l':
					opt_prof_leak = false;
					break;
				case 'L':
					opt_prof_leak = true;
					break;
#endif
#ifdef JEMALLOC_TCACHE
				case 'm':
					if (opt_lg_tcache_maxclass >= 0)
						opt_lg_tcache_maxclass--;
					break;
				case 'M':
					if (opt_lg_tcache_maxclass + 1 <
					    (sizeof(size_t) << 3))
						opt_lg_tcache_maxclass++;
					break;
#endif
				case 'n':
					opt_narenas_lshift--;
					break;
				case 'N':
					opt_narenas_lshift++;
					break;
#ifdef JEMALLOC_SWAP
				case 'o':
					opt_overcommit = false;
					break;
				case 'O':
					opt_overcommit = true;
					break;
#endif
				case 'p':
					opt_stats_print = false;
					break;
				case 'P':
					opt_stats_print = true;
					break;
				case 'q':
					if (opt_lg_qspace_max > LG_QUANTUM)
						opt_lg_qspace_max--;
					break;
				case 'Q':
					if (opt_lg_qspace_max + 1 <
					    opt_lg_cspace_max)
						opt_lg_qspace_max++;
					break;
#ifdef JEMALLOC_PROF
				case 's':
					if (opt_lg_prof_sample > 0)
						opt_lg_prof_sample--;
					break;
				case 'S':
					if (opt_lg_prof_sample + 1 <
					    (sizeof(uint64_t) << 3))
						opt_lg_prof_sample++;
					break;
				case 'u':
					opt_prof_udump = false;
					break;
				case 'U':
					opt_prof_udump = true;
					break;
#endif
#ifdef JEMALLOC_SYSV
				case 'v':
					opt_sysv = false;
					break;
				case 'V':
					opt_sysv = true;
					break;
#endif
#ifdef JEMALLOC_XMALLOC
				case 'x':
					opt_xmalloc = false;
					break;
				case 'X':
					opt_xmalloc = true;
					break;
#endif
#ifdef JEMALLOC_FILL
				case 'z':
					opt_zero = false;
					break;
				case 'Z':
					opt_zero = true;
					break;
#endif
				default: {
					char cbuf[2];

					cbuf[0] = opts[j];
					cbuf[1] = '\0';
					malloc_write(
					    "<jemalloc>: Unsupported character "
					    "in malloc options: '");
					malloc_write(cbuf);
					malloc_write("'\n");
				}
				}
			}
		}
	}

	/* Register fork handlers. */
	if (pthread_atfork(jemalloc_prefork, jemalloc_postfork,
	    jemalloc_postfork) != 0) {
		malloc_write("<jemalloc>: Error in pthread_atfork()\n");
		if (opt_abort)
			abort();
	}

	if (ctl_boot()) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}

	if (opt_stats_print) {
		/* Print statistics at exit. */
		if (atexit(stats_print_atexit) != 0) {
			malloc_write("<jemalloc>: Error in atexit()\n");
			if (opt_abort)
				abort();
		}
	}

	if (chunk_boot()) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}

	if (base_boot()) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}

#ifdef JEMALLOC_PROF
	prof_boot0();
#endif

	if (arena_boot()) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}

#ifdef JEMALLOC_TCACHE
	tcache_boot();
#endif

	if (huge_boot()) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}

	/*
	 * Create enough scaffolding to allow recursive allocation in
	 * malloc_ncpus().
	 */
	narenas = 1;
	arenas = init_arenas;
	memset(arenas, 0, sizeof(arena_t *) * narenas);

	/*
	 * Initialize one arena here.  The rest are lazily created in
	 * choose_arena_hard().
	 */
	arenas_extend(0);
	if (arenas[0] == NULL) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}

#ifndef NO_TLS
	/*
	 * Assign the initial arena to the initial thread, in order to avoid
	 * spurious creation of an extra arena if the application switches to
	 * threaded mode.
	 */
	arenas_map = arenas[0];
#endif

	malloc_mutex_init(&arenas_lock);

#ifdef JEMALLOC_PROF
	if (prof_boot1()) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}
#endif

	/*
	 * Get number of CPUs.  malloc_initializer marks this thread so any
	 * recursive allocation during malloc_ncpus() takes the early-return
	 * path above while init_lock is dropped.
	 */
	malloc_initializer = pthread_self();
	malloc_mutex_unlock(&init_lock);
	ncpus = malloc_ncpus();
	malloc_mutex_lock(&init_lock);

	if (ncpus > 1) {
		/*
		 * For SMP systems, create more than one arena per CPU by
		 * default.
		 */
		opt_narenas_lshift += 2;
	}

	/* Determine how many arenas to use. */
	narenas = ncpus;
	if (opt_narenas_lshift > 0) {
		/* The first check guards against shift overflow. */
		if ((narenas << opt_narenas_lshift) > narenas)
			narenas <<= opt_narenas_lshift;
		/*
		 * Make sure not to exceed the limits of what base_alloc() can
		 * handle.
		 */
		if (narenas * sizeof(arena_t *) > chunksize)
			narenas = chunksize / sizeof(arena_t *);
	} else if (opt_narenas_lshift < 0) {
		if ((narenas >> -opt_narenas_lshift) < narenas)
			narenas >>= -opt_narenas_lshift;
		/* Make sure there is at least one arena. */
		if (narenas == 0)
			narenas = 1;
	}

#ifdef NO_TLS
	if (narenas > 1) {
		static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
		    23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
		    89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
		    151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
		    223, 227, 229, 233, 239, 241, 251, 257, 263};
		unsigned nprimes, parenas;

		/*
		 * Pick a prime number of hash arenas that is more than narenas
		 * so that direct hashing of pthread_self() pointers tends to
		 * spread allocations evenly among the arenas.
		 */
		assert((narenas & 1) == 0);	/* narenas must be even. */
		nprimes = (sizeof(primes) >> LG_SIZEOF_INT);
		parenas = primes[nprimes - 1]; /* In case not enough primes. */
		for (i = 1; i < nprimes; i++) {
			if (primes[i] > narenas) {
				parenas = primes[i];
				break;
			}
		}
		narenas = parenas;
	}
#endif

#ifndef NO_TLS
	next_arena = 0;
#endif

	/* Allocate and initialize arenas. */
	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
	if (arenas == NULL) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}
	/*
	 * Zero the array.  In practice, this should always be pre-zeroed,
	 * since it was just mmap()ed, but let's be sure.
	 */
	memset(arenas, 0, sizeof(arena_t *) * narenas);
	/* Copy the pointer to the one arena that was already initialized. */
	arenas[0] = init_arenas[0];

	malloc_initialized = true;
	malloc_mutex_unlock(&init_lock);
	return (false);
}
+
+/*
+ * End initialization functions.
+ */
+/******************************************************************************/
+/*
+ * Begin malloc(3)-compatible functions.
+ */
+
/*
 * malloc(3)-compatible entry point.  Returns NULL and sets errno to ENOMEM
 * on failure; with SysV semantics enabled ('V'), size 0 returns NULL
 * without setting errno.
 */
JEMALLOC_ATTR(malloc)
JEMALLOC_ATTR(visibility("default"))
void *
JEMALLOC_P(malloc)(size_t size)
{
	void *ret;
#ifdef JEMALLOC_PROF
	prof_thr_cnt_t *cnt;
#endif

	if (malloc_init()) {
		ret = NULL;
		goto OOM;
	}

	if (size == 0) {
#ifdef JEMALLOC_SYSV
		if (opt_sysv == false)
#endif
			size = 1;
#ifdef JEMALLOC_SYSV
		else {
# ifdef JEMALLOC_XMALLOC
			if (opt_xmalloc) {
				malloc_write("<jemalloc>: Error in malloc(): "
				    "invalid size 0\n");
				abort();
			}
# endif
			ret = NULL;
			goto RETURN;
		}
#endif
	}

#ifdef JEMALLOC_PROF
	if (opt_prof) {
		if ((cnt = prof_alloc_prep(size)) == NULL) {
			ret = NULL;
			goto OOM;
		}
		/*
		 * Promote sampled small requests to the smallest large size
		 * class so they can be tracked individually.
		 */
		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <=
		    small_maxclass) {
			ret = imalloc(small_maxclass+1);
			if (ret != NULL)
				arena_prof_promoted(ret, size);
		} else
			ret = imalloc(size);
	} else
#endif
		ret = imalloc(size);

OOM:
	if (ret == NULL) {
#ifdef JEMALLOC_XMALLOC
		if (opt_xmalloc) {
			malloc_write("<jemalloc>: Error in malloc(): "
			    "out of memory\n");
			abort();
		}
#endif
		errno = ENOMEM;
	}

#ifdef JEMALLOC_SYSV
RETURN:
#endif
#ifdef JEMALLOC_PROF
	/* cnt is assigned on every path where opt_prof && ret != NULL. */
	if (opt_prof && ret != NULL)
		prof_malloc(ret, cnt);
#endif
	return (ret);
}
+
/*
 * posix_memalign(3)-compatible entry point.  Returns 0 on success and stores
 * the allocation in *memptr; returns EINVAL for a bad alignment or ENOMEM on
 * allocation failure (errno is not set, per POSIX).
 */
JEMALLOC_ATTR(nonnull(1))
JEMALLOC_ATTR(visibility("default"))
int
JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
{
	int ret;
	void *result;
#ifdef JEMALLOC_PROF
	prof_thr_cnt_t *cnt;
#endif

	if (malloc_init())
		result = NULL;
	else {
		if (size == 0) {
#ifdef JEMALLOC_SYSV
			if (opt_sysv == false)
#endif
				size = 1;
#ifdef JEMALLOC_SYSV
			else {
# ifdef JEMALLOC_XMALLOC
				if (opt_xmalloc) {
					malloc_write("<jemalloc>: Error in "
					    "posix_memalign(): invalid size "
					    "0\n");
					abort();
				}
# endif
				result = NULL;
				*memptr = NULL;
				ret = 0;
				goto RETURN;
			}
#endif
		}

		/* Make sure that alignment is a large enough power of 2. */
		if (((alignment - 1) & alignment) != 0
		    || alignment < sizeof(void *)) {
#ifdef JEMALLOC_XMALLOC
			if (opt_xmalloc) {
				malloc_write("<jemalloc>: Error in "
				    "posix_memalign(): invalid alignment\n");
				abort();
			}
#endif
			result = NULL;
			ret = EINVAL;
			goto RETURN;
		}

#ifdef JEMALLOC_PROF
		if (opt_prof) {
			if ((cnt = prof_alloc_prep(size)) == NULL) {
				/*
				 * NOTE(review): this EINVAL is overwritten
				 * with ENOMEM by the result == NULL branch
				 * below -- confirm which code is intended.
				 */
				result = NULL;
				ret = EINVAL;
			} else {
				if (prof_promote && (uintptr_t)cnt !=
				    (uintptr_t)1U && size <= small_maxclass) {
					result = ipalloc(alignment,
					    small_maxclass+1);
					if (result != NULL) {
						arena_prof_promoted(result,
						    size);
					}
				} else
					result = ipalloc(alignment, size);
			}
		} else
#endif
			result = ipalloc(alignment, size);
	}

	if (result == NULL) {
#ifdef JEMALLOC_XMALLOC
		if (opt_xmalloc) {
			malloc_write("<jemalloc>: Error in posix_memalign(): "
			    "out of memory\n");
			abort();
		}
#endif
		ret = ENOMEM;
		goto RETURN;
	}

	*memptr = result;
	ret = 0;

RETURN:
#ifdef JEMALLOC_PROF
	if (opt_prof && result != NULL)
		prof_malloc(result, cnt);
#endif
	return (ret);
}
+
/*
 * calloc(3)-compatible entry point: allocate num*size zeroed bytes.  Detects
 * multiplication overflow and returns NULL with errno = ENOMEM on failure.
 */
JEMALLOC_ATTR(malloc)
JEMALLOC_ATTR(visibility("default"))
void *
JEMALLOC_P(calloc)(size_t num, size_t size)
{
	void *ret;
	size_t num_size;
#ifdef JEMALLOC_PROF
	prof_thr_cnt_t *cnt;
#endif

	if (malloc_init()) {
		num_size = 0;
		ret = NULL;
		goto RETURN;
	}

	num_size = num * size;
	if (num_size == 0) {
#ifdef JEMALLOC_SYSV
		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
#endif
			num_size = 1;
#ifdef JEMALLOC_SYSV
		else {
			ret = NULL;
			goto RETURN;
		}
#endif
	/*
	 * Try to avoid division here.  We know that it isn't possible to
	 * overflow during multiplication if neither operand uses any of the
	 * most significant half of the bits in a size_t.
	 */
	} else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
	    && (num_size / size != num)) {
		/* size_t overflow. */
		ret = NULL;
		goto RETURN;
	}

#ifdef JEMALLOC_PROF
	if (opt_prof) {
		if ((cnt = prof_alloc_prep(num_size)) == NULL) {
			ret = NULL;
			goto RETURN;
		}
		/* Promote sampled small requests for individual tracking. */
		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && num_size
		    <= small_maxclass) {
			ret = icalloc(small_maxclass+1);
			if (ret != NULL)
				arena_prof_promoted(ret, num_size);
		} else
			ret = icalloc(num_size);
	} else
#endif
		ret = icalloc(num_size);

RETURN:
	if (ret == NULL) {
#ifdef JEMALLOC_XMALLOC
		if (opt_xmalloc) {
			malloc_write("<jemalloc>: Error in calloc(): out of "
			    "memory\n");
			abort();
		}
#endif
		errno = ENOMEM;
	}

#ifdef JEMALLOC_PROF
	/* cnt is assigned on every path where opt_prof && ret != NULL. */
	if (opt_prof && ret != NULL)
		prof_malloc(ret, cnt);
#endif
	return (ret);
}
+
/*
 * realloc(3)-compatible entry point.  realloc(NULL, size) behaves like
 * malloc(size); with SysV semantics, realloc(ptr, 0) frees ptr and returns
 * NULL.  On failure, returns NULL with errno = ENOMEM and leaves ptr valid.
 */
JEMALLOC_ATTR(visibility("default"))
void *
JEMALLOC_P(realloc)(void *ptr, size_t size)
{
	void *ret;
#ifdef JEMALLOC_PROF
	size_t old_size;
	prof_thr_cnt_t *cnt, *old_cnt;
#endif

	if (size == 0) {
#ifdef JEMALLOC_SYSV
		if (opt_sysv == false)
#endif
			size = 1;
#ifdef JEMALLOC_SYSV
		else {
			/* SysV: size 0 frees ptr and returns NULL. */
			if (ptr != NULL) {
#ifdef JEMALLOC_PROF
				if (opt_prof) {
					old_size = isalloc(ptr);
					old_cnt = prof_cnt_get(ptr);
					cnt = NULL;
				}
#endif
				idalloc(ptr);
			}
#ifdef JEMALLOC_PROF
			else if (opt_prof) {
				old_size = 0;
				old_cnt = NULL;
				cnt = NULL;
			}
#endif
			ret = NULL;
			goto RETURN;
		}
#endif
	}

	if (ptr != NULL) {
		assert(malloc_initialized || malloc_initializer ==
		    pthread_self());

#ifdef JEMALLOC_PROF
		if (opt_prof) {
			old_size = isalloc(ptr);
			old_cnt = prof_cnt_get(ptr);
			if ((cnt = prof_alloc_prep(size)) == NULL) {
				ret = NULL;
				goto OOM;
			}
			if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U &&
			    size <= small_maxclass) {
				ret = iralloc(ptr, small_maxclass+1);
				if (ret != NULL)
					arena_prof_promoted(ret, size);
			} else
				ret = iralloc(ptr, size);
		} else
#endif
			ret = iralloc(ptr, size);

#ifdef JEMALLOC_PROF
OOM:
#endif
		if (ret == NULL) {
#ifdef JEMALLOC_XMALLOC
			if (opt_xmalloc) {
				malloc_write("<jemalloc>: Error in realloc(): "
				    "out of memory\n");
				abort();
			}
#endif
			errno = ENOMEM;
		}
	} else {
		/* realloc(NULL, size) is equivalent to malloc(size). */
#ifdef JEMALLOC_PROF
		if (opt_prof) {
			old_size = 0;
			old_cnt = NULL;
		}
#endif
		if (malloc_init()) {
#ifdef JEMALLOC_PROF
			if (opt_prof)
				cnt = NULL;
#endif
			ret = NULL;
		} else {
#ifdef JEMALLOC_PROF
			if (opt_prof) {
				if ((cnt = prof_alloc_prep(size)) == NULL)
					ret = NULL;
				else {
					if (prof_promote && (uintptr_t)cnt !=
					    (uintptr_t)1U && size <=
					    small_maxclass) {
						ret = imalloc(small_maxclass+1);
						if (ret != NULL) {
							arena_prof_promoted(ret,
							    size);
						}
					} else
						ret = imalloc(size);
				}
			} else
#endif
				ret = imalloc(size);
		}

		if (ret == NULL) {
#ifdef JEMALLOC_XMALLOC
			if (opt_xmalloc) {
				malloc_write("<jemalloc>: Error in realloc(): "
				    "out of memory\n");
				abort();
			}
#endif
			errno = ENOMEM;
		}
	}

#ifdef JEMALLOC_SYSV
RETURN:
#endif
#ifdef JEMALLOC_PROF
	/* old_size/old_cnt/cnt are assigned on every opt_prof path above. */
	if (opt_prof)
		prof_realloc(ret, cnt, ptr, old_size, old_cnt);
#endif
	return (ret);
}
+
+/*
+ * free(3)-compatible entry point.  free(NULL) is a no-op; otherwise the
+ * profiling record (if enabled) is retired before the deallocation.
+ */
+JEMALLOC_ATTR(visibility("default"))
+void
+JEMALLOC_P(free)(void *ptr)
+{
+
+	if (ptr != NULL) {
+		assert(malloc_initialized || malloc_initializer ==
+		    pthread_self());
+
+#ifdef JEMALLOC_PROF
+		if (opt_prof)
+			prof_free(ptr);
+#endif
+		idalloc(ptr);
+	}
+}
+
+/*
+ * End malloc(3)-compatible functions.
+ */
+/******************************************************************************/
+/*
+ * Begin non-standard functions.
+ */
+
+/*
+ * Return the usable size of the allocation ptr points to (may exceed the
+ * originally requested size due to size-class rounding).  ptr must be a
+ * live allocation; NULL is not accepted.
+ */
+JEMALLOC_ATTR(visibility("default"))
+size_t
+JEMALLOC_P(malloc_usable_size)(const void *ptr)
+{
+	size_t ret;
+
+	assert(ptr != NULL);
+	ret = isalloc(ptr);
+
+	return (ret);
+}
+
+#ifdef JEMALLOC_SWAP
+/*
+ * Enable backing of chunks by the given swap files.  Returns 0 on
+ * success, -1 on failure (including failed bootstrap).
+ */
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(malloc_swap_enable)(const int *fds, unsigned nfds, int prezeroed)
+{
+
+	/*
+	 * Make sure malloc is initialized, because we need page size, chunk
+	 * size, etc.
+	 */
+	if (malloc_init())
+		return (-1);
+
+	return (chunk_swap_enable(fds, nfds, (prezeroed != 0)) ? -1 : 0);
+}
+#endif
+
+/*
+ * Print allocator statistics via write_cb(cbopaque, str) (or the default
+ * writer when write_cb is NULL); opts selects which sections to emit.
+ */
+JEMALLOC_ATTR(visibility("default"))
+void
+JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *),
+    void *cbopaque, const char *opts)
+{
+
+	stats_print(write_cb, cbopaque, opts);
+}
+
+/*
+ * Control interface: read and/or write the named control value.
+ * Returns EAGAIN if the allocator cannot be bootstrapped, otherwise the
+ * result of ctl_byname().
+ */
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, void *newp,
+    size_t newlen)
+{
+
+	if (malloc_init())
+		return (EAGAIN);
+
+	return (ctl_byname(name, oldp, oldlenp, newp, newlen));
+}
+
+/*
+ * Translate a control name into a Management Information Base (MIB) so
+ * repeated lookups can skip name resolution; see mallctlbymib().
+ */
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, size_t *miblenp)
+{
+
+	if (malloc_init())
+		return (EAGAIN);
+
+	return (ctl_nametomib(name, mibp, miblenp));
+}
+
+/*
+ * MIB-based variant of mallctl(); mib must have been produced by
+ * mallctlnametomib().
+ */
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
+{
+
+	if (malloc_init())
+		return (EAGAIN);
+
+	return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen));
+}
+
+/*
+ * End non-standard functions.
+ */
+/******************************************************************************/
+
+/*
+ * The following functions are used by threading libraries for protection of
+ * malloc during fork(). These functions are only called if the program is
+ * running in threaded mode, so there is no need to check whether the program
+ * is threaded here.
+ */
+
+/*
+ * fork() preparation hook: acquire every allocator mutex so no lock is
+ * held mid-operation when the address space is duplicated.  The order
+ * here (arenas, base, huge, dss, swap) must be the exact reverse of
+ * jemalloc_postfork() to avoid self-deadlock.
+ */
+static void
+jemalloc_prefork(void)
+{
+	unsigned i;
+
+	/* Acquire all mutexes in a safe order. */
+
+	malloc_mutex_lock(&arenas_lock);
+	for (i = 0; i < narenas; i++) {
+		if (arenas[i] != NULL)
+			malloc_mutex_lock(&arenas[i]->lock);
+	}
+
+	malloc_mutex_lock(&base_mtx);
+
+	malloc_mutex_lock(&huge_mtx);
+
+#ifdef JEMALLOC_DSS
+	malloc_mutex_lock(&dss_mtx);
+#endif
+
+#ifdef JEMALLOC_SWAP
+	malloc_mutex_lock(&swap_mtx);
+#endif
+}
+
+/*
+ * fork() completion hook (run in both parent and child): release the
+ * mutexes taken by jemalloc_prefork(), in reverse acquisition order.
+ */
+static void
+jemalloc_postfork(void)
+{
+	unsigned i;
+
+	/* Release all mutexes, now that fork() has completed. */
+
+#ifdef JEMALLOC_SWAP
+	malloc_mutex_unlock(&swap_mtx);
+#endif
+
+#ifdef JEMALLOC_DSS
+	malloc_mutex_unlock(&dss_mtx);
+#endif
+
+	malloc_mutex_unlock(&huge_mtx);
+
+	malloc_mutex_unlock(&base_mtx);
+
+	for (i = 0; i < narenas; i++) {
+		if (arenas[i] != NULL)
+			malloc_mutex_unlock(&arenas[i]->lock);
+	}
+	malloc_mutex_unlock(&arenas_lock);
+}
diff --git a/dep/src/jmalloc/mb.c b/dep/src/jmalloc/mb.c
new file mode 100644
index 00000000000..30a1a2e997a
--- /dev/null
+++ b/dep/src/jmalloc/mb.c
@@ -0,0 +1,2 @@
+#define MB_C_
+#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/dep/src/jmalloc/mutex.c b/dep/src/jmalloc/mutex.c
new file mode 100644
index 00000000000..3b6081a4c4f
--- /dev/null
+++ b/dep/src/jmalloc/mutex.c
@@ -0,0 +1,70 @@
+#define JEMALLOC_MUTEX_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+#ifdef JEMALLOC_LAZY_LOCK
+bool isthreaded = false;
+#endif
+
+#ifdef JEMALLOC_LAZY_LOCK
+static void pthread_create_once(void);
+#endif
+
+/******************************************************************************/
+/*
+ * We intercept pthread_create() calls in order to toggle isthreaded if the
+ * process goes multi-threaded.
+ */
+
+#ifdef JEMALLOC_LAZY_LOCK
+static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
+ void *(*)(void *), void *__restrict);
+
+/*
+ * One-time initializer for the pthread_create() interposer: resolve the
+ * real pthread_create via dlsym(RTLD_NEXT) and flip isthreaded so lazy
+ * locking activates.  Aborts if the symbol cannot be found, since the
+ * interposed pthread_create() would otherwise call a NULL pointer.
+ */
+static void
+pthread_create_once(void)
+{
+
+	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
+	if (pthread_create_fptr == NULL) {
+		malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
+		    "\"pthread_create\")\n");
+		abort();
+	}
+
+	isthreaded = true;
+}
+
+/*
+ * Interposed pthread_create(): detect the transition to multi-threaded
+ * execution (see pthread_create_once), then forward to the real
+ * implementation unchanged.
+ */
+JEMALLOC_ATTR(visibility("default"))
+int
+pthread_create(pthread_t *__restrict thread,
+    const pthread_attr_t *__restrict attr, void *(*start_routine)(void *),
+    void *__restrict arg)
+{
+	static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+
+	pthread_once(&once_control, pthread_create_once);
+
+	return (pthread_create_fptr(thread, attr, start_routine, arg));
+}
+
+/******************************************************************************/
+
+/*
+ * Initialize an allocator mutex.  Returns true on failure (jemalloc's
+ * error convention), false on success.
+ *
+ * NOTE(review): PTHREAD_MUTEX_ADAPTIVE_NP is a glibc extension and the
+ * return value of pthread_mutexattr_settype() is ignored -- on non-glibc
+ * platforms the type request may silently fail; confirm this is the
+ * intended behavior for the Linux-only build this was imported for.
+ */
+bool
+malloc_mutex_init(malloc_mutex_t *mutex)
+{
+	pthread_mutexattr_t attr;
+
+	if (pthread_mutexattr_init(&attr) != 0)
+		return (true);
+	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+	if (pthread_mutex_init(mutex, &attr) != 0) {
+		pthread_mutexattr_destroy(&attr);
+		return (true);
+	}
+	pthread_mutexattr_destroy(&attr);
+
+	return (false);
+}
diff --git a/dep/src/jmalloc/prof.c b/dep/src/jmalloc/prof.c
new file mode 100644
index 00000000000..6326188e50f
--- /dev/null
+++ b/dep/src/jmalloc/prof.c
@@ -0,0 +1,1328 @@
+#define JEMALLOC_PROF_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifdef JEMALLOC_PROF
+/******************************************************************************/
+
+#ifdef JEMALLOC_PROF_LIBGCC
+#include <unwind.h>
+#endif
+
+#ifdef JEMALLOC_PROF_LIBUNWIND
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+#endif
+
+#include <math.h>
+
+/******************************************************************************/
+/* Data. */
+
+bool opt_prof = false;
+bool opt_prof_active = true;
+size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT;
+size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
+ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
+bool opt_prof_udump = false;
+bool opt_prof_leak = false;
+
+uint64_t prof_interval;
+bool prof_promote;
+
+/*
+ * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data
+ * structure that knows about all backtraces ever captured.
+ */
+static ckh_t bt2ctx;
+static malloc_mutex_t bt2ctx_mtx;
+
+/*
+ * Thread-specific hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread
+ * keeps a cache of backtraces, with associated thread-specific prof_thr_cnt_t
+ * objects. Other threads may read the prof_thr_cnt_t contents, but no others
+ * will ever write them.
+ *
+ * Upon thread exit, the thread must merge all the prof_thr_cnt_t counter data
+ * into the associated prof_ctx_t objects, and unlink/free the prof_thr_cnt_t
+ * objects.
+ */
+static __thread ckh_t *bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+
+/*
+ * Same contents as b2cnt, but initialized such that the TSD destructor is
+ * called when a thread exits, so that bt2cnt_tls contents can be merged,
+ * unlinked, and deallocated.
+ */
+static pthread_key_t bt2cnt_tsd;
+
+/* (1U << opt_lg_prof_bt_max). */
+static unsigned prof_bt_max;
+
+static __thread uint64_t prof_sample_prn_state
+ JEMALLOC_ATTR(tls_model("initial-exec"));
+static __thread uint64_t prof_sample_threshold
+ JEMALLOC_ATTR(tls_model("initial-exec"));
+static __thread uint64_t prof_sample_accum
+ JEMALLOC_ATTR(tls_model("initial-exec"));
+
+static malloc_mutex_t prof_dump_seq_mtx;
+static uint64_t prof_dump_seq;
+static uint64_t prof_dump_iseq;
+static uint64_t prof_dump_mseq;
+static uint64_t prof_dump_useq;
+
+/*
+ * This buffer is rather large for stack allocation, so use a single buffer for
+ * all profile dumps. The buffer is implicitly protected by bt2ctx_mtx, since
+ * it must be locked anyway during dumping.
+ */
+static char prof_dump_buf[PROF_DUMP_BUF_SIZE];
+static unsigned prof_dump_buf_end;
+static int prof_dump_fd;
+
+/* Do not dump any profiles until bootstrapping is complete. */
+static bool prof_booted = false;
+
+static malloc_mutex_t enq_mtx;
+static bool enq;
+static bool enq_idump;
+static bool enq_udump;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static prof_bt_t *bt_dup(prof_bt_t *bt);
+static void bt_init(prof_bt_t *bt, void **vec);
+#ifdef JEMALLOC_PROF_LIBGCC
+static _Unwind_Reason_Code prof_unwind_init_callback(
+ struct _Unwind_Context *context, void *arg);
+static _Unwind_Reason_Code prof_unwind_callback(
+ struct _Unwind_Context *context, void *arg);
+#endif
+static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
+static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
+static void prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
+static bool prof_flush(bool propagate_err);
+static bool prof_write(const char *s, bool propagate_err);
+static void prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
+ size_t *leak_nctx);
+static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt,
+ bool propagate_err);
+static bool prof_dump_maps(bool propagate_err);
+static bool prof_dump(const char *filename, bool leakcheck,
+ bool propagate_err);
+static void prof_dump_filename(char *filename, char v, int64_t vseq);
+static void prof_fdump(void);
+static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
+ size_t *hash2);
+static bool prof_bt_keycomp(const void *k1, const void *k2);
+static void bt2cnt_thread_cleanup(void *arg);
+
+/******************************************************************************/
+
+/*
+ * Bind an empty backtrace to caller-provided frame storage; vec must
+ * remain valid for the lifetime of bt.
+ */
+static void
+bt_init(prof_bt_t *bt, void **vec)
+{
+
+	bt->vec = vec;
+	bt->len = 0;
+}
+
+/*
+ * Heap-duplicate a stack-allocated backtrace.  Returns NULL on OOM; the
+ * caller owns the returned object (a single idalloc() frees both header
+ * and frame vector).
+ */
+static prof_bt_t *
+bt_dup(prof_bt_t *bt)
+{
+	prof_bt_t *ret;
+
+	/*
+	 * Create a single allocation that has space for vec immediately
+	 * following the prof_bt_t structure.  The backtraces that get
+	 * stored in the backtrace caches are copied from stack-allocated
+	 * temporary variables, so size is known at creation time.  Making this
+	 * a contiguous object improves cache locality.
+	 */
+	ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
+	    (bt->len * sizeof(void *)));
+	if (ret == NULL)
+		return (NULL);
+	ret->vec = (void **)((uintptr_t)ret +
+	    QUANTUM_CEILING(sizeof(prof_bt_t)));
+	memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
+	ret->len = bt->len;
+
+	return (ret);
+}
+
+/*
+ * Enter a critical section over the global bt2ctx table.  Setting enq
+ * first makes concurrent prof_idump()/prof_udump() requests queue
+ * (enq_idump/enq_udump) instead of blocking; prof_leave() replays them.
+ */
+static inline void
+prof_enter(void)
+{
+
+	malloc_mutex_lock(&enq_mtx);
+	enq = true;
+	malloc_mutex_unlock(&enq_mtx);
+
+	malloc_mutex_lock(&bt2ctx_mtx);
+}
+
+/*
+ * Leave the bt2ctx critical section, then perform any interval/overflow
+ * dumps that were requested (and deferred) while it was held.
+ */
+static inline void
+prof_leave(void)
+{
+	bool idump, udump;
+
+	malloc_mutex_unlock(&bt2ctx_mtx);
+
+	malloc_mutex_lock(&enq_mtx);
+	enq = false;
+	idump = enq_idump;
+	enq_idump = false;
+	udump = enq_udump;
+	enq_udump = false;
+	malloc_mutex_unlock(&enq_mtx);
+
+	if (idump)
+		prof_idump();
+	if (udump)
+		prof_udump();
+}
+
+#ifdef JEMALLOC_PROF_LIBGCC
+/*
+ * libgcc (_Unwind_Backtrace) backtrace implementation.  The init
+ * callback is a no-op required by the unwinder API; the per-frame
+ * callback skips data->nignore frames, then records return addresses
+ * until data->max frames are captured.
+ */
+static _Unwind_Reason_Code
+prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
+{
+
+	return (_URC_NO_REASON);
+}
+
+static _Unwind_Reason_Code
+prof_unwind_callback(struct _Unwind_Context *context, void *arg)
+{
+	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
+
+	if (data->nignore > 0)
+		data->nignore--;
+	else {
+		data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
+		data->bt->len++;
+		if (data->bt->len == data->max)
+			return (_URC_END_OF_STACK);
+	}
+
+	return (_URC_NO_REASON);
+}
+
+static void
+prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+{
+	prof_unwind_data_t data = {bt, nignore, max};
+
+	_Unwind_Backtrace(prof_unwind_callback, &data);
+}
+#elif defined(JEMALLOC_PROF_LIBUNWIND)
+/*
+ * libunwind backtrace implementation: capture up to max return
+ * addresses into bt->vec after discarding nignore+1 frames (the +1
+ * accounts for this function's own frame).
+ */
+static void
+prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+{
+	unw_context_t uc;
+	unw_cursor_t cursor;
+	unsigned i;
+	int err;
+
+	assert(bt->len == 0);
+	assert(bt->vec != NULL);
+	assert(max <= (1U << opt_lg_prof_bt_max));
+
+	unw_getcontext(&uc);
+	unw_init_local(&cursor, &uc);
+
+	/* Throw away (nignore+1) stack frames, if that many exist. */
+	for (i = 0; i < nignore + 1; i++) {
+		err = unw_step(&cursor);
+		if (err <= 0)
+			return;
+	}
+
+	/*
+	 * Iterate over stack frames until there are no more.  Heap-allocate
+	 * and iteratively grow a larger bt if necessary.
+	 */
+	for (i = 0; i < max; i++) {
+		unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
+		err = unw_step(&cursor);
+		if (err <= 0) {
+			bt->len = i;
+			break;
+		}
+	}
+}
+#else
+static void
+prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+{
+#define NIGNORE 3
+#define BT_FRAME(i) \
+ if ((i) < NIGNORE + max) { \
+ void *p; \
+ if (__builtin_frame_address(i) == 0) \
+ return; \
+ p = __builtin_return_address(i); \
+ if (p == NULL) \
+ return; \
+ if (i >= NIGNORE) { \
+ bt->vec[(i) - NIGNORE] = p; \
+ bt->len = (i) - NIGNORE + 1; \
+ } \
+ } else \
+ return;
+
+ assert(max <= (1U << opt_lg_prof_bt_max));
+
+ /*
+ * Ignore the first three frames, since they are:
+ *
+ * 0: prof_backtrace()
+ * 1: prof_alloc_prep()
+ * 2: malloc(), calloc(), etc.
+ */
+#if 1
+ assert(nignore + 1 == NIGNORE);
+#else
+ BT_FRAME(0)
+ BT_FRAME(1)
+ BT_FRAME(2)
+#endif
+ BT_FRAME(3)
+ BT_FRAME(4)
+ BT_FRAME(5)
+ BT_FRAME(6)
+ BT_FRAME(7)
+ BT_FRAME(8)
+ BT_FRAME(9)
+
+ BT_FRAME(10)
+ BT_FRAME(11)
+ BT_FRAME(12)
+ BT_FRAME(13)
+ BT_FRAME(14)
+ BT_FRAME(15)
+ BT_FRAME(16)
+ BT_FRAME(17)
+ BT_FRAME(18)
+ BT_FRAME(19)
+
+ BT_FRAME(20)
+ BT_FRAME(21)
+ BT_FRAME(22)
+ BT_FRAME(23)
+ BT_FRAME(24)
+ BT_FRAME(25)
+ BT_FRAME(26)
+ BT_FRAME(27)
+ BT_FRAME(28)
+ BT_FRAME(29)
+
+ BT_FRAME(30)
+ BT_FRAME(31)
+ BT_FRAME(32)
+ BT_FRAME(33)
+ BT_FRAME(34)
+ BT_FRAME(35)
+ BT_FRAME(36)
+ BT_FRAME(37)
+ BT_FRAME(38)
+ BT_FRAME(39)
+
+ BT_FRAME(40)
+ BT_FRAME(41)
+ BT_FRAME(42)
+ BT_FRAME(43)
+ BT_FRAME(44)
+ BT_FRAME(45)
+ BT_FRAME(46)
+ BT_FRAME(47)
+ BT_FRAME(48)
+ BT_FRAME(49)
+
+ BT_FRAME(50)
+ BT_FRAME(51)
+ BT_FRAME(52)
+ BT_FRAME(53)
+ BT_FRAME(54)
+ BT_FRAME(55)
+ BT_FRAME(56)
+ BT_FRAME(57)
+ BT_FRAME(58)
+ BT_FRAME(59)
+
+ BT_FRAME(60)
+ BT_FRAME(61)
+ BT_FRAME(62)
+ BT_FRAME(63)
+ BT_FRAME(64)
+ BT_FRAME(65)
+ BT_FRAME(66)
+ BT_FRAME(67)
+ BT_FRAME(68)
+ BT_FRAME(69)
+
+ BT_FRAME(70)
+ BT_FRAME(71)
+ BT_FRAME(72)
+ BT_FRAME(73)
+ BT_FRAME(74)
+ BT_FRAME(75)
+ BT_FRAME(76)
+ BT_FRAME(77)
+ BT_FRAME(78)
+ BT_FRAME(79)
+
+ BT_FRAME(80)
+ BT_FRAME(81)
+ BT_FRAME(82)
+ BT_FRAME(83)
+ BT_FRAME(84)
+ BT_FRAME(85)
+ BT_FRAME(86)
+ BT_FRAME(87)
+ BT_FRAME(88)
+ BT_FRAME(89)
+
+ BT_FRAME(90)
+ BT_FRAME(91)
+ BT_FRAME(92)
+ BT_FRAME(93)
+ BT_FRAME(94)
+ BT_FRAME(95)
+ BT_FRAME(96)
+ BT_FRAME(97)
+ BT_FRAME(98)
+ BT_FRAME(99)
+
+ BT_FRAME(100)
+ BT_FRAME(101)
+ BT_FRAME(102)
+ BT_FRAME(103)
+ BT_FRAME(104)
+ BT_FRAME(105)
+ BT_FRAME(106)
+ BT_FRAME(107)
+ BT_FRAME(108)
+ BT_FRAME(109)
+
+ BT_FRAME(110)
+ BT_FRAME(111)
+ BT_FRAME(112)
+ BT_FRAME(113)
+ BT_FRAME(114)
+ BT_FRAME(115)
+ BT_FRAME(116)
+ BT_FRAME(117)
+ BT_FRAME(118)
+ BT_FRAME(119)
+
+ BT_FRAME(120)
+ BT_FRAME(121)
+ BT_FRAME(122)
+ BT_FRAME(123)
+ BT_FRAME(124)
+ BT_FRAME(125)
+ BT_FRAME(126)
+ BT_FRAME(127)
+
+ /* Extras to compensate for NIGNORE. */
+ BT_FRAME(128)
+ BT_FRAME(129)
+ BT_FRAME(130)
+#undef BT_FRAME
+}
+#endif
+
+/*
+ * Map a backtrace to this thread's prof_thr_cnt_t counter object,
+ * creating thread-local cache (bt2cnt_tls), global prof_ctx_t, and
+ * counter as needed.  Returns NULL on OOM.  Only the global table
+ * (bt2ctx) requires locking; the per-thread cache is lock-free by
+ * construction.
+ */
+static prof_thr_cnt_t *
+prof_lookup(prof_bt_t *bt)
+{
+	prof_thr_cnt_t *ret;
+	ckh_t *bt2cnt = bt2cnt_tls;
+
+	if (bt2cnt == NULL) {
+		/* Initialize an empty cache for this thread. */
+		bt2cnt = (ckh_t *)imalloc(sizeof(ckh_t));
+		if (bt2cnt == NULL)
+			return (NULL);
+		if (ckh_new(bt2cnt, PROF_CKH_MINITEMS, prof_bt_hash,
+		    prof_bt_keycomp)) {
+			idalloc(bt2cnt);
+			return (NULL);
+		}
+		bt2cnt_tls = bt2cnt;
+	}
+
+	if (ckh_search(bt2cnt, bt, NULL, (void **)&ret)) {
+		prof_bt_t *btkey;
+		prof_ctx_t *ctx;
+
+		/*
+		 * This thread's cache lacks bt.  Look for it in the global
+		 * cache.
+		 */
+		prof_enter();
+		if (ckh_search(&bt2ctx, bt, (void **)&btkey, (void **)&ctx)) {
+
+			/* bt has never been seen before.  Insert it. */
+			ctx = (prof_ctx_t *)imalloc(sizeof(prof_ctx_t));
+			if (ctx == NULL) {
+				prof_leave();
+				return (NULL);
+			}
+			btkey = bt_dup(bt);
+			if (btkey == NULL) {
+				prof_leave();
+				idalloc(ctx);
+				return (NULL);
+			}
+			if (malloc_mutex_init(&ctx->lock)) {
+				prof_leave();
+				idalloc(btkey);
+				idalloc(ctx);
+				return (NULL);
+			}
+			memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t));
+			ql_new(&ctx->cnts_ql);
+			if (ckh_insert(&bt2ctx, btkey, ctx)) {
+				/* OOM. */
+				prof_leave();
+				idalloc(btkey);
+				idalloc(ctx);
+				return (NULL);
+			}
+		}
+		prof_leave();
+
+		/* Link a prof_thd_cnt_t into ctx for this thread. */
+		ret = (prof_thr_cnt_t *)imalloc(sizeof(prof_thr_cnt_t));
+		if (ret == NULL)
+			return (NULL);
+		ql_elm_new(ret, link);
+		ret->ctx = ctx;
+		ret->epoch = 0;
+		memset(&ret->cnts, 0, sizeof(prof_cnt_t));
+		if (ckh_insert(bt2cnt, btkey, ret)) {
+			idalloc(ret);
+			return (NULL);
+		}
+		malloc_mutex_lock(&ctx->lock);
+		ql_tail_insert(&ctx->cnts_ql, ret, link);
+		malloc_mutex_unlock(&ctx->lock);
+	}
+
+	return (ret);
+}
+
+/*
+ * Draw the next sampling threshold as a geometrically distributed random
+ * variable with mean 2^opt_lg_prof_sample, via inversion of a uniform
+ * 53-bit variate.  The magic constants are the LCG parameters for prn64
+ * and 2^53 scaling -- presumably chosen to match the prn.h generator;
+ * verify against that header before changing them.
+ */
+static inline void
+prof_sample_threshold_update(void)
+{
+	uint64_t r;
+	double u;
+
+	/*
+	 * Compute prof_sample_threshold as a geometrically distributed random
+	 * variable with mean (2^opt_lg_prof_sample).
+	 */
+	prn64(r, 53, prof_sample_prn_state, (uint64_t)1125899906842625LLU,
+	    1058392653243283975);
+	u = (double)r * (1.0/9007199254740992.0L);
+	prof_sample_threshold = (uint64_t)(log(u) /
+	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
+	    + (uint64_t)1U;
+}
+
+/*
+ * Decide whether the upcoming allocation of `size` bytes should be
+ * sampled.  Returns: NULL on OOM, the sentinel (prof_thr_cnt_t *)1 for
+ * "not sampled", or a real counter object for a sampled allocation.
+ * Accumulator state is deliberately NOT updated here; that happens in
+ * prof_malloc()/prof_realloc() once the allocation is known to succeed.
+ */
+prof_thr_cnt_t *
+prof_alloc_prep(size_t size)
+{
+	prof_thr_cnt_t *ret;
+	void *vec[prof_bt_max];
+	prof_bt_t bt;
+
+	if (opt_prof_active == false) {
+		/* Sampling is currently inactive, so avoid sampling. */
+		ret = (prof_thr_cnt_t *)(uintptr_t)1U;
+	} else if (opt_lg_prof_sample == 0) {
+		/*
+		 * Don't bother with sampling logic, since sampling interval is
+		 * 1.
+		 */
+		bt_init(&bt, vec);
+		prof_backtrace(&bt, 2, prof_bt_max);
+		ret = prof_lookup(&bt);
+	} else {
+		if (prof_sample_threshold == 0) {
+			/*
+			 * Initialize.  Seed the prng differently for each
+			 * thread.
+			 */
+			prof_sample_prn_state = (uint64_t)(uintptr_t)&size;
+			prof_sample_threshold_update();
+		}
+
+		/*
+		 * Determine whether to capture a backtrace based on whether
+		 * size is enough for prof_accum to reach
+		 * prof_sample_threshold.  However, delay updating these
+		 * variables until prof_{m,re}alloc(), because we don't know
+		 * for sure that the allocation will succeed.
+		 *
+		 * Use subtraction rather than addition to avoid potential
+		 * integer overflow.
+		 */
+		if (size >= prof_sample_threshold - prof_sample_accum) {
+			bt_init(&bt, vec);
+			prof_backtrace(&bt, 2, prof_bt_max);
+			ret = prof_lookup(&bt);
+		} else
+			ret = (prof_thr_cnt_t *)(uintptr_t)1U;
+	}
+
+	return (ret);
+}
+
+/*
+ * Fetch the profiling counter associated with an allocation, dispatching
+ * on whether ptr lies inside an arena chunk (region) or is itself
+ * chunk-aligned (huge allocation).
+ */
+prof_thr_cnt_t *
+prof_cnt_get(const void *ptr)
+{
+	prof_thr_cnt_t *ret;
+	arena_chunk_t *chunk;
+
+	assert(ptr != NULL);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	if (chunk != ptr) {
+		/* Region. */
+		assert(chunk->arena->magic == ARENA_MAGIC);
+
+		ret = arena_prof_cnt_get(ptr);
+	} else
+		ret = huge_prof_cnt_get(ptr);
+
+	return (ret);
+}
+
+/*
+ * Associate a profiling counter with an allocation; dispatch mirrors
+ * prof_cnt_get() (arena region vs. huge allocation).
+ */
+static void
+prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
+{
+	arena_chunk_t *chunk;
+
+	assert(ptr != NULL);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	if (chunk != ptr) {
+		/* Region. */
+		assert(chunk->arena->magic == ARENA_MAGIC);
+
+		arena_prof_cnt_set(ptr, cnt);
+	} else
+		huge_prof_cnt_set(ptr, cnt);
+}
+
+/*
+ * Fold `size` bytes into the per-thread sampling accumulator, drawing a
+ * new threshold each time the accumulator crosses it (possibly several
+ * times for a single large allocation).
+ */
+static inline void
+prof_sample_accum_update(size_t size)
+{
+
+	if (opt_lg_prof_sample == 0) {
+		/*
+		 * Don't bother with sampling logic, since sampling interval is
+		 * 1.
+		 */
+		return;
+	}
+
+	/* Take care to avoid integer overflow. */
+	if (size >= prof_sample_threshold - prof_sample_accum) {
+		prof_sample_accum -= (prof_sample_threshold - size);
+		/* Compute new prof_sample_threshold. */
+		prof_sample_threshold_update();
+		while (prof_sample_accum >= prof_sample_threshold) {
+			prof_sample_accum -= prof_sample_threshold;
+			prof_sample_threshold_update();
+		}
+	} else
+		prof_sample_accum += size;
+}
+
+/*
+ * Record a successful sampled-or-not allocation.  cnt is the value
+ * returned by prof_alloc_prep(); the sentinel (uintptr_t)1 means "not
+ * sampled" and only updates the accumulator.  The odd/even epoch
+ * increments bracketing the counter updates form a seqlock so that
+ * prof_ctx_merge() can read the counters without locking.
+ */
+void
+prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
+{
+	size_t size = isalloc(ptr);
+
+	assert(ptr != NULL);
+
+	prof_cnt_set(ptr, cnt);
+	prof_sample_accum_update(size);
+
+	if ((uintptr_t)cnt > (uintptr_t)1U) {
+		cnt->epoch++;
+		/*********/
+		mb_write();
+		/*********/
+		cnt->cnts.curobjs++;
+		cnt->cnts.curbytes += size;
+		cnt->cnts.accumobjs++;
+		cnt->cnts.accumbytes += size;
+		/*********/
+		mb_write();
+		/*********/
+		cnt->epoch++;
+		/*********/
+		mb_write();
+		/*********/
+	}
+}
+
+/*
+ * Record a reallocation: retire old_cnt's contribution (old_size bytes)
+ * and credit cnt with the new size, updating both counters inside a
+ * single seqlock write section (epoch odd -> update -> epoch even) so a
+ * concurrent merge never sees the object double- or zero-counted.
+ * ptr may be NULL if the reallocation failed.
+ */
+void
+prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
+    size_t old_size, prof_thr_cnt_t *old_cnt)
+{
+	size_t size = isalloc(ptr);
+
+	if (ptr != NULL) {
+		prof_cnt_set(ptr, cnt);
+		prof_sample_accum_update(size);
+	}
+
+	if ((uintptr_t)old_cnt > (uintptr_t)1U)
+		old_cnt->epoch++;
+	if ((uintptr_t)cnt > (uintptr_t)1U)
+		cnt->epoch++;
+	/*********/
+	mb_write();
+	/*********/
+	if ((uintptr_t)old_cnt > (uintptr_t)1U) {
+		old_cnt->cnts.curobjs--;
+		old_cnt->cnts.curbytes -= old_size;
+	}
+	if ((uintptr_t)cnt > (uintptr_t)1U) {
+		cnt->cnts.curobjs++;
+		cnt->cnts.curbytes += size;
+		cnt->cnts.accumobjs++;
+		cnt->cnts.accumbytes += size;
+	}
+	/*********/
+	mb_write();
+	/*********/
+	if ((uintptr_t)old_cnt > (uintptr_t)1U)
+		old_cnt->epoch++;
+	if ((uintptr_t)cnt > (uintptr_t)1U)
+		cnt->epoch++;
+	/*********/
+	mb_write(); /* Not strictly necessary. */
+}
+
+/*
+ * Record a deallocation: subtract the object from its counter's current
+ * totals, using the same epoch/seqlock write protocol as prof_malloc().
+ * The sentinel counter value 1 ("not sampled") is a no-op.
+ */
+void
+prof_free(const void *ptr)
+{
+	prof_thr_cnt_t *cnt = prof_cnt_get(ptr);
+
+	if ((uintptr_t)cnt > (uintptr_t)1) {
+		size_t size = isalloc(ptr);
+
+		cnt->epoch++;
+		/*********/
+		mb_write();
+		/*********/
+		cnt->cnts.curobjs--;
+		cnt->cnts.curbytes -= size;
+		/*********/
+		mb_write();
+		/*********/
+		cnt->epoch++;
+		/*********/
+		mb_write();
+		/*********/
+	}
+}
+
+/*
+ * Flush the shared dump buffer to prof_dump_fd and reset it.  Returns
+ * true on write error; the error is reported (and optionally aborts)
+ * only when propagate_err is false, otherwise it is left for the caller.
+ * NOTE(review): a short write() (err >= 0 but < prof_dump_buf_end) is
+ * treated as success -- confirm that is acceptable for dump output.
+ */
+static bool
+prof_flush(bool propagate_err)
+{
+	bool ret = false;
+	ssize_t err;
+
+	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
+	if (err == -1) {
+		if (propagate_err == false) {
+			malloc_write("<jemalloc>: write() failed during heap "
+			    "profile flush\n");
+			if (opt_abort)
+				abort();
+		}
+		ret = true;
+	}
+	prof_dump_buf_end = 0;
+
+	return (ret);
+}
+
+/*
+ * Buffered writer for profile dumps: append the NUL-terminated string s
+ * to prof_dump_buf, flushing to prof_dump_fd whenever the buffer fills.
+ * Returns true on write error when propagate_err is true.
+ */
+static bool
+prof_write(const char *s, bool propagate_err)
+{
+	unsigned i, slen, n;
+
+	i = 0;
+	slen = strlen(s);
+	while (i < slen) {
+		/* Flush the buffer if it is full. */
+		if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE)
+			if (prof_flush(propagate_err) && propagate_err)
+				return (true);
+
+		/*
+		 * Compare the *remaining* length (slen - i), not the total
+		 * length.  Using slen here caused a read past the end of s
+		 * for strings longer than PROF_DUMP_BUF_SIZE: on the second
+		 * pass the "as much as will fit" branch could pick an n
+		 * larger than the bytes actually left in s.
+		 */
+		if (prof_dump_buf_end + (slen - i) <= PROF_DUMP_BUF_SIZE) {
+			/* Finish writing. */
+			n = slen - i;
+		} else {
+			/* Write as much of s as will fit. */
+			n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end;
+		}
+		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
+		prof_dump_buf_end += n;
+		i += n;
+	}
+
+	return (false);
+}
+
+/*
+ * Snapshot and sum all per-thread counters attached to ctx into
+ * ctx->cnt_dump and cnt_all, counting leaky contexts in *leak_nctx.
+ * Each thread counter is read with a seqlock: retry while the writer's
+ * epoch is odd or changes during the copy (see prof_malloc()).
+ */
+static void
+prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
+{
+	prof_thr_cnt_t *thr_cnt;
+	prof_cnt_t tcnt;
+
+	malloc_mutex_lock(&ctx->lock);
+
+	memcpy(&ctx->cnt_dump, &ctx->cnt_merged, sizeof(prof_cnt_t));
+	ql_foreach(thr_cnt, &ctx->cnts_ql, link) {
+		volatile unsigned *epoch = &thr_cnt->epoch;
+
+		while (true) {
+			unsigned epoch0 = *epoch;
+
+			/* Make sure epoch is even. */
+			if (epoch0 & 1U)
+				continue;
+
+			memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
+
+			/* Terminate if epoch didn't change while reading. */
+			if (*epoch == epoch0)
+				break;
+		}
+
+		ctx->cnt_dump.curobjs += tcnt.curobjs;
+		ctx->cnt_dump.curbytes += tcnt.curbytes;
+		ctx->cnt_dump.accumobjs += tcnt.accumobjs;
+		ctx->cnt_dump.accumbytes += tcnt.accumbytes;
+
+		if (tcnt.curobjs != 0)
+			(*leak_nctx)++;
+	}
+
+	/* Merge into cnt_all. */
+	cnt_all->curobjs += ctx->cnt_dump.curobjs;
+	cnt_all->curbytes += ctx->cnt_dump.curbytes;
+	cnt_all->accumobjs += ctx->cnt_dump.accumobjs;
+	cnt_all->accumbytes += ctx->cnt_dump.accumbytes;
+
+	malloc_mutex_unlock(&ctx->lock);
+}
+
+/*
+ * Emit one context record in pprof heap-profile format:
+ * "curobjs: curbytes [accumobjs: accumbytes] @ <frame addresses>\n".
+ * Returns true on write error (propagated from prof_write()).
+ */
+static bool
+prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err)
+{
+	char buf[UMAX2S_BUFSIZE];
+	unsigned i;
+
+	if (prof_write(umax2s(ctx->cnt_dump.curobjs, 10, buf), propagate_err)
+	    || prof_write(": ", propagate_err)
+	    || prof_write(umax2s(ctx->cnt_dump.curbytes, 10, buf),
+	    propagate_err)
+	    || prof_write(" [", propagate_err)
+	    || prof_write(umax2s(ctx->cnt_dump.accumobjs, 10, buf),
+	    propagate_err)
+	    || prof_write(": ", propagate_err)
+	    || prof_write(umax2s(ctx->cnt_dump.accumbytes, 10, buf),
+	    propagate_err)
+	    || prof_write("] @", propagate_err))
+		return (true);
+
+	for (i = 0; i < bt->len; i++) {
+		if (prof_write(" 0x", propagate_err)
+		    || prof_write(umax2s((uintptr_t)bt->vec[i], 16, buf),
+		    propagate_err))
+			return (true);
+	}
+
+	if (prof_write("\n", propagate_err))
+		return (true);
+
+	return (false);
+}
+
+/*
+ * Append the contents of /proc/<pid>/maps (Linux-specific) to the dump
+ * as a "MAPPED_LIBRARIES:" section, streaming through prof_dump_buf.
+ * Returns true if the maps file cannot be opened or a flush fails.
+ */
+static bool
+prof_dump_maps(bool propagate_err)
+{
+	int mfd;
+	char buf[UMAX2S_BUFSIZE];
+	char *s;
+	unsigned i, slen;
+	/* /proc/<pid>/maps\0 */
+	char mpath[6     + UMAX2S_BUFSIZE
+			      + 5  + 1];
+
+	i = 0;
+
+	s = "/proc/";
+	slen = strlen(s);
+	memcpy(&mpath[i], s, slen);
+	i += slen;
+
+	s = umax2s(getpid(), 10, buf);
+	slen = strlen(s);
+	memcpy(&mpath[i], s, slen);
+	i += slen;
+
+	s = "/maps";
+	slen = strlen(s);
+	memcpy(&mpath[i], s, slen);
+	i += slen;
+
+	mpath[i] = '\0';
+
+	mfd = open(mpath, O_RDONLY);
+	if (mfd != -1) {
+		ssize_t nread;
+
+		if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) &&
+		    propagate_err)
+			return (true);
+		nread = 0;
+		do {
+			/* Account for bytes read on the previous pass. */
+			prof_dump_buf_end += nread;
+			if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) {
+				/* Make space in prof_dump_buf before read(). */
+				if (prof_flush(propagate_err) && propagate_err)
+					return (true);
+			}
+			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
+			    PROF_DUMP_BUF_SIZE - prof_dump_buf_end);
+		} while (nread > 0);
+		close(mfd);
+	} else
+		return (true);
+
+	return (false);
+}
+
+/*
+ * Write a complete heap profile to `filename`: header, one record per
+ * context (merged across threads), and the process memory map.  Runs
+ * entirely inside prof_enter()/prof_leave() so bt2ctx and the shared
+ * dump buffer are stable.  If leakcheck, print a leak summary to the
+ * error writer.  Returns true on any error.
+ */
+static bool
+prof_dump(const char *filename, bool leakcheck, bool propagate_err)
+{
+	prof_cnt_t cnt_all;
+	size_t tabind;
+	prof_bt_t *bt;
+	prof_ctx_t *ctx;
+	char buf[UMAX2S_BUFSIZE];
+	size_t leak_nctx;
+
+	prof_enter();
+	prof_dump_fd = creat(filename, 0644);
+	if (prof_dump_fd == -1) {
+		if (propagate_err == false) {
+			malloc_write("<jemalloc>: creat(\"");
+			malloc_write(filename);
+			malloc_write("\", 0644) failed\n");
+			if (opt_abort)
+				abort();
+		}
+		goto ERROR;
+	}
+
+	/* Merge per thread profile stats, and sum them in cnt_all. */
+	memset(&cnt_all, 0, sizeof(prof_cnt_t));
+	leak_nctx = 0;
+	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, (void **)&ctx)
+	    == false;) {
+		prof_ctx_merge(ctx, &cnt_all, &leak_nctx);
+	}
+
+	/* Dump profile header. */
+	if (prof_write("heap profile: ", propagate_err)
+	    || prof_write(umax2s(cnt_all.curobjs, 10, buf), propagate_err)
+	    || prof_write(": ", propagate_err)
+	    || prof_write(umax2s(cnt_all.curbytes, 10, buf), propagate_err)
+	    || prof_write(" [", propagate_err)
+	    || prof_write(umax2s(cnt_all.accumobjs, 10, buf), propagate_err)
+	    || prof_write(": ", propagate_err)
+	    || prof_write(umax2s(cnt_all.accumbytes, 10, buf), propagate_err))
+		goto ERROR;
+
+	if (opt_lg_prof_sample == 0) {
+		if (prof_write("] @ heapprofile\n", propagate_err))
+			goto ERROR;
+	} else {
+		/* Sampled profile: record the sampling interval. */
+		if (prof_write("] @ heap_v2/", propagate_err)
+		    || prof_write(umax2s((uint64_t)1U << opt_lg_prof_sample, 10,
+		    buf), propagate_err)
+		    || prof_write("\n", propagate_err))
+			goto ERROR;
+	}
+
+	/* Dump per ctx profile stats. */
+	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, (void **)&bt, (void **)&ctx)
+	    == false;) {
+		if (prof_dump_ctx(ctx, bt, propagate_err))
+			goto ERROR;
+	}
+
+	/* Dump /proc/<pid>/maps if possible. */
+	if (prof_dump_maps(propagate_err))
+		goto ERROR;
+
+	if (prof_flush(propagate_err))
+		goto ERROR;
+	close(prof_dump_fd);
+	prof_leave();
+
+	if (leakcheck && cnt_all.curbytes != 0) {
+		malloc_write("<jemalloc>: Leak summary: ");
+		malloc_write(umax2s(cnt_all.curbytes, 10, buf));
+		malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, ");
+		malloc_write(umax2s(cnt_all.curobjs, 10, buf));
+		malloc_write((cnt_all.curobjs != 1) ? " objects, " :
+		    " object, ");
+		malloc_write(umax2s(leak_nctx, 10, buf));
+		malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n");
+		malloc_write("<jemalloc>: Run pprof on \"");
+		malloc_write(filename);
+		malloc_write("\" for leak detail\n");
+	}
+
+	return (false);
+ERROR:
+	prof_leave();
+	return (true);
+}
+
+/* Worst-case filename length: prefix + pid + seq + v<vseq> + ".heap\0". */
+#define DUMP_FILENAME_BUFSIZE (PATH_MAX+ UMAX2S_BUFSIZE		\
+					       + 1			\
+			+ UMAX2S_BUFSIZE				\
+					 + 2				\
+					       + UMAX2S_BUFSIZE		\
+		  + 5  + 1)
+/*
+ * Build "<prefix>.<pid>.<seq>.<v><vseq>.heap" into filename.  v is a
+ * single character tagging the dump type ('f'inal, 'i'nterval, 'm'anual,
+ * 'u'sage); vseq == UINT64_MAX (all-ones sentinel) suppresses the
+ * sequence number.  Caller must hold prof_dump_seq_mtx (this bumps
+ * prof_dump_seq).
+ */
+static void
+prof_dump_filename(char *filename, char v, int64_t vseq)
+{
+	char buf[UMAX2S_BUFSIZE];
+	char *s;
+	unsigned i, slen;
+
+	/*
+	 * Construct a filename of the form:
+	 *
+	 *   <prefix>.<pid>.<seq>.v<vseq>.heap\0
+	 * or
+	 *   jeprof.<pid>.<seq>.v<vseq>.heap\0
+	 */
+
+	i = 0;
+
+	/*
+	 * Use JEMALLOC_PROF_PREFIX if it's set, and if it is short enough to
+	 * avoid overflowing DUMP_FILENAME_BUFSIZE.  The result may exceed
+	 * PATH_MAX, but creat(2) will catch that problem.
+	 */
+	if ((s = getenv("JEMALLOC_PROF_PREFIX")) != NULL
+	    && strlen(s) + (DUMP_FILENAME_BUFSIZE - PATH_MAX) <= PATH_MAX) {
+		slen = strlen(s);
+		memcpy(&filename[i], s, slen);
+		i += slen;
+
+		s = ".";
+	} else
+		s = "jeprof.";
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	s = umax2s(getpid(), 10, buf);
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	s = ".";
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	s = umax2s(prof_dump_seq, 10, buf);
+	prof_dump_seq++;
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	s = ".";
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	filename[i] = v;
+	i++;
+
+	if (vseq != 0xffffffffffffffffLLU) {
+		s = umax2s(vseq, 10, buf);
+		slen = strlen(s);
+		memcpy(&filename[i], s, slen);
+		i += slen;
+	}
+
+	s = ".heap";
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	filename[i] = '\0';
+}
+
+/*
+ * Final dump, intended for process exit: 'f'-tagged filename with no
+ * sequence number (the all-ones vseq sentinel), honoring opt_prof_leak.
+ */
+static void
+prof_fdump(void)
+{
+	char filename[DUMP_FILENAME_BUFSIZE];
+
+	if (prof_booted == false)
+		return;
+
+	malloc_mutex_lock(&prof_dump_seq_mtx);
+	prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU);
+	malloc_mutex_unlock(&prof_dump_seq_mtx);
+	prof_dump(filename, opt_prof_leak, false);
+}
+
+/*
+ * Interval-triggered dump ('i' tag).  If a prof_enter() critical section
+ * is active (enq), defer by setting enq_idump; prof_leave() will replay
+ * the request.  Prevents recursive locking of bt2ctx_mtx.
+ */
+void
+prof_idump(void)
+{
+	char filename[DUMP_FILENAME_BUFSIZE];
+
+	if (prof_booted == false)
+		return;
+	malloc_mutex_lock(&enq_mtx);
+	if (enq) {
+		enq_idump = true;
+		malloc_mutex_unlock(&enq_mtx);
+		return;
+	}
+	malloc_mutex_unlock(&enq_mtx);
+
+	malloc_mutex_lock(&prof_dump_seq_mtx);
+	prof_dump_filename(filename, 'i', prof_dump_iseq);
+	prof_dump_iseq++;
+	malloc_mutex_unlock(&prof_dump_seq_mtx);
+	prof_dump(filename, false, false);
+}
+
+/*
+ * Manually requested dump ('m' tag), e.g. via mallctl.  A NULL filename
+ * auto-generates one.  Returns true on failure (including "profiling not
+ * active"); errors are propagated rather than printed.
+ */
+bool
+prof_mdump(const char *filename)
+{
+	char filename_buf[DUMP_FILENAME_BUFSIZE];
+
+	if (opt_prof == false || prof_booted == false)
+		return (true);
+
+	if (filename == NULL) {
+		/* No filename specified, so automatically generate one. */
+		malloc_mutex_lock(&prof_dump_seq_mtx);
+		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
+		prof_dump_mseq++;
+		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		filename = filename_buf;
+	}
+	return (prof_dump(filename, false, true));
+}
+
+/*
+ * Usage-threshold-triggered profile dump (filename marker 'u').  Mirrors
+ * prof_idump(): if dumping is suppressed (enq set), latch the request in
+ * enq_udump for later servicing instead of dumping now.
+ */
+void
+prof_udump(void)
+{
+	char filename[DUMP_FILENAME_BUFSIZE];
+
+	if (prof_booted == false)
+		return;
+	malloc_mutex_lock(&enq_mtx);
+	if (enq) {
+		/* Defer: remember that a usage dump is pending. */
+		enq_udump = true;
+		malloc_mutex_unlock(&enq_mtx);
+		return;
+	}
+	malloc_mutex_unlock(&enq_mtx);
+
+	/* prof_dump_useq is protected by prof_dump_seq_mtx. */
+	malloc_mutex_lock(&prof_dump_seq_mtx);
+	prof_dump_filename(filename, 'u', prof_dump_useq);
+	prof_dump_useq++;
+	malloc_mutex_unlock(&prof_dump_seq_mtx);
+	prof_dump(filename, false, false);
+}
+
+/*
+ * ckh hash function for backtrace keys: hashes the raw vector of return
+ * addresses (bt->vec, bt->len pointers).  When minbits <= 32 a single
+ * 64-bit hash supplies both required hashes (low/high halves); otherwise a
+ * second hash with a different seed is computed for hash2.
+ */
+static void
+prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
+{
+	size_t ret1, ret2;
+	uint64_t h;
+	prof_bt_t *bt = (prof_bt_t *)key;
+
+	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
+	assert(hash1 != NULL);
+	assert(hash2 != NULL);
+
+	h = hash(bt->vec, bt->len * sizeof(void *), 0x94122f335b332aeaLLU);
+	if (minbits <= 32) {
+		/*
+		 * Avoid doing multiple hashes, since a single hash provides
+		 * enough bits.
+		 */
+		ret1 = h & ZU(0xffffffffU);
+		ret2 = h >> 32;
+	} else {
+		ret1 = h;
+		/* Re-hash with a different seed for the second hash. */
+		ret2 = hash(bt->vec, bt->len * sizeof(void *),
+		    0x8432a476666bbc13U);
+	}
+
+	*hash1 = ret1;
+	*hash2 = ret2;
+}
+
+/*
+ * ckh key comparator for backtraces: two backtraces are equal iff they have
+ * the same depth and identical address vectors.  Returns true on equality.
+ */
+static bool
+prof_bt_keycomp(const void *k1, const void *k2)
+{
+	const prof_bt_t *bt1 = (prof_bt_t *)k1;
+	const prof_bt_t *bt2 = (prof_bt_t *)k2;
+
+	if (bt1->len != bt2->len)
+		return (false);
+	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
+}
+
+/*
+ * pthread TSD destructor for the per-thread backtrace->counter table
+ * (bt2cnt_tls).  On thread exit, each thread-local counter is merged into
+ * its shared ctx's cnt_merged totals and unlinked from the ctx, then the
+ * hash table and the counters themselves are freed.  Deletion of the cnt
+ * objects is deferred until after iteration so ckh_iter() never sees a
+ * freed entry.
+ */
+static void
+bt2cnt_thread_cleanup(void *arg)
+{
+	ckh_t *bt2cnt;
+
+	bt2cnt = bt2cnt_tls;
+	if (bt2cnt != NULL) {
+		ql_head(prof_thr_cnt_t) cnts_ql;
+		size_t tabind;
+		prof_thr_cnt_t *cnt;
+
+		/* Iteratively merge cnt's into the global stats. */
+		ql_new(&cnts_ql);
+		tabind = 0;
+		while (ckh_iter(bt2cnt, &tabind, NULL, (void **)&cnt) ==
+		    false) {
+			prof_ctx_t *ctx = cnt->ctx;
+			/* Merge stats and detach from ctx. */
+			malloc_mutex_lock(&ctx->lock);
+			ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
+			ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
+			ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
+			ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
+			ql_remove(&ctx->cnts_ql, cnt, link);
+			malloc_mutex_unlock(&ctx->lock);
+
+			/*
+			 * Stash cnt for deletion after finishing with
+			 * ckh_iter().
+			 */
+			ql_tail_insert(&cnts_ql, cnt, link);
+		}
+
+		/*
+		 * Delete the hash table now that cnts_ql has a list of all
+		 * cnt's.
+		 */
+		ckh_delete(bt2cnt);
+		idalloc(bt2cnt);
+		bt2cnt_tls = NULL;
+
+		/* Delete cnt's. */
+		while ((cnt = ql_last(&cnts_ql, link)) != NULL) {
+			ql_remove(&cnts_ql, cnt, link);
+			idalloc(cnt);
+		}
+	}
+}
+
+/*
+ * Early profiling bootstrap: decide opt_prof, prof_interval, and
+ * prof_promote before any arenas exist.  Leak checking alone (opt_prof_leak
+ * with opt_prof off) force-enables profiling but disables every automatic
+ * dump trigger, so only the atexit() leak report is produced.
+ */
+void
+prof_boot0(void)
+{
+
+	/*
+	 * opt_prof and prof_promote must be in their final state before any
+	 * arenas are initialized, so this function must be executed early.
+	 */
+
+	if (opt_prof_leak && opt_prof == false) {
+		/*
+		 * Enable opt_prof, but in such a way that profiles are never
+		 * automatically dumped.
+		 */
+		opt_prof = true;
+		opt_prof_udump = false;
+		prof_interval = 0;
+	} else if (opt_prof) {
+		/* Interval dumping every 2^opt_lg_prof_interval bytes. */
+		if (opt_lg_prof_interval >= 0) {
+			prof_interval = (((uint64_t)1U) <<
+			    opt_lg_prof_interval);
+		} else
+			prof_interval = 0;
+	}
+
+	/* Promote small sampled allocations when samples span > one page. */
+	prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT);
+}
+
+/*
+ * Second-stage profiling bootstrap: create the global backtrace->ctx table,
+ * its mutex, the per-thread counter TSD key, the dump-sequence and enqueue
+ * mutexes, and register the final dump with atexit().  Returns true on
+ * error, false on success.  TSD key creation failure always aborts, since
+ * per-thread cleanup would otherwise silently leak.
+ */
+bool
+prof_boot1(void)
+{
+
+	if (opt_prof) {
+		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
+		    prof_bt_keycomp))
+			return (true);
+		if (malloc_mutex_init(&bt2ctx_mtx))
+			return (true);
+		if (pthread_key_create(&bt2cnt_tsd, bt2cnt_thread_cleanup)
+		    != 0) {
+			malloc_write(
+			    "<jemalloc>: Error in pthread_key_create()\n");
+			abort();
+		}
+
+		/* Maximum backtrace depth, from the lg option. */
+		prof_bt_max = (1U << opt_lg_prof_bt_max);
+		if (malloc_mutex_init(&prof_dump_seq_mtx))
+			return (true);
+
+		if (malloc_mutex_init(&enq_mtx))
+			return (true);
+		enq = false;
+		enq_idump = false;
+		enq_udump = false;
+
+		if (atexit(prof_fdump) != 0) {
+			malloc_write("<jemalloc>: Error in atexit()\n");
+			if (opt_abort)
+				abort();
+		}
+	}
+
+#ifdef JEMALLOC_PROF_LIBGCC
+	/*
+	 * Cause the backtracing machinery to allocate its internal state
+	 * before enabling profiling.
+	 */
+	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
+#endif
+
+	prof_booted = true;
+
+	return (false);
+}
+
+/******************************************************************************/
+#endif /* JEMALLOC_PROF */
diff --git a/dep/src/jmalloc/stats.c b/dep/src/jmalloc/stats.c
new file mode 100644
index 00000000000..9dc75293731
--- /dev/null
+++ b/dep/src/jmalloc/stats.c
@@ -0,0 +1,717 @@
+#define JEMALLOC_STATS_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/*
+ * Convenience wrappers around the mallctl interface.  Each fetches a value
+ * of type t for control name n into *v.  The _I/_J/_IJ variants translate
+ * the name to a mib, then splice the local variables i and/or j (which must
+ * be in scope at the expansion site) into the arena/bin index slots of the
+ * mib before the by-mib lookup.
+ */
+#define CTL_GET(n, v, t) do {						\
+	size_t sz = sizeof(t);						\
+	xmallctl(n, v, &sz, NULL, 0);					\
+} while (0)
+
+/* Substitute i into mib[2] (e.g. the arena index). */
+#define CTL_I_GET(n, v, t) do {						\
+	size_t mib[6];							\
+	size_t miblen = sizeof(mib) / sizeof(size_t);			\
+	size_t sz = sizeof(t);						\
+	xmallctlnametomib(n, mib, &miblen);				\
+	mib[2] = i;							\
+	xmallctlbymib(mib, miblen, v, &sz, NULL, 0);			\
+} while (0)
+
+/* Substitute j into mib[2] (e.g. the bin/lrun index). */
+#define CTL_J_GET(n, v, t) do {						\
+	size_t mib[6];							\
+	size_t miblen = sizeof(mib) / sizeof(size_t);			\
+	size_t sz = sizeof(t);						\
+	xmallctlnametomib(n, mib, &miblen);				\
+	mib[2] = j;							\
+	xmallctlbymib(mib, miblen, v, &sz, NULL, 0);			\
+} while (0)
+
+/* Substitute i into mib[2] and j into mib[4]. */
+#define CTL_IJ_GET(n, v, t) do {					\
+	size_t mib[6];							\
+	size_t miblen = sizeof(mib) / sizeof(size_t);			\
+	size_t sz = sizeof(t);						\
+	xmallctlnametomib(n, mib, &miblen);				\
+	mib[2] = i;							\
+	mib[4] = j;							\
+	xmallctlbymib(mib, miblen, v, &sz, NULL, 0);			\
+} while (0)
+
+/******************************************************************************/
+/* Data. */
+
+bool opt_stats_print = false;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+#ifdef JEMALLOC_STATS
+static void malloc_vcprintf(void (*write_cb)(void *, const char *),
+ void *cbopaque, const char *format, va_list ap);
+static void stats_arena_bins_print(void (*write_cb)(void *, const char *),
+ void *cbopaque, unsigned i);
+static void stats_arena_lruns_print(void (*write_cb)(void *, const char *),
+ void *cbopaque, unsigned i);
+static void stats_arena_print(void (*write_cb)(void *, const char *),
+ void *cbopaque, unsigned i);
+#endif
+
+/******************************************************************************/
+
+/*
+ * We don't want to depend on vsnprintf() for production builds, since that can
+ * cause unnecessary bloat for static binaries. umax2s() provides minimal
+ * integer printing functionality, so that malloc_printf() use can be limited to
+ * JEMALLOC_STATS code.
+ */
+/*
+ * Convert x to a NUL-terminated string in the given base, writing backwards
+ * into the caller-supplied buffer s (which must hold at least
+ * UMAX2S_BUFSIZE bytes).  Returns a pointer INTO s at the first digit, not
+ * s itself.  Bases 10 and 16 are special-cased; other bases up to 36 use
+ * the full alphanumeric digit table.
+ */
+char *
+umax2s(uintmax_t x, unsigned base, char *s)
+{
+	unsigned i;
+
+	/* Fill digits from the end of the buffer toward the front. */
+	i = UMAX2S_BUFSIZE - 1;
+	s[i] = '\0';
+	switch (base) {
+	case 10:
+		do {
+			i--;
+			s[i] = "0123456789"[x % 10];
+			x /= 10;
+		} while (x > 0);
+		break;
+	case 16:
+		do {
+			i--;
+			s[i] = "0123456789abcdef"[x & 0xf];
+			x >>= 4;
+		} while (x > 0);
+		break;
+	default:
+		do {
+			i--;
+			s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % base];
+			x /= base;
+		} while (x > 0);
+	}
+
+	return (&s[i]);
+}
+
+#ifdef JEMALLOC_STATS
+/*
+ * Format into a fixed 4 KiB stack buffer and hand the result to write_cb.
+ * A NULL write_cb falls back to the default malloc_message sink.  Output
+ * longer than the buffer is silently truncated by vsnprintf.
+ */
+static void
+malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, va_list ap)
+{
+	char buf[4096];
+
+	if (write_cb == NULL) {
+		/*
+		 * The caller did not provide an alternate write_cb callback
+		 * function, so use the default one.  malloc_write() is an
+		 * inline function, so use malloc_message() directly here.
+		 */
+		write_cb = JEMALLOC_P(malloc_message);
+		cbopaque = NULL;
+	}
+
+	vsnprintf(buf, sizeof(buf), format, ap);
+	write_cb(cbopaque, buf);
+}
+
+/*
+ * Print to a callback function in such a way as to (hopefully) avoid memory
+ * allocation.
+ */
+JEMALLOC_ATTR(format(printf, 3, 4))
+void
+malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+ const char *format, ...)
+{
+ va_list ap;
+
+ va_start(ap, format);
+ malloc_vcprintf(write_cb, cbopaque, format, ap);
+ va_end(ap);
+}
+
+/*
+ * Print to stderr in such a way as to (hopefully) avoid memory allocation.
+ */
+JEMALLOC_ATTR(format(printf, 1, 2))
+void
+malloc_printf(const char *format, ...)
+{
+ va_list ap;
+
+ va_start(ap, format);
+ malloc_vcprintf(NULL, NULL, format, ap);
+ va_end(ap);
+}
+#endif
+
+#ifdef JEMALLOC_STATS
+/*
+ * Print per-bin statistics for arena i.  Bins with no runs are compressed
+ * into "[a..b]" gap markers.  The column set (and header) depends on
+ * whether the thread cache was compiled in (config.tcache), since the
+ * nrequests/nfills/nflushes counters only exist then.  Relies on the
+ * CTL_*_GET macros picking up the locals i and j.
+ */
+static void
+stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
+    unsigned i)
+{
+	size_t pagesize;
+	bool config_tcache;
+	unsigned nbins, j, gap_start;
+
+	CTL_GET("arenas.pagesize", &pagesize, size_t);
+
+	CTL_GET("config.tcache", &config_tcache, bool);
+	if (config_tcache) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "bins:     bin    size regs pgs    allocated      nmalloc"
+		    "      ndalloc    nrequests       nfills     nflushes"
+		    "      newruns       reruns      maxruns      curruns\n");
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "bins:     bin    size regs pgs    allocated      nmalloc"
+		    "      ndalloc      newruns       reruns      maxruns"
+		    "      curruns\n");
+	}
+	CTL_GET("arenas.nbins", &nbins, unsigned);
+	/* gap_start == UINT_MAX means "not currently inside a gap". */
+	for (j = 0, gap_start = UINT_MAX; j < nbins; j++) {
+		uint64_t nruns;
+
+		CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t);
+		if (nruns == 0) {
+			if (gap_start == UINT_MAX)
+				gap_start = j;
+		} else {
+			unsigned ntbins_, nqbins, ncbins, nsbins;
+			size_t reg_size, run_size, allocated;
+			uint32_t nregs;
+			uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
+			uint64_t reruns;
+			size_t highruns, curruns;
+
+			/* Close out any pending gap before printing a row. */
+			if (gap_start != UINT_MAX) {
+				if (j > gap_start + 1) {
+					/* Gap of more than one size class. */
+					malloc_cprintf(write_cb, cbopaque,
+					    "[%u..%u]\n", gap_start,
+					    j - 1);
+				} else {
+					/* Gap of one size class. */
+					malloc_cprintf(write_cb, cbopaque,
+					    "[%u]\n", gap_start);
+				}
+				gap_start = UINT_MAX;
+			}
+			CTL_GET("arenas.ntbins", &ntbins_, unsigned);
+			CTL_GET("arenas.nqbins", &nqbins, unsigned);
+			CTL_GET("arenas.ncbins", &ncbins, unsigned);
+			CTL_GET("arenas.nsbins", &nsbins, unsigned);
+			CTL_J_GET("arenas.bin.0.size", &reg_size, size_t);
+			CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t);
+			CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t);
+			CTL_IJ_GET("stats.arenas.0.bins.0.allocated",
+			    &allocated, size_t);
+			CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc",
+			    &nmalloc, uint64_t);
+			CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc",
+			    &ndalloc, uint64_t);
+			if (config_tcache) {
+				CTL_IJ_GET("stats.arenas.0.bins.0.nrequests",
+				    &nrequests, uint64_t);
+				CTL_IJ_GET("stats.arenas.0.bins.0.nfills",
+				    &nfills, uint64_t);
+				CTL_IJ_GET("stats.arenas.0.bins.0.nflushes",
+				    &nflushes, uint64_t);
+			}
+			CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns,
+			    uint64_t);
+			CTL_IJ_GET("stats.arenas.0.bins.0.highruns", &highruns,
+			    size_t);
+			CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns,
+			    size_t);
+			if (config_tcache) {
+				malloc_cprintf(write_cb, cbopaque,
+				    "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64
+				    " %12"PRIu64" %12"PRIu64" %12"PRIu64
+				    " %12"PRIu64" %12"PRIu64" %12"PRIu64
+				    " %12zu %12zu\n",
+				    j,
+				    j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
+				    "Q" : j < ntbins_ + nqbins + ncbins ? "C" :
+				    "S",
+				    reg_size, nregs, run_size / pagesize,
+				    allocated, nmalloc, ndalloc, nrequests,
+				    nfills, nflushes, nruns, reruns, highruns,
+				    curruns);
+			} else {
+				malloc_cprintf(write_cb, cbopaque,
+				    "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64
+				    " %12"PRIu64" %12"PRIu64" %12"PRIu64
+				    " %12zu %12zu\n",
+				    j,
+				    j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
+				    "Q" : j < ntbins_ + nqbins + ncbins ? "C" :
+				    "S",
+				    reg_size, nregs, run_size / pagesize,
+				    allocated, nmalloc, ndalloc, nruns, reruns,
+				    highruns, curruns);
+			}
+		}
+	}
+	/* Trailing gap running to the last bin. */
+	if (gap_start != UINT_MAX) {
+		if (j > gap_start + 1) {
+			/* Gap of more than one size class. */
+			malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n",
+			    gap_start, j - 1);
+		} else {
+			/* Gap of one size class. */
+			malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start);
+		}
+	}
+}
+
+/*
+ * Print large-run statistics for arena i.  Size classes with zero requests
+ * are collapsed; note that unlike the bins printer, the gap marker here
+ * prints the gap LENGTH ("[%zu]") rather than an index range.  Relies on
+ * the CTL_*_GET macros picking up the locals i and j.
+ */
+static void
+stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
+    unsigned i)
+{
+	size_t pagesize, nlruns, j;
+	ssize_t gap_start;
+
+	CTL_GET("arenas.pagesize", &pagesize, size_t);
+
+	malloc_cprintf(write_cb, cbopaque,
+	    "large:   size pages      nmalloc      ndalloc    nrequests"
+	    "      maxruns      curruns\n");
+	CTL_GET("arenas.nlruns", &nlruns, size_t);
+	/* gap_start == -1 means "not currently inside a gap". */
+	for (j = 0, gap_start = -1; j < nlruns; j++) {
+		uint64_t nmalloc, ndalloc, nrequests;
+		size_t run_size, highruns, curruns;
+
+		CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc,
+		    uint64_t);
+		CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc,
+		    uint64_t);
+		CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests,
+		    uint64_t);
+		if (nrequests == 0) {
+			if (gap_start == -1)
+				gap_start = j;
+		} else {
+			CTL_J_GET("arenas.lrun.0.size", &run_size, size_t);
+			CTL_IJ_GET("stats.arenas.0.lruns.0.highruns", &highruns,
+			    size_t);
+			CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns,
+			    size_t);
+			if (gap_start != -1) {
+				malloc_cprintf(write_cb, cbopaque, "[%zu]\n",
+				    j - gap_start);
+				gap_start = -1;
+			}
+			malloc_cprintf(write_cb, cbopaque,
+			    "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64
+			    " %12zu %12zu\n",
+			    run_size, run_size / pagesize, nmalloc, ndalloc,
+			    nrequests, highruns, curruns);
+		}
+	}
+	/* Trailing gap running to the last size class. */
+	if (gap_start != -1)
+		malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start);
+}
+
+/*
+ * Print the full statistics report for arena i: dirty-page purging
+ * activity, small/large/total allocation counters, active and mapped byte
+ * totals, then the per-bin and large-run tables.  Relies on the CTL_I_GET
+ * macro picking up the local i.
+ */
+static void
+stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
+    unsigned i)
+{
+	size_t pagesize, pactive, pdirty, mapped;
+	uint64_t npurge, nmadvise, purged;
+	size_t small_allocated;
+	uint64_t small_nmalloc, small_ndalloc, small_nrequests;
+	size_t large_allocated;
+	uint64_t large_nmalloc, large_ndalloc, large_nrequests;
+
+	CTL_GET("arenas.pagesize", &pagesize, size_t);
+
+	CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
+	CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
+	CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);
+	CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t);
+	CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t);
+	malloc_cprintf(write_cb, cbopaque,
+	    "dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s,"
+	    " %"PRIu64" madvise%s, %"PRIu64" purged\n",
+	    pactive, pdirty, npurge, npurge == 1 ? "" : "s",
+	    nmadvise, nmadvise == 1 ? "" : "s", purged);
+
+	malloc_cprintf(write_cb, cbopaque,
+	    "            allocated      nmalloc      ndalloc    nrequests\n");
+	CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t);
+	CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t);
+	CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t);
+	CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t);
+	malloc_cprintf(write_cb, cbopaque,
+	    "small:   %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
+	    small_allocated, small_nmalloc, small_ndalloc, small_nrequests);
+	CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t);
+	CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t);
+	CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t);
+	CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t);
+	malloc_cprintf(write_cb, cbopaque,
+	    "large:   %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
+	    large_allocated, large_nmalloc, large_ndalloc, large_nrequests);
+	malloc_cprintf(write_cb, cbopaque,
+	    "total:   %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
+	    small_allocated + large_allocated,
+	    small_nmalloc + large_nmalloc,
+	    small_ndalloc + large_ndalloc,
+	    small_nrequests + large_nrequests);
+	malloc_cprintf(write_cb, cbopaque, "active:  %12zu\n",
+	    pactive * pagesize );
+	CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t);
+	malloc_cprintf(write_cb, cbopaque, "mapped:  %12zu\n", mapped);
+
+	stats_arena_bins_print(write_cb, cbopaque, i);
+	stats_arena_lruns_print(write_cb, cbopaque, i);
+}
+#endif
+
+/*
+ * Top-level statistics report.  opts is a string of single-character flags
+ * that DISABLE sections: 'g' general info, 'm' merged arena stats, 'a'
+ * per-arena stats, 'b' bin stats, 'l' large stats; unknown characters are
+ * ignored.  The general section probes optional mallctl names and silently
+ * skips any that this build does not expose.  A NULL write_cb falls back
+ * to the default malloc_message sink.  Per-arena detail is compiled in
+ * only under JEMALLOC_STATS.
+ */
+void
+stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *opts)
+{
+	uint64_t epoch;
+	size_t u64sz;
+	char s[UMAX2S_BUFSIZE];
+	bool general = true;
+	bool merged = true;
+	bool unmerged = true;
+	bool bins = true;
+	bool large = true;
+
+	/* Refresh stats, in case mallctl() was called by the application. */
+	epoch = 1;
+	u64sz = sizeof(uint64_t);
+	xmallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t));
+
+	if (write_cb == NULL) {
+		/*
+		 * The caller did not provide an alternate write_cb callback
+		 * function, so use the default one.  malloc_write() is an
+		 * inline function, so use malloc_message() directly here.
+		 */
+		write_cb = JEMALLOC_P(malloc_message);
+		cbopaque = NULL;
+	}
+
+	/* Parse the section-disable flags. */
+	if (opts != NULL) {
+		unsigned i;
+
+		for (i = 0; opts[i] != '\0'; i++) {
+			switch (opts[i]) {
+			case 'g':
+				general = false;
+				break;
+			case 'm':
+				merged = false;
+				break;
+			case 'a':
+				unmerged = false;
+				break;
+			case 'b':
+				bins = false;
+				break;
+			case 'l':
+				large = false;
+				break;
+			default:;
+			}
+		}
+	}
+
+	write_cb(cbopaque, "___ Begin jemalloc statistics ___\n");
+	if (general) {
+		int err;
+		const char *cpv;
+		bool bv;
+		unsigned uv;
+		ssize_t ssv;
+		size_t sv, bsz, ssz;
+
+		bsz = sizeof(bool);
+		ssz = sizeof(size_t);
+
+		CTL_GET("version", &cpv, const char *);
+		write_cb(cbopaque, "Version: ");
+		write_cb(cbopaque, cpv);
+		write_cb(cbopaque, "\n");
+		CTL_GET("config.debug", &bv, bool);
+		write_cb(cbopaque, "Assertions ");
+		write_cb(cbopaque, bv ? "enabled" : "disabled");
+		write_cb(cbopaque, "\n");
+
+		/*
+		 * Each boolean option prints as upper case if set, lower
+		 * case if clear; options absent from this build are skipped.
+		 */
+		write_cb(cbopaque, "Boolean JEMALLOC_OPTIONS: ");
+		if ((err = JEMALLOC_P(mallctl)("opt.abort", &bv, &bsz, NULL, 0))
+		    == 0)
+			write_cb(cbopaque, bv ? "A" : "a");
+		if ((err = JEMALLOC_P(mallctl)("prof.active", &bv, &bsz,
+		    NULL, 0)) == 0)
+			write_cb(cbopaque, bv ? "E" : "e");
+		if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
+		    == 0)
+			write_cb(cbopaque, bv ? "F" : "f");
+		if ((err = JEMALLOC_P(mallctl)("opt.tcache", &bv, &bsz, NULL,
+		    0)) == 0)
+			write_cb(cbopaque, bv ? "H" : "h");
+		if ((err = JEMALLOC_P(mallctl)("opt.junk", &bv, &bsz, NULL, 0))
+		    == 0)
+			write_cb(cbopaque, bv ? "J" : "j");
+		if ((err = JEMALLOC_P(mallctl)("opt.prof_leak", &bv, &bsz, NULL,
+		    0)) == 0)
+			write_cb(cbopaque, bv ? "L" : "l");
+		if ((err = JEMALLOC_P(mallctl)("opt.overcommit", &bv, &bsz,
+		    NULL, 0)) == 0)
+			write_cb(cbopaque, bv ? "O" : "o");
+		if ((err = JEMALLOC_P(mallctl)("opt.stats_print", &bv, &bsz,
+		    NULL, 0)) == 0)
+			write_cb(cbopaque, bv ? "P" : "p");
+		if ((err = JEMALLOC_P(mallctl)("opt.prof_udump", &bv, &bsz,
+		    NULL, 0)) == 0)
+			write_cb(cbopaque, bv ? "U" : "u");
+		if ((err = JEMALLOC_P(mallctl)("opt.sysv", &bv, &bsz, NULL, 0))
+		    == 0)
+			write_cb(cbopaque, bv ? "V" : "v");
+		if ((err = JEMALLOC_P(mallctl)("opt.xmalloc", &bv, &bsz, NULL,
+		    0)) == 0)
+			write_cb(cbopaque, bv ? "X" : "x");
+		if ((err = JEMALLOC_P(mallctl)("opt.zero", &bv, &bsz, NULL, 0))
+		    == 0)
+			write_cb(cbopaque, bv ? "Z" : "z");
+		write_cb(cbopaque, "\n");
+
+		write_cb(cbopaque, "CPUs: ");
+		write_cb(cbopaque, umax2s(ncpus, 10, s));
+		write_cb(cbopaque, "\n");
+
+		CTL_GET("arenas.narenas", &uv, unsigned);
+		write_cb(cbopaque, "Max arenas: ");
+		write_cb(cbopaque, umax2s(uv, 10, s));
+		write_cb(cbopaque, "\n");
+
+		write_cb(cbopaque, "Pointer size: ");
+		write_cb(cbopaque, umax2s(sizeof(void *), 10, s));
+		write_cb(cbopaque, "\n");
+
+		CTL_GET("arenas.quantum", &sv, size_t);
+		write_cb(cbopaque, "Quantum size: ");
+		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, "\n");
+
+		CTL_GET("arenas.cacheline", &sv, size_t);
+		write_cb(cbopaque, "Cacheline size (assumed): ");
+		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, "\n");
+
+		CTL_GET("arenas.subpage", &sv, size_t);
+		write_cb(cbopaque, "Subpage spacing: ");
+		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, "\n");
+
+		/* Size-class ranges for each spacing family. */
+		if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz,
+		    NULL, 0)) == 0) {
+			write_cb(cbopaque, "Tiny 2^n-spaced sizes: [");
+			write_cb(cbopaque, umax2s(sv, 10, s));
+			write_cb(cbopaque, "..");
+
+			CTL_GET("arenas.tspace_max", &sv, size_t);
+			write_cb(cbopaque, umax2s(sv, 10, s));
+			write_cb(cbopaque, "]\n");
+		}
+
+		CTL_GET("arenas.qspace_min", &sv, size_t);
+		write_cb(cbopaque, "Quantum-spaced sizes: [");
+		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, "..");
+		CTL_GET("arenas.qspace_max", &sv, size_t);
+		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, "]\n");
+
+		CTL_GET("arenas.cspace_min", &sv, size_t);
+		write_cb(cbopaque, "Cacheline-spaced sizes: [");
+		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, "..");
+		CTL_GET("arenas.cspace_max", &sv, size_t);
+		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, "]\n");
+
+		CTL_GET("arenas.sspace_min", &sv, size_t);
+		write_cb(cbopaque, "Subpage-spaced sizes: [");
+		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, "..");
+		CTL_GET("arenas.sspace_max", &sv, size_t);
+		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, "]\n");
+
+		CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t);
+		if (ssv >= 0) {
+			write_cb(cbopaque,
+			    "Min active:dirty page ratio per arena: ");
+			write_cb(cbopaque, umax2s((1U << ssv), 10, s));
+			write_cb(cbopaque, ":1\n");
+		} else {
+			write_cb(cbopaque,
+			    "Min active:dirty page ratio per arena: N/A\n");
+		}
+		if ((err = JEMALLOC_P(mallctl)("arenas.tcache_max", &sv,
+		    &ssz, NULL, 0)) == 0) {
+			write_cb(cbopaque,
+			    "Maximum thread-cached size class: ");
+			write_cb(cbopaque, umax2s(sv, 10, s));
+			write_cb(cbopaque, "\n");
+		}
+		if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv,
+		    &ssz, NULL, 0)) == 0) {
+			size_t tcache_gc_sweep = (1U << ssv);
+			bool tcache_enabled;
+			CTL_GET("opt.tcache", &tcache_enabled, bool);
+			write_cb(cbopaque, "Thread cache GC sweep interval: ");
+			write_cb(cbopaque, tcache_enabled && ssv >= 0 ?
+			    umax2s(tcache_gc_sweep, 10, s) : "N/A");
+			write_cb(cbopaque, "\n");
+		}
+		if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
+		    == 0 && bv) {
+			CTL_GET("opt.lg_prof_bt_max", &sv, size_t);
+			write_cb(cbopaque, "Maximum profile backtrace depth: ");
+			write_cb(cbopaque, umax2s((1U << sv), 10, s));
+			write_cb(cbopaque, "\n");
+
+			CTL_GET("opt.lg_prof_sample", &sv, size_t);
+			write_cb(cbopaque, "Average profile sample interval: ");
+			write_cb(cbopaque, umax2s((1U << sv), 10, s));
+			write_cb(cbopaque, " (2^");
+			write_cb(cbopaque, umax2s(sv, 10, s));
+			write_cb(cbopaque, ")\n");
+
+			CTL_GET("opt.lg_prof_interval", &ssv, ssize_t);
+			write_cb(cbopaque, "Average profile dump interval: ");
+			if (ssv >= 0) {
+				write_cb(cbopaque, umax2s((1U << ssv), 10, s));
+				write_cb(cbopaque, " (2^");
+				write_cb(cbopaque, umax2s(ssv, 10, s));
+				write_cb(cbopaque, ")\n");
+			} else
+				write_cb(cbopaque, "N/A\n");
+		}
+		CTL_GET("arenas.chunksize", &sv, size_t);
+		write_cb(cbopaque, "Chunk size: ");
+		write_cb(cbopaque, umax2s(sv, 10, s));
+		CTL_GET("opt.lg_chunk", &sv, size_t);
+		write_cb(cbopaque, " (2^");
+		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, ")\n");
+	}
+
+#ifdef JEMALLOC_STATS
+	{
+		int err;
+		size_t ssz;
+		size_t allocated, active, mapped;
+		size_t chunks_current, chunks_high, swap_avail;
+		uint64_t chunks_total;
+		size_t huge_allocated;
+		uint64_t huge_nmalloc, huge_ndalloc;
+
+		ssz = sizeof(size_t);
+
+		CTL_GET("stats.allocated", &allocated, size_t);
+		CTL_GET("stats.active", &active, size_t);
+		CTL_GET("stats.mapped", &mapped, size_t);
+		malloc_cprintf(write_cb, cbopaque,
+		    "Allocated: %zu, active: %zu, mapped: %zu\n", allocated,
+		    active, mapped);
+
+		/* Print chunk stats. */
+		CTL_GET("stats.chunks.total", &chunks_total, uint64_t);
+		CTL_GET("stats.chunks.high", &chunks_high, size_t);
+		CTL_GET("stats.chunks.current", &chunks_current, size_t);
+		if ((err = JEMALLOC_P(mallctl)("swap.avail", &swap_avail, &ssz,
+		    NULL, 0)) == 0) {
+			size_t lg_chunk;
+
+			malloc_cprintf(write_cb, cbopaque, "chunks: nchunks   "
+			    "highchunks    curchunks   swap_avail\n");
+			CTL_GET("opt.lg_chunk", &lg_chunk, size_t);
+			malloc_cprintf(write_cb, cbopaque,
+			    "  %13"PRIu64"%13zu%13zu%13zu\n",
+			    chunks_total, chunks_high, chunks_current,
+			    swap_avail << lg_chunk);
+		} else {
+			malloc_cprintf(write_cb, cbopaque, "chunks: nchunks   "
+			    "highchunks    curchunks\n");
+			malloc_cprintf(write_cb, cbopaque,
+			    "  %13"PRIu64"%13zu%13zu\n",
+			    chunks_total, chunks_high, chunks_current);
+		}
+
+		/* Print huge stats. */
+		CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t);
+		CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t);
+		CTL_GET("stats.huge.allocated", &huge_allocated, size_t);
+		malloc_cprintf(write_cb, cbopaque,
+		    "huge: nmalloc      ndalloc    allocated\n");
+		malloc_cprintf(write_cb, cbopaque,
+		    " %12"PRIu64" %12"PRIu64" %12zu\n",
+		    huge_nmalloc, huge_ndalloc, huge_allocated);
+
+		if (merged) {
+			unsigned narenas;
+
+			CTL_GET("arenas.narenas", &narenas, unsigned);
+			{
+				bool initialized[narenas];
+				size_t isz;
+				unsigned i, ninitialized;
+
+				isz = sizeof(initialized);
+				xmallctl("arenas.initialized", initialized,
+				    &isz, NULL, 0);
+				for (i = ninitialized = 0; i < narenas; i++) {
+					if (initialized[i])
+						ninitialized++;
+				}
+
+				if (ninitialized > 1) {
+					/* Print merged arena stats. */
+					malloc_cprintf(write_cb, cbopaque,
+					    "\nMerged arenas stats:\n");
+					stats_arena_print(write_cb, cbopaque,
+					    narenas);
+				}
+			}
+		}
+
+		if (unmerged) {
+			unsigned narenas;
+
+			/* Print stats for each arena. */
+
+			CTL_GET("arenas.narenas", &narenas, unsigned);
+			{
+				bool initialized[narenas];
+				size_t isz;
+				unsigned i;
+
+				isz = sizeof(initialized);
+				xmallctl("arenas.initialized", initialized,
+				    &isz, NULL, 0);
+
+				for (i = 0; i < narenas; i++) {
+					if (initialized[i]) {
+						malloc_cprintf(write_cb,
+						    cbopaque,
+						    "\narenas[%u]:\n", i);
+						stats_arena_print(write_cb,
+						    cbopaque, i);
+					}
+				}
+			}
+		}
+	}
+#endif /* #ifdef JEMALLOC_STATS */
+	write_cb(cbopaque, "--- End jemalloc statistics ---\n");
+}
diff --git a/dep/src/jmalloc/tcache.c b/dep/src/jmalloc/tcache.c
new file mode 100644
index 00000000000..ce6ec996159
--- /dev/null
+++ b/dep/src/jmalloc/tcache.c
@@ -0,0 +1,403 @@
+#define JEMALLOC_TCACHE_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifdef JEMALLOC_TCACHE
+/******************************************************************************/
+/* Data. */
+
+bool opt_tcache = true;
+ssize_t opt_lg_tcache_maxclass = LG_TCACHE_MAXCLASS_DEFAULT;
+ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
+
+/* Map of thread-specific caches. */
+__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+
+/*
+ * Same contents as tcache, but initialized such that the TSD destructor is
+ * called when a thread exits, so that the cache can be cleaned up.
+ */
+static pthread_key_t tcache_tsd;
+
+size_t nhbins;
+size_t tcache_maxclass;
+unsigned tcache_gc_incr;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void tcache_thread_cleanup(void *arg);
+
+/******************************************************************************/
+
+/*
+ * Slow path for small allocation from an empty tcache bin: refill the bin
+ * from the arena, then retry the easy (cached) allocation.  Under
+ * JEMALLOC_PROF the accumulated sample bytes are handed to the arena during
+ * the fill and reset here.  May still return NULL if the fill produced
+ * nothing.
+ */
+void *
+tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
+{
+	void *ret;
+
+	arena_tcache_fill_small(tcache->arena, tbin, binind
+#ifdef JEMALLOC_PROF
+	    , tcache->prof_accumbytes
+#endif
+	    );
+#ifdef JEMALLOC_PROF
+	tcache->prof_accumbytes = 0;
+#endif
+	ret = tcache_alloc_easy(tbin);
+
+	return (ret);
+}
+
+/*
+ * Flush a small-object tcache bin down to rem cached objects, returning the
+ * rest to their owning arena bins.  Cached objects form an intrusive singly
+ * linked list through their first word.  Objects may belong to different
+ * arenas; each outer pass locks the bin of the first object's arena, frees
+ * every object from that arena, and defers the rest to a later pass so only
+ * one bin lock is held at a time.
+ */
+void
+tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+    , tcache_t *tcache
+#endif
+    )
+{
+	void *flush, *deferred, *ptr;
+	unsigned i, nflush, ndeferred;
+
+	assert(binind < nbins);
+	assert(rem <= tbin->ncached);
+
+	for (flush = tbin->avail, nflush = tbin->ncached - rem; flush != NULL;
+	    flush = deferred, nflush = ndeferred) {
+		/* Lock the arena bin associated with the first object. */
+		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
+		arena_t *arena = chunk->arena;
+		arena_bin_t *bin = &arena->bins[binind];
+
+#ifdef JEMALLOC_PROF
+		/* Credit pending sampled bytes to the home arena only. */
+		if (arena == tcache->arena) {
+			malloc_mutex_lock(&arena->lock);
+			arena_prof_accum(arena, tcache->prof_accumbytes);
+			malloc_mutex_unlock(&arena->lock);
+			tcache->prof_accumbytes = 0;
+		}
+#endif
+
+		malloc_mutex_lock(&bin->lock);
+#ifdef JEMALLOC_STATS
+		if (arena == tcache->arena) {
+			bin->stats.nflushes++;
+			bin->stats.nrequests += tbin->tstats.nrequests;
+			tbin->tstats.nrequests = 0;
+		}
+#endif
+		deferred = NULL;
+		ndeferred = 0;
+		for (i = 0; i < nflush; i++) {
+			ptr = flush;
+			assert(ptr != NULL);
+			/* Advance the list before freeing the node. */
+			flush = *(void **)ptr;
+			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+			if (chunk->arena == arena) {
+				size_t pageind = (((uintptr_t)ptr -
+				    (uintptr_t)chunk) >> PAGE_SHIFT);
+				arena_chunk_map_t *mapelm =
+				    &chunk->map[pageind];
+				arena_dalloc_bin(arena, chunk, ptr, mapelm);
+			} else {
+				/*
+				 * This object was allocated via a different
+				 * arena bin than the one that is currently
+				 * locked.  Stash the object, so that it can be
+				 * handled in a future pass.
+				 */
+				*(void **)ptr = deferred;
+				deferred = ptr;
+				ndeferred++;
+			}
+		}
+		malloc_mutex_unlock(&bin->lock);
+
+		if (flush != NULL) {
+			/*
+			 * This was the first pass, and rem cached objects
+			 * remain.
+			 */
+			tbin->avail = flush;
+		}
+	}
+
+	tbin->ncached = rem;
+	if (tbin->ncached < tbin->low_water)
+		tbin->low_water = tbin->ncached;
+}
+
+/*
+ * Flush a large-object tcache bin down to rem cached objects.  Mirrors
+ * tcache_bin_flush_small(), but locks the whole arena (large runs are not
+ * bin-managed) and frees via arena_dalloc_large().  Objects from arenas
+ * other than the currently locked one are deferred to a later pass.
+ */
+void
+tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+    , tcache_t *tcache
+#endif
+    )
+{
+	void *flush, *deferred, *ptr;
+	unsigned i, nflush, ndeferred;
+
+	assert(binind < nhbins);
+	assert(rem <= tbin->ncached);
+
+	for (flush = tbin->avail, nflush = tbin->ncached - rem; flush != NULL;
+	    flush = deferred, nflush = ndeferred) {
+		/* Lock the arena associated with the first object. */
+		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
+		arena_t *arena = chunk->arena;
+
+		malloc_mutex_lock(&arena->lock);
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+		/* Merge prof/stats counters into the home arena only. */
+		if (arena == tcache->arena) {
+#endif
+#ifdef JEMALLOC_PROF
+			arena_prof_accum(arena, tcache->prof_accumbytes);
+			tcache->prof_accumbytes = 0;
+#endif
+#ifdef JEMALLOC_STATS
+			arena->stats.nrequests_large += tbin->tstats.nrequests;
+			arena->stats.lstats[binind - nbins].nrequests +=
+			    tbin->tstats.nrequests;
+			tbin->tstats.nrequests = 0;
+#endif
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+		}
+#endif
+		deferred = NULL;
+		ndeferred = 0;
+		for (i = 0; i < nflush; i++) {
+			ptr = flush;
+			assert(ptr != NULL);
+			/* Advance the list before freeing the node. */
+			flush = *(void **)ptr;
+			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+			if (chunk->arena == arena)
+				arena_dalloc_large(arena, chunk, ptr);
+			else {
+				/*
+				 * This object was allocated via a different
+				 * arena than the one that is currently locked.
+				 * Stash the object, so that it can be handled
+				 * in a future pass.
+				 */
+				*(void **)ptr = deferred;
+				deferred = ptr;
+				ndeferred++;
+			}
+		}
+		malloc_mutex_unlock(&arena->lock);
+
+		if (flush != NULL) {
+			/*
+			 * This was the first pass, and rem cached objects
+			 * remain.
+			 */
+			tbin->avail = flush;
+		}
+	}
+
+	tbin->ncached = rem;
+	if (tbin->ncached < tbin->low_water)
+		tbin->low_water = tbin->ncached;
+}
+
+/*
+ * Allocate and initialize a thread cache bound to arena.  The tcache_t has
+ * a trailing flexible array of nhbins tcache_bin_t slots (hence nhbins - 1
+ * added to sizeof).  Small-bin capacity is twice the bin's regs-per-run,
+ * capped at TCACHE_NSLOTS_SMALL_MAX; large bins get TCACHE_NSLOTS_LARGE.
+ * The result is installed in both tcache_tls (fast path) and the pthread
+ * TSD slot (so the destructor fires at thread exit).  Returns NULL on
+ * allocation failure.
+ */
+tcache_t *
+tcache_create(arena_t *arena)
+{
+	tcache_t *tcache;
+	size_t size;
+	unsigned i;
+
+	size = sizeof(tcache_t) + (sizeof(tcache_bin_t) * (nhbins - 1));
+	/*
+	 * Round up to the nearest multiple of the cacheline size, in order to
+	 * avoid the possibility of false cacheline sharing.
+	 *
+	 * That this works relies on the same logic as in ipalloc().
+	 */
+	size = (size + CACHELINE_MASK) & (-CACHELINE);
+
+	/* Small tcaches come from the arena itself; large ones are zeroed. */
+	if (size <= small_maxclass)
+		tcache = (tcache_t *)arena_malloc_small(arena, size, true);
+	else
+		tcache = (tcache_t *)icalloc(size);
+
+	if (tcache == NULL)
+		return (NULL);
+
+#ifdef JEMALLOC_STATS
+	/* Link into list of extant tcaches. */
+	malloc_mutex_lock(&arena->lock);
+	ql_elm_new(tcache, link);
+	ql_tail_insert(&arena->tcache_ql, tcache, link);
+	malloc_mutex_unlock(&arena->lock);
+#endif
+
+	tcache->arena = arena;
+	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
+	for (i = 0; i < nbins; i++) {
+		if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
+			tcache->tbins[i].ncached_max = (arena->bins[i].nregs <<
+			    1);
+		} else
+			tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX;
+	}
+	/* Remaining (large) bins. */
+	for (; i < nhbins; i++)
+		tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE;
+
+	tcache_tls = tcache;
+	pthread_setspecific(tcache_tsd, tcache);
+
+	return (tcache);
+}
+
+/*
+ * Tear down |tcache|: flush every cached object back to its owning arena,
+ * merge any remaining thread-local statistics/profiling counters into the
+ * arena, and finally release the tcache_t memory itself.  Invoked from the
+ * TSD destructor (tcache_thread_cleanup) at thread exit.
+ */
+void
+tcache_destroy(tcache_t *tcache)
+{
+ unsigned i;
+
+#ifdef JEMALLOC_STATS
+ /* Unlink from list of extant tcaches. */
+ malloc_mutex_lock(&tcache->arena->lock);
+ ql_remove(&tcache->arena->tcache_ql, tcache, link);
+ malloc_mutex_unlock(&tcache->arena->lock);
+ tcache_stats_merge(tcache, tcache->arena);
+#endif
+
+ /* Flush all small bins (rem == 0 empties each bin completely). */
+ for (i = 0; i < nbins; i++) {
+ tcache_bin_t *tbin = &tcache->tbins[i];
+ tcache_bin_flush_small(tbin, i, 0
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache
+#endif
+ );
+
+#ifdef JEMALLOC_STATS
+ /* Fold any request counts accumulated since the merge above. */
+ if (tbin->tstats.nrequests != 0) {
+ arena_t *arena = tcache->arena;
+ arena_bin_t *bin = &arena->bins[i];
+ malloc_mutex_lock(&bin->lock);
+ bin->stats.nrequests += tbin->tstats.nrequests;
+ malloc_mutex_unlock(&bin->lock);
+ }
+#endif
+ }
+
+ /* Flush the large bins the same way. */
+ for (; i < nhbins; i++) {
+ tcache_bin_t *tbin = &tcache->tbins[i];
+ tcache_bin_flush_large(tbin, i, 0
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache
+#endif
+ );
+
+#ifdef JEMALLOC_STATS
+ if (tbin->tstats.nrequests != 0) {
+ arena_t *arena = tcache->arena;
+ malloc_mutex_lock(&arena->lock);
+ arena->stats.nrequests_large += tbin->tstats.nrequests;
+ arena->stats.lstats[i - nbins].nrequests +=
+ tbin->tstats.nrequests;
+ malloc_mutex_unlock(&arena->lock);
+ }
+#endif
+ }
+
+#ifdef JEMALLOC_PROF
+ /* Credit any leftover profiling bytes to the arena. */
+ if (tcache->prof_accumbytes > 0) {
+ malloc_mutex_lock(&tcache->arena->lock);
+ arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
+ malloc_mutex_unlock(&tcache->arena->lock);
+ }
+#endif
+
+ /*
+ * Free the tcache_t, mirroring the allocation split in tcache_create():
+ * a small allocation is returned directly to its bin (locating the run
+ * from the chunk page map), anything larger goes through idalloc().
+ */
+ if (arena_salloc(tcache) <= small_maxclass) {
+ arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
+ arena_t *arena = chunk->arena;
+ size_t pageind = (((uintptr_t)tcache - (uintptr_t)chunk) >>
+ PAGE_SHIFT);
+ arena_chunk_map_t *mapelm = &chunk->map[pageind];
+ arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+ (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
+ PAGE_SHIFT));
+ arena_bin_t *bin = run->bin;
+
+ malloc_mutex_lock(&bin->lock);
+ arena_dalloc_bin(arena, chunk, tcache, mapelm);
+ malloc_mutex_unlock(&bin->lock);
+ } else
+ idalloc(tcache);
+}
+
+/*
+ * pthread TSD destructor (registered in tcache_boot()).  Destroys the
+ * exiting thread's cache, then parks tcache_tls at the sentinel value 1 so
+ * a late allocation on this thread cannot mistake the stale pointer for a
+ * live tcache.
+ */
+static void
+tcache_thread_cleanup(void *arg)
+{
+ tcache_t *tcache = (tcache_t *)arg;
+
+ assert(tcache == tcache_tls);
+ if (tcache != NULL) {
+ /* The sentinel must never reach us as the TSD value itself. */
+ assert(tcache != (void *)(uintptr_t)1);
+ tcache_destroy(tcache);
+ tcache_tls = (void *)(uintptr_t)1;
+ }
+}
+
+#ifdef JEMALLOC_STATS
+/*
+ * Fold |tcache|'s per-thread request counters into |arena|'s statistics
+ * and zero the thread-local copies.  Small-bin counters are protected by
+ * each bin's own lock.
+ *
+ * NOTE(review): arena->stats (nrequests_large / lstats) is updated below
+ * without arena->lock being taken here -- confirm that all callers hold
+ * arena->lock, or that serialization is otherwise guaranteed.
+ */
+void
+tcache_stats_merge(tcache_t *tcache, arena_t *arena)
+{
+ unsigned i;
+
+ /* Merge and reset tcache stats. */
+ for (i = 0; i < nbins; i++) {
+ arena_bin_t *bin = &arena->bins[i];
+ tcache_bin_t *tbin = &tcache->tbins[i];
+ malloc_mutex_lock(&bin->lock);
+ bin->stats.nrequests += tbin->tstats.nrequests;
+ malloc_mutex_unlock(&bin->lock);
+ tbin->tstats.nrequests = 0;
+ }
+
+ /* Large size classes: lstats is indexed relative to nbins. */
+ for (; i < nhbins; i++) {
+ malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins];
+ tcache_bin_t *tbin = &tcache->tbins[i];
+ arena->stats.nrequests_large += tbin->tstats.nrequests;
+ lstats->nrequests += tbin->tstats.nrequests;
+ tbin->tstats.nrequests = 0;
+ }
+}
+#endif
+
+/*
+ * One-time tcache initialization, run after small_maxclass/arena_maxclass
+ * are known: clamps the configured maximum cached size class, derives the
+ * total bin count (nhbins) and the incremental GC threshold, and creates
+ * the TSD key whose destructor tears caches down at thread exit.  Aborts
+ * the process if the TSD key cannot be created.
+ */
+void
+tcache_boot(void)
+{
+
+ if (opt_tcache) {
+ /*
+ * If necessary, clamp opt_lg_tcache_maxclass, now that
+ * small_maxclass and arena_maxclass are known.
+ * NOTE(review): 1U << opt_lg_tcache_maxclass assumes the option
+ * is below the width of unsigned -- presumably bounded where the
+ * option is parsed; verify.
+ */
+ if (opt_lg_tcache_maxclass < 0 || (1U <<
+ opt_lg_tcache_maxclass) < small_maxclass)
+ tcache_maxclass = small_maxclass;
+ else if ((1U << opt_lg_tcache_maxclass) > arena_maxclass)
+ tcache_maxclass = arena_maxclass;
+ else
+ tcache_maxclass = (1U << opt_lg_tcache_maxclass);
+
+ /* One large bin per page multiple up to tcache_maxclass. */
+ nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);
+
+ /* Compute incremental GC event threshold. */
+ if (opt_lg_tcache_gc_sweep >= 0) {
+ /* Ceiling of (1 << sweep) / nbins. */
+ tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
+ nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
+ 0) ? 0 : 1);
+ } else
+ tcache_gc_incr = 0;
+
+ if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
+ 0) {
+ malloc_write(
+ "<jemalloc>: Error in pthread_key_create()\n");
+ abort();
+ }
+ }
+}
+/******************************************************************************/
+#endif /* JEMALLOC_TCACHE */
diff --git a/src/trinitycore/CMakeLists.txt b/src/trinitycore/CMakeLists.txt
index fcf63ae2d71..3cd1ac1abec 100644
--- a/src/trinitycore/CMakeLists.txt
+++ b/src/trinitycore/CMakeLists.txt
@@ -63,6 +63,7 @@ trinityauth
trinityconfig
vmaps
g3dlite
+jmalloc
${SCRIPT_LIB}
${READLINE_LIBRARY}
${TERMCAP_LIBRARY}