author     Multivitamin <DasUmba@.(none)>    2012-04-13 10:49:09 +0200
committer  Multivitamin <DasUmba@.(none)>    2012-04-13 10:49:09 +0200
commit     126fd13e5d6b57dc0c8830248d44db504c7d103f (patch)
tree       e33b3e612f5e1e694a7028c4fe5171f0105c6654
parent     6400c13fcb342fff7e2d808e469f2c57d83766c5 (diff)
DEP: Updated Jemalloc to Version 2.2.5
Signed-off-by: Multivitamin <DasUmba@.(none)>
-rw-r--r--  dep/jemalloc/VERSION                                          2
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/arena.h              252
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/atomic.h             169
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/bitmap.h             184
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/chunk.h                2
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/ckh.h                  2
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/ctl.h                  1
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/hash.h                 6
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h  185
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/mb.h                   2
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/mutex.h               26
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/private_namespace.h  195
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/prof.h               144
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/rtree.h                2
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/stats.h               37
-rw-r--r--  dep/jemalloc/include/jemalloc/internal/tcache.h              89
-rw-r--r--  dep/jemalloc/include/jemalloc/jemalloc.h                     10
-rw-r--r--  dep/jemalloc/include/jemalloc/jemalloc_defs.h                35
-rw-r--r--  dep/jemalloc/src/arena.c                                    671
-rw-r--r--  dep/jemalloc/src/atomic.c                                     2
-rw-r--r--  dep/jemalloc/src/bitmap.c                                    90
-rw-r--r--  dep/jemalloc/src/chunk.c                                     16
-rw-r--r--  dep/jemalloc/src/chunk_mmap.c                                 6
-rw-r--r--  dep/jemalloc/src/ckh.c                                       46
-rw-r--r--  dep/jemalloc/src/ctl.c                                       37
-rw-r--r--  dep/jemalloc/src/hash.c                                       2
-rw-r--r--  dep/jemalloc/src/huge.c                                      30
-rw-r--r--  dep/jemalloc/src/jemalloc.c                                 264
-rw-r--r--  dep/jemalloc/src/mb.c                                         2
-rw-r--r--  dep/jemalloc/src/mutex.c                                      6
-rw-r--r--  dep/jemalloc/src/prof.c                                     175
-rw-r--r--  dep/jemalloc/src/rtree.c                                      7
-rw-r--r--  dep/jemalloc/src/stats.c                                     21
-rw-r--r--  dep/jemalloc/src/tcache.c                                   153
34 files changed, 2067 insertions, 804 deletions
diff --git a/dep/jemalloc/VERSION b/dep/jemalloc/VERSION
index 585f53edd80..aa85f5a2acf 100644
--- a/dep/jemalloc/VERSION
+++ b/dep/jemalloc/VERSION
@@ -1 +1 @@
-2.1.0-0-g1c4b088b08d3bc7617a34387e196ce03716160bf
+2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760
diff --git a/dep/jemalloc/include/jemalloc/internal/arena.h b/dep/jemalloc/include/jemalloc/internal/arena.h
index 9556c2c68f7..b80c118d811 100644
--- a/dep/jemalloc/include/jemalloc/internal/arena.h
+++ b/dep/jemalloc/include/jemalloc/internal/arena.h
@@ -19,6 +19,7 @@
#ifdef JEMALLOC_TINY
/* Smallest size class to support. */
# define LG_TINY_MIN LG_SIZEOF_PTR
+# define TINY_MIN (1U << LG_TINY_MIN)
#endif
/*
@@ -45,9 +46,10 @@
* point is implicitly RUN_BFP bits to the left.
*
* Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
- * honored for some/all object sizes, since there is one bit of header overhead
- * per object (plus a constant). This constraint is relaxed (ignored) for runs
- * that are so small that the per-region overhead is greater than:
+ * honored for some/all object sizes, since when heap profiling is enabled
+ * there is one pointer of header overhead per object (plus a constant). This
+ * constraint is relaxed (ignored) for runs that are so small that the
+ * per-region overhead is greater than:
*
* (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
*/
@@ -56,6 +58,10 @@
#define RUN_MAX_OVRHD 0x0000003dU
#define RUN_MAX_OVRHD_RELAX 0x00001800U
+/* Maximum number of regions in one run. */
+#define LG_RUN_MAXREGS 11
+#define RUN_MAXREGS (1U << LG_RUN_MAXREGS)
+
/*
* The minimum ratio of active:dirty pages per arena is computed as:
*
@@ -69,6 +75,7 @@
typedef struct arena_chunk_map_s arena_chunk_map_t;
typedef struct arena_chunk_s arena_chunk_t;
typedef struct arena_run_s arena_run_t;
+typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;
@@ -105,7 +112,7 @@ struct arena_chunk_map_s {
* Run address (or size) and various flags are stored together. The bit
* layout looks like (assuming 32-bit system):
*
- * ???????? ???????? ????---- ----dzla
+ * ???????? ???????? ????---- ----dula
*
* ? : Unallocated: Run address for first/last pages, unset for internal
* pages.
@@ -113,7 +120,7 @@ struct arena_chunk_map_s {
* Large: Run size for first page, unset for trailing pages.
* - : Unused.
* d : dirty?
- * z : zeroed?
+ * u : unzeroed?
* l : large?
* a : allocated?
*
@@ -129,30 +136,30 @@ struct arena_chunk_map_s {
* [dula] : bit unset
*
* Unallocated (clean):
- * ssssssss ssssssss ssss---- ----du--
+ * ssssssss ssssssss ssss---- ----du-a
* xxxxxxxx xxxxxxxx xxxx---- -----Uxx
- * ssssssss ssssssss ssss---- ----dU--
+ * ssssssss ssssssss ssss---- ----dU-a
*
* Unallocated (dirty):
- * ssssssss ssssssss ssss---- ----D---
+ * ssssssss ssssssss ssss---- ----D--a
* xxxxxxxx xxxxxxxx xxxx---- ----xxxx
- * ssssssss ssssssss ssss---- ----D---
+ * ssssssss ssssssss ssss---- ----D--a
*
* Small:
- * pppppppp pppppppp pppp---- ----d--a
- * pppppppp pppppppp pppp---- -------a
- * pppppppp pppppppp pppp---- ----d--a
+ * pppppppp pppppppp pppp---- ----d--A
+ * pppppppp pppppppp pppp---- -------A
+ * pppppppp pppppppp pppp---- ----d--A
*
* Large:
- * ssssssss ssssssss ssss---- ----D-la
+ * ssssssss ssssssss ssss---- ----D-LA
* xxxxxxxx xxxxxxxx xxxx---- ----xxxx
- * -------- -------- -------- ----D-la
+ * -------- -------- -------- ----D-LA
*
* Large (sampled, size <= PAGE_SIZE):
- * ssssssss ssssssss sssscccc ccccD-la
+ * ssssssss ssssssss sssscccc ccccD-LA
*
* Large (not sampled, size == PAGE_SIZE):
- * ssssssss ssssssss ssss---- ----D-la
+ * ssssssss ssssssss ssss---- ----D-LA
*/
size_t bits;
#ifdef JEMALLOC_PROF
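
For reference, the low nibble described by the "----dula" diagram can be tested with plain masks. A minimal sketch, assuming the bit positions shown above (the MAP_* names are illustrative, not jemalloc's own macros):

    #include <stddef.h>

    /* Flag bits in the low nibble of arena_chunk_map_t.bits, per the
     * "----dula" layout above (names are illustrative only). */
    #define MAP_ALLOCATED ((size_t)0x1U)   /* a */
    #define MAP_LARGE     ((size_t)0x2U)   /* l */
    #define MAP_UNZEROED  ((size_t)0x4U)   /* u */
    #define MAP_DIRTY     ((size_t)0x8U)   /* d */

    /* A page belongs to a small run iff it is allocated but not large;
     * its run page offset (the p bits) sits above the flag nibble. */
    static int
    map_is_small(size_t bits)
    {
        return ((bits & MAP_ALLOCATED) != 0 && (bits & MAP_LARGE) == 0);
    }
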
@@ -206,16 +213,52 @@ struct arena_run_s {
/* Bin this run is associated with. */
arena_bin_t *bin;
- /* Stack of available freed regions, or NULL. */
- void *avail;
-
- /* Next region that has never been allocated, or run boundary. */
- void *next;
+ /* Index of next region that has never been allocated, or nregs. */
+ uint32_t nextind;
/* Number of free regions in run. */
unsigned nfree;
};
+/*
+ * Read-only information associated with each element of arena_t's bins array
+ * is stored separately, partly to reduce memory usage (only one copy, rather
+ * than one per arena), but mainly to avoid false cacheline sharing.
+ */
+struct arena_bin_info_s {
+ /* Size of regions in a run for this bin's size class. */
+ size_t reg_size;
+
+ /* Total size of a run for this bin's size class. */
+ size_t run_size;
+
+ /* Total number of regions in a run for this bin's size class. */
+ uint32_t nregs;
+
+ /*
+ * Offset of first bitmap_t element in a run header for this bin's size
+ * class.
+ */
+ uint32_t bitmap_offset;
+
+ /*
+ * Metadata used to manipulate bitmaps for runs associated with this
+ * bin.
+ */
+ bitmap_info_t bitmap_info;
+
+#ifdef JEMALLOC_PROF
+ /*
+ * Offset of first (prof_ctx_t *) in a run header for this bin's size
+ * class, or 0 if (opt_prof == false).
+ */
+ uint32_t ctx0_offset;
+#endif
+
+ /* Offset of first region in a run for this bin's size class. */
+ uint32_t reg0_offset;
+};
+
struct arena_bin_s {
/*
* All operations on runcur, runs, and stats require that lock be
@@ -240,26 +283,6 @@ struct arena_bin_s {
*/
arena_run_tree_t runs;
- /* Size of regions in a run for this bin's size class. */
- size_t reg_size;
-
- /* Total size of a run for this bin's size class. */
- size_t run_size;
-
- /* Total number of regions in a run for this bin's size class. */
- uint32_t nregs;
-
-#ifdef JEMALLOC_PROF
- /*
- * Offset of first (prof_ctx_t *) in a run header for this bin's size
- * class, or 0 if (opt_prof == false).
- */
- uint32_t ctx0_offset;
-#endif
-
- /* Offset of first region in a run for this bin's size class. */
- uint32_t reg0_offset;
-
#ifdef JEMALLOC_STATS
/* Bin statistics. */
malloc_bin_stats_t stats;
@@ -276,8 +299,18 @@ struct arena_s {
unsigned ind;
/*
- * All non-bin-related operations on this arena require that lock be
- * locked.
+ * Number of threads currently assigned to this arena. This field is
+ * protected by arenas_lock.
+ */
+ unsigned nthreads;
+
+ /*
+ * There are three classes of arena operations from a locking
+ * perspective:
+ * 1) Thread assignment (modifies nthreads) is protected by
+ * arenas_lock.
+ * 2) Bin-related operations are protected by bin locks.
+ * 3) Chunk- and run-related operations are protected by this mutex.
*/
malloc_mutex_t lock;
@@ -347,45 +380,35 @@ struct arena_s {
/*
* bins is used to store trees of free regions of the following sizes,
- * assuming a 16-byte quantum, 4 KiB page size, and default
- * JEMALLOC_OPTIONS.
+ * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and
+ * default MALLOC_CONF.
*
* bins[i] | size |
* --------+--------+
- * 0 | 2 |
- * 1 | 4 |
- * 2 | 8 |
+ * 0 | 8 |
* --------+--------+
- * 3 | 16 |
- * 4 | 32 |
- * 5 | 48 |
+ * 1 | 16 |
+ * 2 | 32 |
+ * 3 | 48 |
* : :
- * 8 | 96 |
- * 9 | 112 |
- * 10 | 128 |
+ * 6 | 96 |
+ * 7 | 112 |
+ * 8 | 128 |
* --------+--------+
- * 11 | 192 |
- * 12 | 256 |
- * 13 | 320 |
- * 14 | 384 |
- * 15 | 448 |
- * 16 | 512 |
- * --------+--------+
- * 17 | 768 |
- * 18 | 1024 |
- * 19 | 1280 |
- * : :
- * 27 | 3328 |
- * 28 | 3584 |
- * 29 | 3840 |
+ * 9 | 192 |
+ * 10 | 256 |
+ * 11 | 320 |
+ * 12 | 384 |
+ * 13 | 448 |
+ * 14 | 512 |
* --------+--------+
- * 30 | 4 KiB |
- * 31 | 6 KiB |
- * 33 | 8 KiB |
+ * 15 | 768 |
+ * 16 | 1024 |
+ * 17 | 1280 |
* : :
- * 43 | 28 KiB |
- * 44 | 30 KiB |
- * 45 | 32 KiB |
+ * 25 | 3328 |
+ * 26 | 3584 |
+ * 27 | 3840 |
* --------+--------+
*/
arena_bin_t bins[1]; /* Dynamically sized. */
@@ -397,8 +420,16 @@ struct arena_s {
extern size_t opt_lg_qspace_max;
extern size_t opt_lg_cspace_max;
-extern ssize_t opt_lg_dirty_mult;
+extern ssize_t opt_lg_dirty_mult;
+/*
+ * small_size2bin is a compact lookup table that rounds request sizes up to
+ * size classes. In order to reduce cache footprint, the table is compressed,
+ * and all accesses are via the SMALL_SIZE2BIN macro.
+ */
extern uint8_t const *small_size2bin;
+#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN])
+
+extern arena_bin_info_t *arena_bin_info;
/* Various bin-related settings. */
#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */
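
The compressed table works because every small size class is a multiple of 2^LG_TINY_MIN, so one byte per LG_TINY_MIN-sized stride of request sizes suffices. A self-contained sketch of the same rounding, assuming an 8-byte tiny minimum and the bin sizes from the table above (the toy_* names and table contents are this sketch's own):

    #include <stdint.h>
    #include <stdio.h>

    /* One entry per 8-byte stride of request sizes 1..128, mapping to the
     * bin indices 0..8 listed in arena.h above (8, 16, 32, 48, ..., 128). */
    #define LG_TINY_MIN 3
    static const uint8_t toy_size2bin[] = {
        0,          /* (0,   8] -> bin 0 (8)   */
        1,          /* (8,  16] -> bin 1 (16)  */
        2, 2,       /* (16, 32] -> bin 2 (32)  */
        3, 3,       /* (32, 48] -> bin 3 (48)  */
        4, 4,       /* (48, 64] -> bin 4 (64)  */
        5, 5,       /* (64, 80] -> bin 5 (80)  */
        6, 6,       /* (80, 96] -> bin 6 (96)  */
        7, 7,       /* (96,112] -> bin 7 (112) */
        8, 8        /* (112,128]-> bin 8 (128) */
    };
    #define TOY_SIZE2BIN(s) (toy_size2bin[((s) - 1) >> LG_TINY_MIN])

    int
    main(void)
    {
        /* A 24-byte request lands in stride (24-1)>>3 == 2, i.e. the
         * 32-byte class. */
        printf("size 24 -> bin %u\n", (unsigned)TOY_SIZE2BIN(24));
        return (0);
    }
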
@@ -465,8 +496,9 @@ bool arena_boot(void);
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
-unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin,
- const void *ptr, size_t size);
+size_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
+unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
+ const void *ptr);
# ifdef JEMALLOC_PROF
prof_ctx_t *arena_prof_ctx_get(const void *ptr);
void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
@@ -475,21 +507,37 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
+JEMALLOC_INLINE size_t
+arena_bin_index(arena_t *arena, arena_bin_t *bin)
+{
+ size_t binind = bin - arena->bins;
+ assert(binind < nbins);
+ return (binind);
+}
+
JEMALLOC_INLINE unsigned
-arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
- size_t size)
+arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
{
unsigned shift, diff, regind;
+ size_t size;
- assert(run->magic == ARENA_RUN_MAGIC);
+ dassert(run->magic == ARENA_RUN_MAGIC);
+ /*
+ * Freeing a pointer lower than region zero can cause assertion
+ * failure.
+ */
+ assert((uintptr_t)ptr >= (uintptr_t)run +
+ (uintptr_t)bin_info->reg0_offset);
/*
* Avoid doing division with a variable divisor if possible. Using
* actual division here can reduce allocator throughput by over 20%!
*/
- diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
+ diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
+ bin_info->reg0_offset);
/* Rescale (factor powers of 2 out of the numerator and denominator). */
+ size = bin_info->reg_size;
shift = ffs(size) - 1;
diff >>= shift;
size >>= shift;
@@ -512,8 +560,8 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
* divide by 0, and 1 and 2 are both powers of two, which are
* handled above.
*/
-#define SIZE_INV_SHIFT 21
-#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
+#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
+#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
static const unsigned size_invs[] = {
SIZE_INV(3),
SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
@@ -533,7 +581,7 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
#undef SIZE_INV_SHIFT
}
assert(diff == regind * size);
- assert(regind < bin->nregs);
+ assert(regind < bin_info->nregs);
return (regind);
}
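
The SIZE_INV table above replaces the per-free division by reg_size with a multiply by a precomputed fixed-point reciprocal plus a shift. A standalone check of that identity for divisor 3 (the 48-byte case after the common factor of 2^4 is removed); the shift value 21 is taken from the pre-change constant, and the loop bound keeps the 32-bit product from overflowing:

    #include <assert.h>
    #include <stdio.h>

    /* regind = (diff * ((2^SHIFT / s) + 1)) >> SHIFT, exact as long as the
     * product fits in 32 bits -- the same trick as arena_run_regind(). */
    #define SIZE_INV_SHIFT 21
    #define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)

    int
    main(void)
    {
        unsigned diff;

        for (diff = 0; diff < 4096; diff++) {
            unsigned regind = (diff * SIZE_INV(3)) >> SIZE_INV_SHIFT;
            assert(regind == diff / 3);
        }
        printf("reciprocal multiply matches division for divisor 3\n");
        return (0);
    }
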
@@ -560,13 +608,14 @@ arena_prof_ctx_get(const void *ptr)
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
- arena_bin_t *bin = run->bin;
+ size_t binind = arena_bin_index(chunk->arena, run->bin);
+ arena_bin_info_t *bin_info = &arena_bin_info[binind];
unsigned regind;
- assert(run->magic == ARENA_RUN_MAGIC);
- regind = arena_run_regind(run, bin, ptr, bin->reg_size);
+ dassert(run->magic == ARENA_RUN_MAGIC);
+ regind = arena_run_regind(run, bin_info, ptr);
ret = *(prof_ctx_t **)((uintptr_t)run +
- bin->ctx0_offset + (regind *
+ bin_info->ctx0_offset + (regind *
sizeof(prof_ctx_t *)));
}
} else
@@ -594,12 +643,16 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
arena_bin_t *bin = run->bin;
+ size_t binind;
+ arena_bin_info_t *bin_info;
unsigned regind;
- assert(run->magic == ARENA_RUN_MAGIC);
- regind = arena_run_regind(run, bin, ptr, bin->reg_size);
+ dassert(run->magic == ARENA_RUN_MAGIC);
+ binind = arena_bin_index(chunk->arena, bin);
+ bin_info = &arena_bin_info[binind];
+ regind = arena_run_regind(run, bin_info, ptr);
- *((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset
+ *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset
+ (regind * sizeof(prof_ctx_t *)))) = ctx;
} else
assert((uintptr_t)ctx == (uintptr_t)1U);
@@ -615,7 +668,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
arena_chunk_map_t *mapelm;
assert(arena != NULL);
- assert(arena->magic == ARENA_MAGIC);
+ dassert(arena->magic == ARENA_MAGIC);
assert(chunk->arena == arena);
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
@@ -638,11 +691,18 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapelm->bits >>
PAGE_SHIFT)) << PAGE_SHIFT));
- assert(run->magic == ARENA_RUN_MAGIC);
- assert(((uintptr_t)ptr - ((uintptr_t)run +
- (uintptr_t)run->bin->reg0_offset)) %
- run->bin->reg_size == 0);
+ dassert(run->magic == ARENA_RUN_MAGIC);
bin = run->bin;
+#ifdef JEMALLOC_DEBUG
+ {
+ size_t binind = arena_bin_index(arena, bin);
+ arena_bin_info_t *bin_info =
+ &arena_bin_info[binind];
+ assert(((uintptr_t)ptr - ((uintptr_t)run +
+ (uintptr_t)bin_info->reg0_offset)) %
+ bin_info->reg_size == 0);
+ }
+#endif
malloc_mutex_lock(&bin->lock);
arena_dalloc_bin(arena, chunk, ptr, mapelm);
malloc_mutex_unlock(&bin->lock);
diff --git a/dep/jemalloc/include/jemalloc/internal/atomic.h b/dep/jemalloc/include/jemalloc/internal/atomic.h
new file mode 100644
index 00000000000..9a298623f8a
--- /dev/null
+++ b/dep/jemalloc/include/jemalloc/internal/atomic.h
@@ -0,0 +1,169 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#define atomic_read_uint64(p) atomic_add_uint64(p, 0)
+#define atomic_read_uint32(p) atomic_add_uint32(p, 0)
+
+#if (LG_SIZEOF_PTR == 3)
+# define atomic_read_z(p) \
+ (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0)
+# define atomic_add_z(p, x) \
+ (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)
+# define atomic_sub_z(p, x) \
+ (size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x)
+#elif (LG_SIZEOF_PTR == 2)
+# define atomic_read_z(p) \
+ (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0)
+# define atomic_add_z(p, x) \
+ (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)
+# define atomic_sub_z(p, x) \
+ (size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x)
+#endif
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
+uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
+uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
+uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
+/******************************************************************************/
+/* 64-bit operations. */
+#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
+JEMALLOC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+
+ return (__sync_add_and_fetch(p, x));
+}
+
+JEMALLOC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+
+ return (__sync_sub_and_fetch(p, x));
+}
+#elif (defined(JEMALLOC_OSATOMIC))
+JEMALLOC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+
+ return (OSAtomicAdd64((int64_t)x, (int64_t *)p));
+}
+
+JEMALLOC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+
+ return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
+}
+#elif (defined(__amd64__) || defined(__x86_64__))
+JEMALLOC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+
+ asm volatile (
+ "lock; xaddq %0, %1;"
+ : "+r" (x), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+
+ return (x);
+}
+
+JEMALLOC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+
+ x = (uint64_t)(-(int64_t)x);
+ asm volatile (
+ "lock; xaddq %0, %1;"
+ : "+r" (x), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+
+ return (x);
+}
+#else
+# if (LG_SIZEOF_PTR == 3)
+# error "Missing implementation for 64-bit atomic operations"
+# endif
+#endif
+
+/******************************************************************************/
+/* 32-bit operations. */
+#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
+JEMALLOC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+
+ return (__sync_add_and_fetch(p, x));
+}
+
+JEMALLOC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+
+ return (__sync_sub_and_fetch(p, x));
+}
+#elif (defined(JEMALLOC_OSATOMIC))
+JEMALLOC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+
+ return (OSAtomicAdd32((int32_t)x, (int32_t *)p));
+}
+
+JEMALLOC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+
+ return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
+}
+#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
+JEMALLOC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+
+ asm volatile (
+ "lock; xaddl %0, %1;"
+ : "+r" (x), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+
+ return (x);
+}
+
+JEMALLOC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+
+ x = (uint32_t)(-(int32_t)x);
+ asm volatile (
+ "lock; xaddl %0, %1;"
+ : "+r" (x), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+
+ return (x);
+}
+#else
+# error "Missing implementation for 32-bit atomic operations"
+#endif
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
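
atomic.h selects an implementation in priority order (GCC __sync builtins, Darwin OSAtomic, inline x86 lock; xadd), and every variant returns the post-update value; atomic_read_*() is simply an add of zero. A hedged usage sketch of that contract using only the __sync builtin path (assumes a toolchain that provides the 8-byte __sync operations; counter_add is this sketch's own name):

    #include <stdint.h>
    #include <stdio.h>

    /* Same contract as atomic_add_uint64(): returns the value after the
     * update; reading is an add of zero, as in atomic_read_uint64(). */
    static uint64_t
    counter_add(uint64_t *p, uint64_t x)
    {
        return (__sync_add_and_fetch(p, x));
    }

    int
    main(void)
    {
        uint64_t allocated = 0;

        counter_add(&allocated, 4096);
        counter_add(&allocated, 4096);
        printf("allocated = %llu\n",
            (unsigned long long)counter_add(&allocated, 0));
        return (0);
    }
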
diff --git a/dep/jemalloc/include/jemalloc/internal/bitmap.h b/dep/jemalloc/include/jemalloc/internal/bitmap.h
new file mode 100644
index 00000000000..605ebac58c1
--- /dev/null
+++ b/dep/jemalloc/include/jemalloc/internal/bitmap.h
@@ -0,0 +1,184 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
+#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS
+
+typedef struct bitmap_level_s bitmap_level_t;
+typedef struct bitmap_info_s bitmap_info_t;
+typedef unsigned long bitmap_t;
+#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG
+
+/* Number of bits per group. */
+#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3)
+#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS)
+#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1)
+
+/* Maximum number of levels possible. */
+#define BITMAP_MAX_LEVELS \
+ (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \
+ + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP)
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct bitmap_level_s {
+ /* Offset of this level's groups within the array of groups. */
+ size_t group_offset;
+};
+
+struct bitmap_info_s {
+ /* Logical number of bits in bitmap (stored at bottom level). */
+ size_t nbits;
+
+ /* Number of levels necessary for nbits. */
+ unsigned nlevels;
+
+ /*
+ * Only the first (nlevels+1) elements are used, and levels are ordered
+ * bottom to top (e.g. the bottom level is stored in levels[0]).
+ */
+ bitmap_level_t levels[BITMAP_MAX_LEVELS+1];
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void bitmap_info_init(bitmap_info_t *binfo, size_t nbits);
+size_t bitmap_info_ngroups(const bitmap_info_t *binfo);
+size_t bitmap_size(size_t nbits);
+void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo);
+bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
+void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
+size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo);
+void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_))
+JEMALLOC_INLINE bool
+bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo)
+{
+ unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
+ bitmap_t rg = bitmap[rgoff];
+ /* The bitmap is full iff the root group is 0. */
+ return (rg == 0);
+}
+
+JEMALLOC_INLINE bool
+bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
+{
+ size_t goff;
+ bitmap_t g;
+
+ assert(bit < binfo->nbits);
+ goff = bit >> LG_BITMAP_GROUP_NBITS;
+ g = bitmap[goff];
+ return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))));
+}
+
+JEMALLOC_INLINE void
+bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
+{
+ size_t goff;
+ bitmap_t *gp;
+ bitmap_t g;
+
+ assert(bit < binfo->nbits);
+ assert(bitmap_get(bitmap, binfo, bit) == false);
+ goff = bit >> LG_BITMAP_GROUP_NBITS;
+ gp = &bitmap[goff];
+ g = *gp;
+ assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
+ g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+ *gp = g;
+ assert(bitmap_get(bitmap, binfo, bit));
+ /* Propagate group state transitions up the tree. */
+ if (g == 0) {
+ unsigned i;
+ for (i = 1; i < binfo->nlevels; i++) {
+ bit = goff;
+ goff = bit >> LG_BITMAP_GROUP_NBITS;
+ gp = &bitmap[binfo->levels[i].group_offset + goff];
+ g = *gp;
+ assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
+ g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+ *gp = g;
+ if (g != 0)
+ break;
+ }
+ }
+}
+
+/* sfu: set first unset. */
+JEMALLOC_INLINE size_t
+bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
+{
+ size_t bit;
+ bitmap_t g;
+ unsigned i;
+
+ assert(bitmap_full(bitmap, binfo) == false);
+
+ i = binfo->nlevels - 1;
+ g = bitmap[binfo->levels[i].group_offset];
+ bit = ffsl(g) - 1;
+ while (i > 0) {
+ i--;
+ g = bitmap[binfo->levels[i].group_offset + bit];
+ bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1);
+ }
+
+ bitmap_set(bitmap, binfo, bit);
+ return (bit);
+}
+
+JEMALLOC_INLINE void
+bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
+{
+ size_t goff;
+ bitmap_t *gp;
+ bitmap_t g;
+ bool propagate;
+
+ assert(bit < binfo->nbits);
+ assert(bitmap_get(bitmap, binfo, bit));
+ goff = bit >> LG_BITMAP_GROUP_NBITS;
+ gp = &bitmap[goff];
+ g = *gp;
+ propagate = (g == 0);
+ assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0);
+ g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+ *gp = g;
+ assert(bitmap_get(bitmap, binfo, bit) == false);
+ /* Propagate group state transitions up the tree. */
+ if (propagate) {
+ unsigned i;
+ for (i = 1; i < binfo->nlevels; i++) {
+ bit = goff;
+ goff = bit >> LG_BITMAP_GROUP_NBITS;
+ gp = &bitmap[binfo->levels[i].group_offset + goff];
+ g = *gp;
+ propagate = (g == 0);
+ assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))
+ == 0);
+ g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+ *gp = g;
+ if (propagate == false)
+ break;
+ }
+ }
+}
+
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
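
The new bitmap uses an inverted representation: a physical 1 bit means "logically unset" (a free region), so bitmap_sfu() can locate the lowest free index with one ffsl() per level of the tree. A flat, single-level sketch of that idea in portable C (the toy_* names are this sketch's own; the real code adds the level hierarchy and uses long-sized groups):

    #include <stdio.h>
    #include <strings.h>    /* ffs() */

    /* A physical 1 bit marks a free index, so find-first-set yields the
     * lowest free index directly.  This toy handles one 32-bit group. */
    static unsigned toy_group = ~0U;        /* all 32 indices free */

    /* "Set first unset": return the lowest free index, mark it used. */
    static unsigned
    toy_sfu(void)
    {
        unsigned bit = (unsigned)(ffs((int)toy_group) - 1);

        toy_group ^= 1U << bit;             /* flip to "in use" (0) */
        return (bit);
    }

    int
    main(void)
    {
        printf("%u %u %u\n", toy_sfu(), toy_sfu(), toy_sfu());  /* 0 1 2 */
        return (0);
    }
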
diff --git a/dep/jemalloc/include/jemalloc/internal/chunk.h b/dep/jemalloc/include/jemalloc/internal/chunk.h
index a60f0ad7498..54b6a3ec886 100644
--- a/dep/jemalloc/include/jemalloc/internal/chunk.h
+++ b/dep/jemalloc/include/jemalloc/internal/chunk.h
@@ -50,7 +50,7 @@ extern size_t map_bias; /* Number of arena chunk header pages. */
extern size_t arena_maxclass; /* Max size class for arenas. */
void *chunk_alloc(size_t size, bool base, bool *zero);
-void chunk_dealloc(void *chunk, size_t size);
+void chunk_dealloc(void *chunk, size_t size, bool unmap);
bool chunk_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
diff --git a/dep/jemalloc/include/jemalloc/internal/ckh.h b/dep/jemalloc/include/jemalloc/internal/ckh.h
index d4e391b6360..3e4ad4c85f9 100644
--- a/dep/jemalloc/include/jemalloc/internal/ckh.h
+++ b/dep/jemalloc/include/jemalloc/internal/ckh.h
@@ -31,7 +31,7 @@ struct ckhc_s {
struct ckh_s {
#ifdef JEMALLOC_DEBUG
-#define CKH_MAGIG 0x3af2489d
+#define CKH_MAGIC 0x3af2489d
uint32_t magic;
#endif
diff --git a/dep/jemalloc/include/jemalloc/internal/ctl.h b/dep/jemalloc/include/jemalloc/internal/ctl.h
index 8776ad135a7..f1f5eb70a2a 100644
--- a/dep/jemalloc/include/jemalloc/internal/ctl.h
+++ b/dep/jemalloc/include/jemalloc/internal/ctl.h
@@ -29,6 +29,7 @@ struct ctl_node_s {
struct ctl_arena_stats_s {
bool initialized;
+ unsigned nthreads;
size_t pactive;
size_t pdirty;
#ifdef JEMALLOC_STATS
diff --git a/dep/jemalloc/include/jemalloc/internal/hash.h b/dep/jemalloc/include/jemalloc/internal/hash.h
index d12cdb8359f..8a46ce30803 100644
--- a/dep/jemalloc/include/jemalloc/internal/hash.h
+++ b/dep/jemalloc/include/jemalloc/internal/hash.h
@@ -17,7 +17,7 @@
uint64_t hash(const void *key, size_t len, uint64_t seed);
#endif
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(HASH_C_))
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_))
/*
* The following hash function is based on MurmurHash64A(), placed into the
* public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for
@@ -26,7 +26,7 @@ uint64_t hash(const void *key, size_t len, uint64_t seed);
JEMALLOC_INLINE uint64_t
hash(const void *key, size_t len, uint64_t seed)
{
- const uint64_t m = 0xc6a4a7935bd1e995;
+ const uint64_t m = 0xc6a4a7935bd1e995LLU;
const int r = 47;
uint64_t h = seed ^ (len * m);
const uint64_t *data = (const uint64_t *)key;
@@ -62,7 +62,7 @@ hash(const void *key, size_t len, uint64_t seed)
h *= m;
h ^= h >> r;
- return h;
+ return (h);
}
#endif
diff --git a/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h
index 611f0c665a1..cd554bea1b9 100644
--- a/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h
+++ b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h
@@ -33,6 +33,12 @@
#define JEMALLOC_MANGLE
#include "../jemalloc.h"
+#include "jemalloc/internal/private_namespace.h"
+
+#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
+#include <libkern/OSAtomic.h>
+#endif
+
#ifdef JEMALLOC_ZONE
#include <mach/mach_error.h>
#include <mach/mach_init.h>
@@ -55,8 +61,9 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
* Define a custom assert() in order to reduce the chances of deadlock during
* assertion failure.
*/
-#ifdef JEMALLOC_DEBUG
-# define assert(e) do { \
+#ifndef assert
+# ifdef JEMALLOC_DEBUG
+# define assert(e) do { \
if (!(e)) { \
char line_buf[UMAX2S_BUFSIZE]; \
malloc_write("<jemalloc>: "); \
@@ -70,8 +77,15 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
abort(); \
} \
} while (0)
+# else
+# define assert(e)
+# endif
+#endif
+
+#ifdef JEMALLOC_DEBUG
+# define dassert(e) assert(e)
#else
-#define assert(e)
+# define dassert(e)
#endif
/*
@@ -146,12 +160,19 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#define QUANTUM_CEILING(a) \
(((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
+#define LONG ((size_t)(1U << LG_SIZEOF_LONG))
+#define LONG_MASK (LONG - 1)
+
+/* Return the smallest long multiple that is >= a. */
+#define LONG_CEILING(a) \
+ (((a) + LONG_MASK) & ~LONG_MASK)
+
#define SIZEOF_PTR (1U << LG_SIZEOF_PTR)
+#define PTR_MASK (SIZEOF_PTR - 1)
-/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
-#if (!defined(PIC) && !defined(NO_TLS))
-# define NO_TLS
-#endif
+/* Return the smallest (void *) multiple that is >= a. */
+#define PTR_CEILING(a) \
+ (((a) + PTR_MASK) & ~PTR_MASK)
/*
* Maximum size of L1 cache line. This is used to avoid cache line aliasing.
@@ -198,6 +219,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#define PAGE_CEILING(s) \
(((s) + PAGE_MASK) & ~PAGE_MASK)
+#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/jemprn.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h"
@@ -206,6 +228,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
@@ -221,12 +244,14 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
/******************************************************************************/
#define JEMALLOC_H_STRUCTS
+#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/jemprn.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/base.h"
@@ -240,6 +265,13 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#endif
#include "jemalloc/internal/prof.h"
+#ifdef JEMALLOC_STATS
+typedef struct {
+ uint64_t allocated;
+ uint64_t deallocated;
+} thread_allocated_t;
+#endif
+
#undef JEMALLOC_H_STRUCTS
/******************************************************************************/
#define JEMALLOC_H_EXTERNS
@@ -269,6 +301,7 @@ extern size_t lg_pagesize;
extern unsigned ncpus;
extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */
+extern pthread_key_t arenas_tsd;
#ifndef NO_TLS
/*
* Map of pthread_self() --> arenas[???], used for selecting an arena to use
@@ -278,9 +311,9 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
# define ARENA_GET() arenas_tls
# define ARENA_SET(v) do { \
arenas_tls = (v); \
+ pthread_setspecific(arenas_tsd, (void *)(v)); \
} while (0)
#else
-extern pthread_key_t arenas_tsd;
# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd))
# define ARENA_SET(v) do { \
pthread_setspecific(arenas_tsd, (void *)(v)); \
@@ -295,45 +328,28 @@ extern arena_t **arenas;
extern unsigned narenas;
#ifdef JEMALLOC_STATS
-typedef struct {
- uint64_t allocated;
- uint64_t deallocated;
-} thread_allocated_t;
# ifndef NO_TLS
extern __thread thread_allocated_t thread_allocated_tls;
-# define ALLOCATED_GET() thread_allocated_tls.allocated
-# define DEALLOCATED_GET() thread_allocated_tls.deallocated
+# define ALLOCATED_GET() (thread_allocated_tls.allocated)
+# define ALLOCATEDP_GET() (&thread_allocated_tls.allocated)
+# define DEALLOCATED_GET() (thread_allocated_tls.deallocated)
+# define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated)
# define ALLOCATED_ADD(a, d) do { \
thread_allocated_tls.allocated += a; \
thread_allocated_tls.deallocated += d; \
} while (0)
# else
extern pthread_key_t thread_allocated_tsd;
-# define ALLOCATED_GET() \
- (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \
- ? ((thread_allocated_t *) \
- pthread_getspecific(thread_allocated_tsd))->allocated : 0)
-# define DEALLOCATED_GET() \
- (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \
- ? ((thread_allocated_t \
- *)pthread_getspecific(thread_allocated_tsd))->deallocated : \
- 0)
+thread_allocated_t *thread_allocated_get_hard(void);
+
+# define ALLOCATED_GET() (thread_allocated_get()->allocated)
+# define ALLOCATEDP_GET() (&thread_allocated_get()->allocated)
+# define DEALLOCATED_GET() (thread_allocated_get()->deallocated)
+# define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated)
# define ALLOCATED_ADD(a, d) do { \
- thread_allocated_t *thread_allocated = (thread_allocated_t *) \
- pthread_getspecific(thread_allocated_tsd); \
- if (thread_allocated != NULL) { \
- thread_allocated->allocated += (a); \
- thread_allocated->deallocated += (d); \
- } else { \
- thread_allocated = (thread_allocated_t *) \
- imalloc(sizeof(thread_allocated_t)); \
- if (thread_allocated != NULL) { \
- pthread_setspecific(thread_allocated_tsd, \
- thread_allocated); \
- thread_allocated->allocated = (a); \
- thread_allocated->deallocated = (d); \
- } \
- } \
+ thread_allocated_t *thread_allocated = thread_allocated_get(); \
+ thread_allocated->allocated += (a); \
+ thread_allocated->deallocated += (d); \
} while (0)
# endif
#endif
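
With the inline lazy-init gone from ALLOCATED_ADD, the NO_TLS path now relies on thread_allocated_get() falling back to thread_allocated_get_hard(), which allocates and installs the per-thread record on first use. A generic, self-contained sketch of that pthread-key pattern (the counters_* names are this sketch's own; error handling is elided):

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Per-thread counters behind a pthread key: the fast path is a plain
     * pthread_getspecific(); the slow path allocates and installs the
     * record once, mirroring thread_allocated_get()/..._get_hard(). */
    typedef struct {
        uint64_t allocated;
        uint64_t deallocated;
    } counters_t;

    static pthread_key_t counters_tsd;
    static pthread_once_t counters_once = PTHREAD_ONCE_INIT;

    static void
    counters_key_create(void)
    {
        pthread_key_create(&counters_tsd, free);   /* free record at exit */
    }

    static counters_t *
    counters_get(void)
    {
        counters_t *c;

        pthread_once(&counters_once, counters_key_create);
        c = (counters_t *)pthread_getspecific(counters_tsd);
        if (c == NULL) {                           /* slow path */
            c = (counters_t *)calloc(1, sizeof(*c));
            pthread_setspecific(counters_tsd, c);
        }
        return (c);
    }

    int
    main(void)
    {
        counters_get()->allocated += 4096;
        printf("allocated = %llu\n",
            (unsigned long long)counters_get()->allocated);
        return (0);
    }
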
@@ -344,12 +360,14 @@ int buferror(int errnum, char *buf, size_t buflen);
void jemalloc_prefork(void);
void jemalloc_postfork(void);
+#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/jemprn.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/base.h"
@@ -367,6 +385,7 @@ void jemalloc_postfork(void);
/******************************************************************************/
#define JEMALLOC_H_INLINES
+#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/jemprn.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h"
@@ -384,6 +403,9 @@ size_t s2u(size_t size);
size_t sa2u(size_t size, size_t alignment, size_t *run_size_p);
void malloc_write(const char *s);
arena_t *choose_arena(void);
+# if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+thread_allocated_t *thread_allocated_get(void);
+# endif
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
@@ -414,10 +436,10 @@ s2u(size_t size)
{
if (size <= small_maxclass)
- return arenas[0]->bins[small_size2bin[size]].reg_size;
+ return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size);
if (size <= arena_maxclass)
- return PAGE_CEILING(size);
- return CHUNK_CEILING(size);
+ return (PAGE_CEILING(size));
+ return (CHUNK_CEILING(size));
}
/*
@@ -458,10 +480,8 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p)
}
if (usize <= arena_maxclass && alignment <= PAGE_SIZE) {
- if (usize <= small_maxclass) {
- return
- (arenas[0]->bins[small_size2bin[usize]].reg_size);
- }
+ if (usize <= small_maxclass)
+ return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size);
return (PAGE_CEILING(usize));
} else {
size_t run_size;
@@ -544,8 +564,22 @@ choose_arena(void)
return (ret);
}
+
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+JEMALLOC_INLINE thread_allocated_t *
+thread_allocated_get(void)
+{
+ thread_allocated_t *thread_allocated = (thread_allocated_t *)
+ pthread_getspecific(thread_allocated_tsd);
+
+ if (thread_allocated == NULL)
+ return (thread_allocated_get_hard());
+ return (thread_allocated);
+}
+#endif
#endif
+#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/arena.h"
@@ -557,7 +591,7 @@ choose_arena(void)
#ifndef JEMALLOC_ENABLE_INLINE
void *imalloc(size_t size);
void *icalloc(size_t size);
-void *ipalloc(size_t size, size_t alignment, bool zero);
+void *ipalloc(size_t usize, size_t alignment, bool zero);
size_t isalloc(const void *ptr);
# ifdef JEMALLOC_IVSALLOC
size_t ivsalloc(const void *ptr);
@@ -591,28 +625,39 @@ icalloc(size_t size)
}
JEMALLOC_INLINE void *
-ipalloc(size_t size, size_t alignment, bool zero)
+ipalloc(size_t usize, size_t alignment, bool zero)
{
void *ret;
- size_t usize;
- size_t run_size
-# ifdef JEMALLOC_CC_SILENCE
- = 0
-# endif
- ;
- usize = sa2u(size, alignment, &run_size);
- if (usize == 0)
- return (NULL);
+ assert(usize != 0);
+ assert(usize == sa2u(usize, alignment, NULL));
+
if (usize <= arena_maxclass && alignment <= PAGE_SIZE)
ret = arena_malloc(usize, zero);
- else if (run_size <= arena_maxclass) {
- ret = arena_palloc(choose_arena(), usize, run_size, alignment,
- zero);
- } else if (alignment <= chunksize)
- ret = huge_malloc(usize, zero);
- else
- ret = huge_palloc(usize, alignment, zero);
+ else {
+ size_t run_size
+#ifdef JEMALLOC_CC_SILENCE
+ = 0
+#endif
+ ;
+
+ /*
+ * Ideally we would only ever call sa2u() once per aligned
+ * allocation request, and the caller of this function has
+ * already done so once. However, it's rather burdensome to
+ * require every caller to pass in run_size, especially given
+ * that it's only relevant to large allocations. Therefore,
+ * just call it again here in order to get run_size.
+ */
+ sa2u(usize, alignment, &run_size);
+ if (run_size <= arena_maxclass) {
+ ret = arena_palloc(choose_arena(), usize, run_size,
+ alignment, zero);
+ } else if (alignment <= chunksize)
+ ret = huge_malloc(usize, zero);
+ else
+ ret = huge_palloc(usize, alignment, zero);
+ }
assert(((uintptr_t)ret & (alignment - 1)) == 0);
return (ret);
@@ -629,7 +674,7 @@ isalloc(const void *ptr)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
- assert(chunk->arena->magic == ARENA_MAGIC);
+ dassert(chunk->arena->magic == ARENA_MAGIC);
#ifdef JEMALLOC_PROF
ret = arena_salloc_demote(ptr);
@@ -683,7 +728,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
!= 0) {
- size_t copysize;
+ size_t usize, copysize;
/*
* Existing object alignment is inadequate; allocate new space
@@ -691,12 +736,18 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
*/
if (no_move)
return (NULL);
- ret = ipalloc(size + extra, alignment, zero);
+ usize = sa2u(size + extra, alignment, NULL);
+ if (usize == 0)
+ return (NULL);
+ ret = ipalloc(usize, alignment, zero);
if (ret == NULL) {
if (extra == 0)
return (NULL);
/* Try again, without extra this time. */
- ret = ipalloc(size, alignment, zero);
+ usize = sa2u(size, alignment, NULL);
+ if (usize == 0)
+ return (NULL);
+ ret = ipalloc(usize, alignment, zero);
if (ret == NULL)
return (NULL);
}
diff --git a/dep/jemalloc/include/jemalloc/internal/mb.h b/dep/jemalloc/include/jemalloc/internal/mb.h
index 1707aa91d68..dc9f2a54262 100644
--- a/dep/jemalloc/include/jemalloc/internal/mb.h
+++ b/dep/jemalloc/include/jemalloc/internal/mb.h
@@ -17,7 +17,7 @@
void mb_write(void);
#endif
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(MB_C_))
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_))
#ifdef __i386__
/*
* According to the Intel Architecture Software Developer's Manual, current
diff --git a/dep/jemalloc/include/jemalloc/internal/mutex.h b/dep/jemalloc/include/jemalloc/internal/mutex.h
index dcca01edd5d..62947ced55e 100644
--- a/dep/jemalloc/include/jemalloc/internal/mutex.h
+++ b/dep/jemalloc/include/jemalloc/internal/mutex.h
@@ -1,7 +1,11 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
+#ifdef JEMALLOC_OSSPIN
+typedef OSSpinLock malloc_mutex_t;
+#else
typedef pthread_mutex_t malloc_mutex_t;
+#endif
#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
@@ -41,17 +45,26 @@ JEMALLOC_INLINE void
malloc_mutex_lock(malloc_mutex_t *mutex)
{
- if (isthreaded)
+ if (isthreaded) {
+#ifdef JEMALLOC_OSSPIN
+ OSSpinLockLock(mutex);
+#else
pthread_mutex_lock(mutex);
+#endif
+ }
}
JEMALLOC_INLINE bool
malloc_mutex_trylock(malloc_mutex_t *mutex)
{
- if (isthreaded)
+ if (isthreaded) {
+#ifdef JEMALLOC_OSSPIN
+ return (OSSpinLockTry(mutex) == false);
+#else
return (pthread_mutex_trylock(mutex) != 0);
- else
+#endif
+ } else
return (false);
}
@@ -59,8 +72,13 @@ JEMALLOC_INLINE void
malloc_mutex_unlock(malloc_mutex_t *mutex)
{
- if (isthreaded)
+ if (isthreaded) {
+#ifdef JEMALLOC_OSSPIN
+ OSSpinLockUnlock(mutex);
+#else
pthread_mutex_unlock(mutex);
+#endif
+ }
}
#endif
diff --git a/dep/jemalloc/include/jemalloc/internal/private_namespace.h b/dep/jemalloc/include/jemalloc/internal/private_namespace.h
new file mode 100644
index 00000000000..d4f5f96d7b2
--- /dev/null
+++ b/dep/jemalloc/include/jemalloc/internal/private_namespace.h
@@ -0,0 +1,195 @@
+#define arena_bin_index JEMALLOC_N(arena_bin_index)
+#define arena_boot JEMALLOC_N(arena_boot)
+#define arena_dalloc JEMALLOC_N(arena_dalloc)
+#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin)
+#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large)
+#define arena_malloc JEMALLOC_N(arena_malloc)
+#define arena_malloc_large JEMALLOC_N(arena_malloc_large)
+#define arena_malloc_small JEMALLOC_N(arena_malloc_small)
+#define arena_new JEMALLOC_N(arena_new)
+#define arena_palloc JEMALLOC_N(arena_palloc)
+#define arena_prof_accum JEMALLOC_N(arena_prof_accum)
+#define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get)
+#define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set)
+#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted)
+#define arena_purge_all JEMALLOC_N(arena_purge_all)
+#define arena_ralloc JEMALLOC_N(arena_ralloc)
+#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move)
+#define arena_run_regind JEMALLOC_N(arena_run_regind)
+#define arena_salloc JEMALLOC_N(arena_salloc)
+#define arena_salloc_demote JEMALLOC_N(arena_salloc_demote)
+#define arena_stats_merge JEMALLOC_N(arena_stats_merge)
+#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small)
+#define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index)
+#define arenas_extend JEMALLOC_N(arenas_extend)
+#define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index)
+#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32)
+#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64)
+#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32)
+#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64)
+#define base_alloc JEMALLOC_N(base_alloc)
+#define base_boot JEMALLOC_N(base_boot)
+#define base_node_alloc JEMALLOC_N(base_node_alloc)
+#define base_node_dealloc JEMALLOC_N(base_node_dealloc)
+#define bitmap_full JEMALLOC_N(bitmap_full)
+#define bitmap_get JEMALLOC_N(bitmap_get)
+#define bitmap_info_init JEMALLOC_N(bitmap_info_init)
+#define bitmap_info_ngroups JEMALLOC_N(bitmap_info_ngroups)
+#define bitmap_init JEMALLOC_N(bitmap_init)
+#define bitmap_set JEMALLOC_N(bitmap_set)
+#define bitmap_sfu JEMALLOC_N(bitmap_sfu)
+#define bitmap_size JEMALLOC_N(bitmap_size)
+#define bitmap_unset JEMALLOC_N(bitmap_unset)
+#define bt_init JEMALLOC_N(bt_init)
+#define buferror JEMALLOC_N(buferror)
+#define choose_arena JEMALLOC_N(choose_arena)
+#define choose_arena_hard JEMALLOC_N(choose_arena_hard)
+#define chunk_alloc JEMALLOC_N(chunk_alloc)
+#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss)
+#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap)
+#define chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve)
+#define chunk_alloc_swap JEMALLOC_N(chunk_alloc_swap)
+#define chunk_boot JEMALLOC_N(chunk_boot)
+#define chunk_dealloc JEMALLOC_N(chunk_dealloc)
+#define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss)
+#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap)
+#define chunk_dealloc_swap JEMALLOC_N(chunk_dealloc_swap)
+#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot)
+#define chunk_in_dss JEMALLOC_N(chunk_in_dss)
+#define chunk_in_swap JEMALLOC_N(chunk_in_swap)
+#define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot)
+#define chunk_swap_boot JEMALLOC_N(chunk_swap_boot)
+#define chunk_swap_enable JEMALLOC_N(chunk_swap_enable)
+#define ckh_bucket_search JEMALLOC_N(ckh_bucket_search)
+#define ckh_count JEMALLOC_N(ckh_count)
+#define ckh_delete JEMALLOC_N(ckh_delete)
+#define ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert)
+#define ckh_insert JEMALLOC_N(ckh_insert)
+#define ckh_isearch JEMALLOC_N(ckh_isearch)
+#define ckh_iter JEMALLOC_N(ckh_iter)
+#define ckh_new JEMALLOC_N(ckh_new)
+#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash)
+#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp)
+#define ckh_rebuild JEMALLOC_N(ckh_rebuild)
+#define ckh_remove JEMALLOC_N(ckh_remove)
+#define ckh_search JEMALLOC_N(ckh_search)
+#define ckh_string_hash JEMALLOC_N(ckh_string_hash)
+#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp)
+#define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert)
+#define ckh_try_insert JEMALLOC_N(ckh_try_insert)
+#define create_zone JEMALLOC_N(create_zone)
+#define ctl_boot JEMALLOC_N(ctl_boot)
+#define ctl_bymib JEMALLOC_N(ctl_bymib)
+#define ctl_byname JEMALLOC_N(ctl_byname)
+#define ctl_nametomib JEMALLOC_N(ctl_nametomib)
+#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first)
+#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert)
+#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter)
+#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse)
+#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start)
+#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last)
+#define extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new)
+#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next)
+#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch)
+#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev)
+#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch)
+#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove)
+#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter)
+#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse)
+#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start)
+#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search)
+#define extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first)
+#define extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert)
+#define extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter)
+#define extent_tree_szad_iter_recurse JEMALLOC_N(extent_tree_szad_iter_recurse)
+#define extent_tree_szad_iter_start JEMALLOC_N(extent_tree_szad_iter_start)
+#define extent_tree_szad_last JEMALLOC_N(extent_tree_szad_last)
+#define extent_tree_szad_new JEMALLOC_N(extent_tree_szad_new)
+#define extent_tree_szad_next JEMALLOC_N(extent_tree_szad_next)
+#define extent_tree_szad_nsearch JEMALLOC_N(extent_tree_szad_nsearch)
+#define extent_tree_szad_prev JEMALLOC_N(extent_tree_szad_prev)
+#define extent_tree_szad_psearch JEMALLOC_N(extent_tree_szad_psearch)
+#define extent_tree_szad_remove JEMALLOC_N(extent_tree_szad_remove)
+#define extent_tree_szad_reverse_iter JEMALLOC_N(extent_tree_szad_reverse_iter)
+#define extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse)
+#define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start)
+#define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search)
+#define hash JEMALLOC_N(hash)
+#define huge_boot JEMALLOC_N(huge_boot)
+#define huge_dalloc JEMALLOC_N(huge_dalloc)
+#define huge_malloc JEMALLOC_N(huge_malloc)
+#define huge_palloc JEMALLOC_N(huge_palloc)
+#define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get)
+#define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set)
+#define huge_ralloc JEMALLOC_N(huge_ralloc)
+#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move)
+#define huge_salloc JEMALLOC_N(huge_salloc)
+#define iallocm JEMALLOC_N(iallocm)
+#define icalloc JEMALLOC_N(icalloc)
+#define idalloc JEMALLOC_N(idalloc)
+#define imalloc JEMALLOC_N(imalloc)
+#define ipalloc JEMALLOC_N(ipalloc)
+#define iralloc JEMALLOC_N(iralloc)
+#define isalloc JEMALLOC_N(isalloc)
+#define ivsalloc JEMALLOC_N(ivsalloc)
+#define jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init)
+#define jemalloc_postfork JEMALLOC_N(jemalloc_postfork)
+#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork)
+#define malloc_cprintf JEMALLOC_N(malloc_cprintf)
+#define malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy)
+#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init)
+#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock)
+#define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock)
+#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock)
+#define malloc_printf JEMALLOC_N(malloc_printf)
+#define malloc_write JEMALLOC_N(malloc_write)
+#define mb_write JEMALLOC_N(mb_write)
+#define pow2_ceil JEMALLOC_N(pow2_ceil)
+#define prof_backtrace JEMALLOC_N(prof_backtrace)
+#define prof_boot0 JEMALLOC_N(prof_boot0)
+#define prof_boot1 JEMALLOC_N(prof_boot1)
+#define prof_boot2 JEMALLOC_N(prof_boot2)
+#define prof_ctx_get JEMALLOC_N(prof_ctx_get)
+#define prof_ctx_set JEMALLOC_N(prof_ctx_set)
+#define prof_free JEMALLOC_N(prof_free)
+#define prof_gdump JEMALLOC_N(prof_gdump)
+#define prof_idump JEMALLOC_N(prof_idump)
+#define prof_lookup JEMALLOC_N(prof_lookup)
+#define prof_malloc JEMALLOC_N(prof_malloc)
+#define prof_mdump JEMALLOC_N(prof_mdump)
+#define prof_realloc JEMALLOC_N(prof_realloc)
+#define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update)
+#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update)
+#define prof_tdata_init JEMALLOC_N(prof_tdata_init)
+#define pthread_create JEMALLOC_N(pthread_create)
+#define rtree_get JEMALLOC_N(rtree_get)
+#define rtree_get_locked JEMALLOC_N(rtree_get_locked)
+#define rtree_new JEMALLOC_N(rtree_new)
+#define rtree_set JEMALLOC_N(rtree_set)
+#define s2u JEMALLOC_N(s2u)
+#define sa2u JEMALLOC_N(sa2u)
+#define stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index)
+#define stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index)
+#define stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index)
+#define stats_cactive_add JEMALLOC_N(stats_cactive_add)
+#define stats_cactive_get JEMALLOC_N(stats_cactive_get)
+#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub)
+#define stats_print JEMALLOC_N(stats_print)
+#define szone2ozone JEMALLOC_N(szone2ozone)
+#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy)
+#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large)
+#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small)
+#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard)
+#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large)
+#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small)
+#define tcache_boot JEMALLOC_N(tcache_boot)
+#define tcache_create JEMALLOC_N(tcache_create)
+#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large)
+#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small)
+#define tcache_destroy JEMALLOC_N(tcache_destroy)
+#define tcache_event JEMALLOC_N(tcache_event)
+#define tcache_get JEMALLOC_N(tcache_get)
+#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge)
+#define thread_allocated_get JEMALLOC_N(thread_allocated_get)
+#define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard)
+#define u2s JEMALLOC_N(u2s)
diff --git a/dep/jemalloc/include/jemalloc/internal/prof.h b/dep/jemalloc/include/jemalloc/internal/prof.h
index 7864000b88b..e9064ba6e73 100644
--- a/dep/jemalloc/include/jemalloc/internal/prof.h
+++ b/dep/jemalloc/include/jemalloc/internal/prof.h
@@ -227,9 +227,60 @@ bool prof_boot2(void);
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
+#define PROF_ALLOC_PREP(nignore, size, ret) do { \
+ prof_tdata_t *prof_tdata; \
+ prof_bt_t bt; \
+ \
+ assert(size == s2u(size)); \
+ \
+ prof_tdata = PROF_TCACHE_GET(); \
+ if (prof_tdata == NULL) { \
+ prof_tdata = prof_tdata_init(); \
+ if (prof_tdata == NULL) { \
+ ret = NULL; \
+ break; \
+ } \
+ } \
+ \
+ if (opt_prof_active == false) { \
+ /* Sampling is currently inactive, so avoid sampling. */\
+ ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
+ } else if (opt_lg_prof_sample == 0) { \
+ /* Don't bother with sampling logic, since sampling */\
+ /* interval is 1. */\
+ bt_init(&bt, prof_tdata->vec); \
+ prof_backtrace(&bt, nignore, prof_bt_max); \
+ ret = prof_lookup(&bt); \
+ } else { \
+ if (prof_tdata->threshold == 0) { \
+ /* Initialize. Seed the prng differently for */\
+ /* each thread. */\
+ prof_tdata->prn_state = \
+ (uint64_t)(uintptr_t)&size; \
+ prof_sample_threshold_update(prof_tdata); \
+ } \
+ \
+ /* Determine whether to capture a backtrace based on */\
+ /* whether size is enough for prof_accum to reach */\
+ /* prof_tdata->threshold. However, delay updating */\
+ /* these variables until prof_{m,re}alloc(), because */\
+ /* we don't know for sure that the allocation will */\
+ /* succeed. */\
+ /* */\
+ /* Use subtraction rather than addition to avoid */\
+ /* potential integer overflow. */\
+ if (size >= prof_tdata->threshold - \
+ prof_tdata->accum) { \
+ bt_init(&bt, prof_tdata->vec); \
+ prof_backtrace(&bt, nignore, prof_bt_max); \
+ ret = prof_lookup(&bt); \
+ } else \
+ ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
+ } \
+} while (0)
+
#ifndef JEMALLOC_ENABLE_INLINE
void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
-prof_thr_cnt_t *prof_alloc_prep(size_t size);
prof_ctx_t *prof_ctx_get(const void *ptr);
void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool prof_sample_accum_update(size_t size);
@@ -247,8 +298,22 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata)
double u;
/*
- * Compute prof_sample_threshold as a geometrically distributed random
+ * Compute sample threshold as a geometrically distributed random
* variable with mean (2^opt_lg_prof_sample).
+ *
+ * __ __
+ * | log(u) | 1
+ * prof_tdata->threshold = | -------- |, where p = -------------------
+ * | log(1-p) | opt_lg_prof_sample
+ * 2
+ *
+ * For more information on the math, see:
+ *
+ * Non-Uniform Random Variate Generation
+ * Luc Devroye
+ * Springer-Verlag, New York, 1986
+ * pp 500
+ * (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
*/
prn64(r, 53, prof_tdata->prn_state,
(uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU);
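
The comment above gives the closed form for drawing a geometrically distributed sampling interval with mean 2^opt_lg_prof_sample. A standalone sketch of the same computation using the standard C library instead of prn64() (lg_sample = 19 and all names here are this sketch's choices; link with -lm):

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* threshold = floor(log(u) / log(1 - p)) + 1, with p = 1/2^lg_sample
     * and u uniform in (0,1): geometric with mean 2^lg_sample bytes. */
    static uint64_t
    sample_threshold(unsigned lg_sample)
    {
        double p = 1.0 / (double)(1ULL << lg_sample);
        double u = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0);

        return ((uint64_t)(log(u) / log(1.0 - p)) + 1);
    }

    int
    main(void)
    {
        unsigned i;
        double sum = 0.0;

        for (i = 0; i < 100000; i++)
            sum += (double)sample_threshold(19);
        printf("mean interval ~ %.0f (expect ~%u)\n", sum / 100000, 1U << 19);
        return (0);
    }
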
@@ -258,71 +323,6 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata)
+ (uint64_t)1U;
}
-JEMALLOC_INLINE prof_thr_cnt_t *
-prof_alloc_prep(size_t size)
-{
-#ifdef JEMALLOC_ENABLE_INLINE
- /* This function does not have its own stack frame, because it is inlined. */
-# define NIGNORE 1
-#else
-# define NIGNORE 2
-#endif
- prof_thr_cnt_t *ret;
- prof_tdata_t *prof_tdata;
- prof_bt_t bt;
-
- assert(size == s2u(size));
-
- prof_tdata = PROF_TCACHE_GET();
- if (prof_tdata == NULL) {
- prof_tdata = prof_tdata_init();
- if (prof_tdata == NULL)
- return (NULL);
- }
-
- if (opt_prof_active == false) {
- /* Sampling is currently inactive, so avoid sampling. */
- ret = (prof_thr_cnt_t *)(uintptr_t)1U;
- } else if (opt_lg_prof_sample == 0) {
- /*
- * Don't bother with sampling logic, since sampling interval is
- * 1.
- */
- bt_init(&bt, prof_tdata->vec);
- prof_backtrace(&bt, NIGNORE, prof_bt_max);
- ret = prof_lookup(&bt);
- } else {
- if (prof_tdata->threshold == 0) {
- /*
- * Initialize. Seed the prng differently for each
- * thread.
- */
- prof_tdata->prn_state = (uint64_t)(uintptr_t)&size;
- prof_sample_threshold_update(prof_tdata);
- }
-
- /*
- * Determine whether to capture a backtrace based on whether
- * size is enough for prof_accum to reach
- * prof_tdata->threshold. However, delay updating these
- * variables until prof_{m,re}alloc(), because we don't know
- * for sure that the allocation will succeed.
- *
- * Use subtraction rather than addition to avoid potential
- * integer overflow.
- */
- if (size >= prof_tdata->threshold - prof_tdata->accum) {
- bt_init(&bt, prof_tdata->vec);
- prof_backtrace(&bt, NIGNORE, prof_bt_max);
- ret = prof_lookup(&bt);
- } else
- ret = (prof_thr_cnt_t *)(uintptr_t)1U;
- }
-
- return (ret);
-#undef NIGNORE
-}
-
JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
@@ -334,7 +334,7 @@ prof_ctx_get(const void *ptr)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
- assert(chunk->arena->magic == ARENA_MAGIC);
+ dassert(chunk->arena->magic == ARENA_MAGIC);
ret = arena_prof_ctx_get(ptr);
} else
@@ -353,7 +353,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
- assert(chunk->arena->magic == ARENA_MAGIC);
+ dassert(chunk->arena->magic == ARENA_MAGIC);
arena_prof_ctx_set(ptr, ctx);
} else
@@ -374,7 +374,7 @@ prof_sample_accum_update(size_t size)
/* Take care to avoid integer overflow. */
if (size >= prof_tdata->threshold - prof_tdata->accum) {
prof_tdata->accum -= (prof_tdata->threshold - size);
- /* Compute new prof_sample_threshold. */
+ /* Compute new sample threshold. */
prof_sample_threshold_update(prof_tdata);
while (prof_tdata->accum >= prof_tdata->threshold) {
prof_tdata->accum -= prof_tdata->threshold;
@@ -401,7 +401,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
* always possible to tell in advance how large an
* object's usable size will be, so there should never
* be a difference between the size passed to
- * prof_alloc_prep() and prof_malloc().
+ * PROF_ALLOC_PREP() and prof_malloc().
*/
assert((uintptr_t)cnt == (uintptr_t)1U);
}
@@ -445,7 +445,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
if (prof_sample_accum_update(size)) {
/*
* Don't sample. The size passed to
- * prof_alloc_prep() was larger than what
+ * PROF_ALLOC_PREP() was larger than what
* actually got allocated, so a backtrace was
* captured for this allocation, even though
* its actual size was insufficient to cross
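The prof.h hunks above replace the prof_alloc_prep() inline function with the PROF_ALLOC_PREP() macro and document the geometric sampling threshold. A rough standalone sketch (not jemalloc code) of the documented formula, for a uniform deviate u in (0, 1) and an arbitrary lg_sample value:

#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Geometric threshold: mean is 2^lg_sample bytes between samples. */
static uint64_t
sample_threshold(double u, unsigned lg_sample)
{
	double p = 1.0 / (double)((uint64_t)1 << lg_sample);

	return ((uint64_t)ceil(log(u) / log(1.0 - p)));
}

int
main(void)
{

	/* With u = 0.5 and lg_sample = 19 this prints roughly 363000;
	 * the long-run mean is 2^19 = 524288 bytes per sample. */
	printf("threshold = %llu\n",
	    (unsigned long long)sample_threshold(0.5, 19));
	return (0);
}

The in-tree version draws u from the per-thread prn64() state seeded in PROF_ALLOC_PREP() and adds 1 rather than taking a true ceiling, but the distribution is the same.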
diff --git a/dep/jemalloc/include/jemalloc/internal/rtree.h b/dep/jemalloc/include/jemalloc/internal/rtree.h
index 9d58ebac545..95d6355a5f4 100644
--- a/dep/jemalloc/include/jemalloc/internal/rtree.h
+++ b/dep/jemalloc/include/jemalloc/internal/rtree.h
@@ -49,7 +49,7 @@ void *rtree_get(rtree_t *rtree, uintptr_t key);
bool rtree_set(rtree_t *rtree, uintptr_t key, void *val);
#endif
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(RTREE_C_))
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
#define RTREE_GET_GENERATE(f) \
/* The least significant bits of the key are ignored. */ \
JEMALLOC_INLINE void * \
diff --git a/dep/jemalloc/include/jemalloc/internal/stats.h b/dep/jemalloc/include/jemalloc/internal/stats.h
index 3fc2080a34b..2a9b31d9ffc 100644
--- a/dep/jemalloc/include/jemalloc/internal/stats.h
+++ b/dep/jemalloc/include/jemalloc/internal/stats.h
@@ -154,6 +154,10 @@ struct chunk_stats_s {
extern bool opt_stats_print;
+#ifdef JEMALLOC_STATS
+extern size_t stats_cactive;
+#endif
+
char *u2s(uint64_t x, unsigned base, char *s);
#ifdef JEMALLOC_STATS
void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
@@ -166,9 +170,38 @@ void stats_print(void (*write)(void *, const char *), void *cbopaque,
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
-#ifdef JEMALLOC_STATS
#ifdef JEMALLOC_H_INLINES
+#ifdef JEMALLOC_STATS
+
+#ifndef JEMALLOC_ENABLE_INLINE
+size_t stats_cactive_get(void);
+void stats_cactive_add(size_t size);
+void stats_cactive_sub(size_t size);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_))
+JEMALLOC_INLINE size_t
+stats_cactive_get(void)
+{
+
+ return (atomic_read_z(&stats_cactive));
+}
+
+JEMALLOC_INLINE void
+stats_cactive_add(size_t size)
+{
+
+ atomic_add_z(&stats_cactive, size);
+}
+
+JEMALLOC_INLINE void
+stats_cactive_sub(size_t size)
+{
+
+ atomic_sub_z(&stats_cactive, size);
+}
+#endif
-#endif /* JEMALLOC_H_INLINES */
#endif /* JEMALLOC_STATS */
+#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
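The new stats_cactive counter is maintained through jemalloc's atomic_{read,add,sub}_z wrappers; the arena.c hunks later in this patch add or subtract the difference whenever nactive crosses a chunk-size multiple, so the counter tracks active memory rounded up to whole chunks. A rough standalone analogue of the accessors, using C11 <stdatomic.h> in place of the jemalloc wrappers:

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

static _Atomic size_t stats_cactive;

static size_t
stats_cactive_get(void)
{

	return (atomic_load(&stats_cactive));
}

static void
stats_cactive_add(size_t size)
{

	atomic_fetch_add(&stats_cactive, size);
}

static void
stats_cactive_sub(size_t size)
{

	atomic_fetch_sub(&stats_cactive, size);
}

int
main(void)
{

	stats_cactive_add(4096);
	stats_cactive_sub(1024);
	printf("cactive = %zu\n", stats_cactive_get());
	return (0);
}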
diff --git a/dep/jemalloc/include/jemalloc/internal/tcache.h b/dep/jemalloc/include/jemalloc/internal/tcache.h
index 1ad91a9b1e0..da3c68c5770 100644
--- a/dep/jemalloc/include/jemalloc/internal/tcache.h
+++ b/dep/jemalloc/include/jemalloc/internal/tcache.h
@@ -2,6 +2,7 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
+typedef struct tcache_bin_info_s tcache_bin_info_t;
typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t;
@@ -32,15 +33,22 @@ typedef struct tcache_s tcache_t;
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
+/*
+ * Read-only information associated with each element of tcache_t's tbins array
+ * is stored separately, mainly to reduce memory usage.
+ */
+struct tcache_bin_info_s {
+ unsigned ncached_max; /* Upper limit on ncached. */
+};
+
struct tcache_bin_s {
# ifdef JEMALLOC_STATS
tcache_bin_stats_t tstats;
# endif
- unsigned low_water; /* Min # cached since last GC. */
- unsigned high_water; /* Max # cached since last GC. */
+ int low_water; /* Min # cached since last GC. */
+ unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */
unsigned ncached; /* # of cached objects. */
- unsigned ncached_max; /* Upper limit on ncached. */
- void *avail; /* Chain of available objects. */
+ void **avail; /* Stack of available objects. */
};
struct tcache_s {
@@ -54,6 +62,12 @@ struct tcache_s {
unsigned ev_cnt; /* Event count since incremental GC. */
unsigned next_gc_bin; /* Next bin to GC. */
tcache_bin_t tbins[1]; /* Dynamically sized. */
+ /*
+ * The pointer stacks associated with tbins follow as a contiguous
+ * array. During tcache initialization, the avail pointer in each
+ * element of tbins is initialized to point to the proper offset within
+ * this array.
+ */
};
#endif /* JEMALLOC_H_STRUCTS */
@@ -64,6 +78,8 @@ extern bool opt_tcache;
extern ssize_t opt_lg_tcache_max;
extern ssize_t opt_lg_tcache_gc_sweep;
+extern tcache_bin_info_t *tcache_bin_info;
+
/* Map of thread-specific caches. */
#ifndef NO_TLS
extern __thread tcache_t *tcache_tls
@@ -110,7 +126,7 @@ void tcache_destroy(tcache_t *tcache);
#ifdef JEMALLOC_STATS
void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
#endif
-void tcache_boot(void);
+bool tcache_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
@@ -169,6 +185,7 @@ tcache_event(tcache_t *tcache)
if (tcache->ev_cnt == tcache_gc_incr) {
size_t binind = tcache->next_gc_bin;
tcache_bin_t *tbin = &tcache->tbins[binind];
+ tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
if (tbin->low_water > 0) {
/*
@@ -192,9 +209,22 @@ tcache_event(tcache_t *tcache)
#endif
);
}
+ /*
+ * Reduce fill count by 2X. Limit lg_fill_div such that
+ * the fill count is always at least 1.
+ */
+ if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1))
+ >= 1)
+ tbin->lg_fill_div++;
+ } else if (tbin->low_water < 0) {
+ /*
+ * Increase fill count by 2X. Make sure lg_fill_div
+ * stays greater than 0.
+ */
+ if (tbin->lg_fill_div > 1)
+ tbin->lg_fill_div--;
}
tbin->low_water = tbin->ncached;
- tbin->high_water = tbin->ncached;
tcache->next_gc_bin++;
if (tcache->next_gc_bin == nhbins)
@@ -208,13 +238,14 @@ tcache_alloc_easy(tcache_bin_t *tbin)
{
void *ret;
- if (tbin->ncached == 0)
+ if (tbin->ncached == 0) {
+ tbin->low_water = -1;
return (NULL);
+ }
tbin->ncached--;
- if (tbin->ncached < tbin->low_water)
+ if ((int)tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
- ret = tbin->avail;
- tbin->avail = *(void **)ret;
+ ret = tbin->avail[tbin->ncached];
return (ret);
}
@@ -225,7 +256,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
size_t binind;
tcache_bin_t *tbin;
- binind = small_size2bin[size];
+ binind = SMALL_SIZE2BIN(size);
assert(binind < nbins);
tbin = &tcache->tbins[binind];
ret = tcache_alloc_easy(tbin);
@@ -234,7 +265,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
if (ret == NULL)
return (NULL);
}
- assert(arena_salloc(ret) == tcache->arena->bins[binind].reg_size);
+ assert(arena_salloc(ret) == arena_bin_info[binind].reg_size);
if (zero == false) {
#ifdef JEMALLOC_FILL
@@ -250,7 +281,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
tbin->tstats.nrequests++;
#endif
#ifdef JEMALLOC_PROF
- tcache->prof_accumbytes += tcache->arena->bins[binind].reg_size;
+ tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
#endif
tcache_event(tcache);
return (ret);
@@ -314,6 +345,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
arena_run_t *run;
arena_bin_t *bin;
tcache_bin_t *tbin;
+ tcache_bin_info_t *tbin_info;
size_t pageind, binind;
arena_chunk_map_t *mapelm;
@@ -325,7 +357,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
mapelm = &chunk->map[pageind-map_bias];
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
(mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
- assert(run->magic == ARENA_RUN_MAGIC);
+ dassert(run->magic == ARENA_RUN_MAGIC);
bin = run->bin;
binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
sizeof(arena_bin_t);
@@ -333,23 +365,22 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
#ifdef JEMALLOC_FILL
if (opt_junk)
- memset(ptr, 0x5a, bin->reg_size);
+ memset(ptr, 0x5a, arena_bin_info[binind].reg_size);
#endif
tbin = &tcache->tbins[binind];
- if (tbin->ncached == tbin->ncached_max) {
- tcache_bin_flush_small(tbin, binind, (tbin->ncached_max >> 1)
+ tbin_info = &tcache_bin_info[binind];
+ if (tbin->ncached == tbin_info->ncached_max) {
+ tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >>
+ 1)
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache
#endif
);
}
- assert(tbin->ncached < tbin->ncached_max);
- *(void **)ptr = tbin->avail;
- tbin->avail = ptr;
+ assert(tbin->ncached < tbin_info->ncached_max);
+ tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
- if (tbin->ncached > tbin->high_water)
- tbin->high_water = tbin->ncached;
tcache_event(tcache);
}
@@ -361,6 +392,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
arena_chunk_t *chunk;
size_t pageind, binind;
tcache_bin_t *tbin;
+ tcache_bin_info_t *tbin_info;
assert((size & PAGE_MASK) == 0);
assert(arena_salloc(ptr) > small_maxclass);
@@ -377,19 +409,18 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
#endif
tbin = &tcache->tbins[binind];
- if (tbin->ncached == tbin->ncached_max) {
- tcache_bin_flush_large(tbin, binind, (tbin->ncached_max >> 1)
+ tbin_info = &tcache_bin_info[binind];
+ if (tbin->ncached == tbin_info->ncached_max) {
+ tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >>
+ 1)
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache
#endif
);
}
- assert(tbin->ncached < tbin->ncached_max);
- *(void **)ptr = tbin->avail;
- tbin->avail = ptr;
+ assert(tbin->ncached < tbin_info->ncached_max);
+ tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
- if (tbin->ncached > tbin->high_water)
- tbin->high_water = tbin->ncached;
tcache_event(tcache);
}
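The tcache hunks above replace the freelist that was threaded through the cached objects themselves (void *avail plus *(void **)ptr links) with a pointer stack indexed by ncached, and move the read-only ncached_max into the shared tcache_bin_info array. A toy model of the new push/pop behaviour (toy_* names are not jemalloc symbols; low_water/lg_fill_div bookkeeping and the flush path are omitted):

#include <stdio.h>
#include <stdlib.h>

typedef struct {
	unsigned ncached;	/* # of cached objects. */
	unsigned ncached_max;	/* Upper limit (kept in tcache_bin_info). */
	void **avail;		/* Stack of available objects. */
} toy_tbin_t;

static void *
toy_pop(toy_tbin_t *tbin)
{

	if (tbin->ncached == 0)
		return (NULL);	/* Real code also sets low_water = -1 here. */
	tbin->ncached--;
	return (tbin->avail[tbin->ncached]);
}

static int
toy_push(toy_tbin_t *tbin, void *ptr)
{

	if (tbin->ncached == tbin->ncached_max)
		return (1);	/* Caller would flush half the bin first. */
	tbin->avail[tbin->ncached] = ptr;
	tbin->ncached++;
	return (0);
}

int
main(void)
{
	int object;
	toy_tbin_t tbin = {0, 4, NULL};

	tbin.avail = malloc(tbin.ncached_max * sizeof(void *));
	if (tbin.avail == NULL)
		return (1);
	toy_push(&tbin, &object);
	printf("popped %p\n", toy_pop(&tbin));
	free(tbin.avail);
	return (0);
}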
diff --git a/dep/jemalloc/include/jemalloc/jemalloc.h b/dep/jemalloc/include/jemalloc/jemalloc.h
index 287dac46ed2..3842e28115e 100644
--- a/dep/jemalloc/include/jemalloc/jemalloc.h
+++ b/dep/jemalloc/include/jemalloc/jemalloc.h
@@ -7,19 +7,19 @@ extern "C" {
#include <limits.h>
#include <strings.h>
-#define JEMALLOC_VERSION "2.1.0-0-g1c4b088b08d3bc7617a34387e196ce03716160bf"
+#define JEMALLOC_VERSION "2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760"
#define JEMALLOC_VERSION_MAJOR 2
-#define JEMALLOC_VERSION_MINOR 1
-#define JEMALLOC_VERSION_BUGFIX 0
+#define JEMALLOC_VERSION_MINOR 2
+#define JEMALLOC_VERSION_BUGFIX 5
#define JEMALLOC_VERSION_NREV 0
-#define JEMALLOC_VERSION_GID "1c4b088b08d3bc7617a34387e196ce03716160bf"
+#define JEMALLOC_VERSION_GID "fc1bb70e5f0d9a58b39efa39cc549b5af5104760"
#include "jemalloc_defs.h"
#ifndef JEMALLOC_P
# define JEMALLOC_P(s) s
#endif
-#define ALLOCM_LG_ALIGN ((int)0x3f)
+#define ALLOCM_LG_ALIGN(la) (la)
#if LG_SIZEOF_PTR == 2
#define ALLOCM_ALIGN(a) (ffs(a)-1)
#else
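The ALLOCM_LG_ALIGN change above turns the old 0x3f mask constant into a macro that passes the base-2 log of the requested alignment straight into the flags word of the experimental allocm() family. A toy encode/decode, assuming (as the old mask suggests) that the low six bits of the flags carry the exponent:

#include <stddef.h>
#include <stdio.h>

#define TOY_LG_ALIGN(la)	(la)
#define TOY_LG_ALIGN_MASK	((int)0x3f)

static size_t
toy_decode_alignment(int flags)
{

	return ((size_t)1 << (flags & TOY_LG_ALIGN_MASK));
}

int
main(void)
{
	int flags = TOY_LG_ALIGN(4);	/* Request 2^4 = 16-byte alignment. */

	printf("alignment = %zu\n", toy_decode_alignment(flags));
	return (0);
}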
diff --git a/dep/jemalloc/include/jemalloc/jemalloc_defs.h b/dep/jemalloc/include/jemalloc/jemalloc_defs.h
index a641b56da03..f0f8fa71a4d 100644
--- a/dep/jemalloc/include/jemalloc/jemalloc_defs.h
+++ b/dep/jemalloc/include/jemalloc/jemalloc_defs.h
@@ -20,11 +20,32 @@
#endif
/*
+ * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
+ * For shared libraries, symbol visibility mechanisms prevent these symbols
+ * from being exported, but for static libraries, naming collisions are a real
+ * possibility.
+ */
+#define JEMALLOC_PRIVATE_NAMESPACE ""
+#define JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix) string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix
+
+/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
*/
#define CPU_SPINWAIT __asm__ volatile("pause")
+/*
+ * Defined if OSAtomic*() functions are available, as provided by Darwin, and
+ * documented in the atomic(3) manual page.
+ */
+/* #undef JEMALLOC_OSATOMIC */
+
+/*
+ * Defined if OSSpin*() functions are available, as provided by Darwin, and
+ * documented in the spinlock(3) manual page.
+ */
+/* #undef JEMALLOC_OSSPIN */
+
/* Defined if __attribute__((...)) syntax is supported. */
#define JEMALLOC_HAVE_ATTR
#ifdef JEMALLOC_HAVE_ATTR
@@ -54,18 +75,21 @@
/* Use libgcc for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBGCC */
+/* Use gcc intrinsics for profile backtracing if defined. */
+/* #undef JEMALLOC_PROF_GCC */
+
/*
* JEMALLOC_TINY enables support for tiny objects, which are smaller than one
* quantum.
*/
-/* #undef JEMALLOC_TINY */
+#define JEMALLOC_TINY
/*
* JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
* This makes it possible to allocate/deallocate objects without any locking
* when the cache is in the steady state.
*/
-/* #undef JEMALLOC_TCACHE */
+#define JEMALLOC_TCACHE
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
@@ -86,7 +110,7 @@
/* #undef JEMALLOC_SYSV */
/* Support lazy locking (avoid locking unless a second thread is launched). */
-/* #undef JEMALLOC_LAZY_LOCK */
+#define JEMALLOC_LAZY_LOCK
/* Determine page size at run time if defined. */
/* #undef DYNAMIC_PAGE_SHIFT */
@@ -133,9 +157,12 @@
/* #undef JEMALLOC_PURGE_MADVISE_FREE */
/* sizeof(void *) == 2^LG_SIZEOF_PTR. */
-#define LG_SIZEOF_PTR 2
+#define LG_SIZEOF_PTR 3
/* sizeof(int) == 2^LG_SIZEOF_INT. */
#define LG_SIZEOF_INT 2
+/* sizeof(long) == 2^LG_SIZEOF_LONG. */
+#define LG_SIZEOF_LONG 3
+
#endif /* JEMALLOC_DEFS_H_ */
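Two configuration points in this header are worth noting: LG_SIZEOF_PTR moves from 2 to 3, so the vendored build now assumes sizeof(void *) == 8, and JEMALLOC_PRIVATE_NAMESPACE is configured empty, making JEMALLOC_N() effectively an identity mapping here. A toy sketch (hypothetical mylib_* names, not jemalloc's macros) of the symbol-prefixing scheme such a namespace macro enables when the prefix is non-empty:

#include <stdio.h>

#define MYLIB_PREFIX		mylib_
#define MYLIB_CONCAT(a, b)	a ## b
#define MYLIB_EXPAND(a, b)	MYLIB_CONCAT(a, b)
#define MYLIB_N(name)		MYLIB_EXPAND(MYLIB_PREFIX, name)

static void
MYLIB_N(internal_init)(void)	/* Expands to mylib_internal_init(). */
{

	puts("initialized");
}

int
main(void)
{

	mylib_internal_init();
	return (0);
}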
diff --git a/dep/jemalloc/src/arena.c b/dep/jemalloc/src/arena.c
index 7f939b3cd77..d166ca1ec4d 100644
--- a/dep/jemalloc/src/arena.c
+++ b/dep/jemalloc/src/arena.c
@@ -8,6 +8,7 @@ size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
uint8_t const *small_size2bin;
+arena_bin_info_t *arena_bin_info;
/* Various bin-related settings. */
unsigned nqbins;
@@ -25,26 +26,27 @@ size_t mspace_mask;
/*
* const_small_size2bin is a static constant lookup table that in the common
- * case can be used as-is for small_size2bin. For dynamically linked programs,
- * this avoids a page of memory overhead per process.
+ * case can be used as-is for small_size2bin.
*/
-#define S2B_1(i) i,
-#define S2B_2(i) S2B_1(i) S2B_1(i)
-#define S2B_4(i) S2B_2(i) S2B_2(i)
+#if (LG_TINY_MIN == 2)
+#define S2B_4(i) i,
#define S2B_8(i) S2B_4(i) S2B_4(i)
+#elif (LG_TINY_MIN == 3)
+#define S2B_8(i) i,
+#else
+# error "Unsupported LG_TINY_MIN"
+#endif
#define S2B_16(i) S2B_8(i) S2B_8(i)
#define S2B_32(i) S2B_16(i) S2B_16(i)
#define S2B_64(i) S2B_32(i) S2B_32(i)
#define S2B_128(i) S2B_64(i) S2B_64(i)
#define S2B_256(i) S2B_128(i) S2B_128(i)
/*
- * The number of elements in const_small_size2bin is dependent on page size
- * and on the definition for SUBPAGE. If SUBPAGE changes, the '- 255' must also
- * change, along with the addition/removal of static lookup table element
- * definitions.
+ * The number of elements in const_small_size2bin is dependent on the
+ * definition for SUBPAGE.
*/
-static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = {
- S2B_1(0xffU) /* 0 */
+static JEMALLOC_ATTR(aligned(CACHELINE))
+ const uint8_t const_small_size2bin[] = {
#if (LG_QUANTUM == 4)
/* 16-byte quantum **********************/
# ifdef JEMALLOC_TINY
@@ -173,7 +175,6 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
-static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
arena_bin_t *bin);
static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
@@ -191,6 +192,9 @@ static bool small_size2bin_init(void);
static void small_size2bin_validate(void);
#endif
static bool small_size2bin_init_hard(void);
+static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info,
+ size_t min_run_size);
+static bool bin_info_init(void);
/******************************************************************************/
@@ -246,57 +250,48 @@ rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t,
arena_chunk_map_t, u.rb_link, arena_avail_comp)
static inline void *
-arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
+arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
{
void *ret;
+ unsigned regind;
+ bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
+ (uintptr_t)bin_info->bitmap_offset);
- assert(run->magic == ARENA_RUN_MAGIC);
+ dassert(run->magic == ARENA_RUN_MAGIC);
assert(run->nfree > 0);
+ assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false);
+ regind = bitmap_sfu(bitmap, &bin_info->bitmap_info);
+ ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset +
+ (uintptr_t)(bin_info->reg_size * regind));
run->nfree--;
- ret = run->avail;
- if (ret != NULL) {
- /* Double free can cause assertion failure.*/
- assert(ret != NULL);
- /* Write-after free can cause assertion failure. */
- assert((uintptr_t)ret >= (uintptr_t)run +
- (uintptr_t)bin->reg0_offset);
- assert((uintptr_t)ret < (uintptr_t)run->next);
- assert(((uintptr_t)ret - ((uintptr_t)run +
- (uintptr_t)bin->reg0_offset)) % (uintptr_t)bin->reg_size ==
- 0);
- run->avail = *(void **)ret;
- return (ret);
- }
- ret = run->next;
- run->next = (void *)((uintptr_t)ret + (uintptr_t)bin->reg_size);
- assert(ret != NULL);
+ if (regind == run->nextind)
+ run->nextind++;
+ assert(regind < run->nextind);
return (ret);
}
static inline void
arena_run_reg_dalloc(arena_run_t *run, void *ptr)
{
-
- assert(run->nfree < run->bin->nregs);
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+ size_t binind = arena_bin_index(chunk->arena, run->bin);
+ arena_bin_info_t *bin_info = &arena_bin_info[binind];
+ unsigned regind = arena_run_regind(run, bin_info, ptr);
+ bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
+ (uintptr_t)bin_info->bitmap_offset);
+
+ assert(run->nfree < bin_info->nregs);
/* Freeing an interior pointer can cause assertion failure. */
assert(((uintptr_t)ptr - ((uintptr_t)run +
- (uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size
+ (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size
== 0);
- /*
- * Freeing a pointer lower than region zero can cause assertion
- * failure.
- */
assert((uintptr_t)ptr >= (uintptr_t)run +
- (uintptr_t)run->bin->reg0_offset);
- /*
- * Freeing a pointer past in the run's frontier can cause assertion
- * failure.
- */
- assert((uintptr_t)ptr < (uintptr_t)run->next);
+ (uintptr_t)bin_info->reg0_offset);
+ /* Freeing an unallocated pointer can cause assertion failure. */
+ assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind));
- *(void **)ptr = run->avail;
- run->avail = ptr;
+ bitmap_unset(bitmap, &bin_info->bitmap_info, regind);
run->nfree++;
}
@@ -320,6 +315,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
size_t flag_dirty;
arena_avail_tree_t *runs_avail;
+#ifdef JEMALLOC_STATS
+ size_t cactive_diff;
+#endif
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
old_ndirty = chunk->ndirty;
@@ -338,6 +336,13 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
rem_pages = total_pages - need_pages;
arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]);
+#ifdef JEMALLOC_STATS
+ /* Update stats_cactive if nactive is crossing a chunk multiple. */
+ cactive_diff = CHUNK_CEILING((arena->nactive + need_pages) <<
+ PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT);
+ if (cactive_diff != 0)
+ stats_cactive_add(cactive_diff);
+#endif
arena->nactive += need_pages;
/* Keep track of trailing unused pages for later use. */
@@ -564,7 +569,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
arena->ndirty -= spare->ndirty;
}
malloc_mutex_unlock(&arena->lock);
- chunk_dealloc((void *)spare, chunksize);
+ chunk_dealloc((void *)spare, chunksize, true);
malloc_mutex_lock(&arena->lock);
#ifdef JEMALLOC_STATS
arena->stats.mapped -= chunksize;
@@ -725,6 +730,9 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
assert(pageind + npages <= chunk_npages);
if (mapelm->bits & CHUNK_MAP_DIRTY) {
size_t i;
+#ifdef JEMALLOC_STATS
+ size_t cactive_diff;
+#endif
arena_avail_tree_remove(
&arena->runs_avail_dirty, mapelm);
@@ -747,6 +755,17 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
CHUNK_MAP_ALLOCATED;
}
+#ifdef JEMALLOC_STATS
+ /*
+ * Update stats_cactive if nactive is crossing a
+ * chunk multiple.
+ */
+ cactive_diff = CHUNK_CEILING((arena->nactive +
+ npages) << PAGE_SHIFT) -
+ CHUNK_CEILING(arena->nactive << PAGE_SHIFT);
+ if (cactive_diff != 0)
+ stats_cactive_add(cactive_diff);
+#endif
arena->nactive += npages;
/* Append to list for later processing. */
ql_elm_new(mapelm, u.ql_link);
@@ -763,8 +782,12 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
chunk + (uintptr_t)(pageind << PAGE_SHIFT));
assert((mapelm->bits >> PAGE_SHIFT) == 0);
- assert(run->magic == ARENA_RUN_MAGIC);
- pageind += run->bin->run_size >> PAGE_SHIFT;
+ dassert(run->magic == ARENA_RUN_MAGIC);
+ size_t binind = arena_bin_index(arena,
+ run->bin);
+ arena_bin_info_t *bin_info =
+ &arena_bin_info[binind];
+ pageind += bin_info->run_size >> PAGE_SHIFT;
}
}
}
@@ -845,9 +868,10 @@ arena_purge(arena_t *arena, bool all)
}
assert(ndirty == arena->ndirty);
#endif
- assert(arena->ndirty > arena->npurgatory);
- assert(arena->ndirty > chunk_npages || all);
- assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all);
+ assert(arena->ndirty > arena->npurgatory || all);
+ assert(arena->ndirty - arena->npurgatory > chunk_npages || all);
+ assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
+ arena->npurgatory) || all);
#ifdef JEMALLOC_STATS
arena->stats.npurge++;
@@ -859,8 +883,10 @@ arena_purge(arena_t *arena, bool all)
* multiple threads from racing to reduce ndirty below the threshold.
*/
npurgatory = arena->ndirty - arena->npurgatory;
- if (all == false)
+ if (all == false) {
+ assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult);
npurgatory -= arena->nactive >> opt_lg_dirty_mult;
+ }
arena->npurgatory += npurgatory;
while (npurgatory > 0) {
@@ -931,6 +957,9 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
arena_chunk_t *chunk;
size_t size, run_ind, run_pages, flag_dirty;
arena_avail_tree_t *runs_avail;
+#ifdef JEMALLOC_STATS
+ size_t cactive_diff;
+#endif
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
@@ -946,9 +975,19 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
CHUNK_MAP_LARGE) != 0);
assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits &
CHUNK_MAP_ALLOCATED) != 0);
- } else
- size = run->bin->run_size;
+ } else {
+ size_t binind = arena_bin_index(arena, run->bin);
+ arena_bin_info_t *bin_info = &arena_bin_info[binind];
+ size = bin_info->run_size;
+ }
run_pages = (size >> PAGE_SHIFT);
+#ifdef JEMALLOC_STATS
+ /* Update stats_cactive if nactive is crossing a chunk multiple. */
+ cactive_diff = CHUNK_CEILING(arena->nactive << PAGE_SHIFT) -
+ CHUNK_CEILING((arena->nactive - run_pages) << PAGE_SHIFT);
+ if (cactive_diff != 0)
+ stats_cactive_sub(cactive_diff);
+#endif
arena->nactive -= run_pages;
/*
@@ -1174,6 +1213,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
{
arena_chunk_map_t *mapelm;
arena_run_t *run;
+ size_t binind;
+ arena_bin_info_t *bin_info;
/* Look for a usable run. */
mapelm = arena_run_tree_first(&bin->runs);
@@ -1197,18 +1238,23 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
}
/* No existing runs have any space available. */
+ binind = arena_bin_index(arena, bin);
+ bin_info = &arena_bin_info[binind];
+
/* Allocate a new run. */
malloc_mutex_unlock(&bin->lock);
/******************************/
malloc_mutex_lock(&arena->lock);
- run = arena_run_alloc(arena, bin->run_size, false, false);
+ run = arena_run_alloc(arena, bin_info->run_size, false, false);
if (run != NULL) {
+ bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
+ (uintptr_t)bin_info->bitmap_offset);
+
/* Initialize run internals. */
run->bin = bin;
- run->avail = NULL;
- run->next = (void *)((uintptr_t)run +
- (uintptr_t)bin->reg0_offset);
- run->nfree = bin->nregs;
+ run->nextind = 0;
+ run->nfree = bin_info->nregs;
+ bitmap_init(bitmap, &bin_info->bitmap_info);
#ifdef JEMALLOC_DEBUG
run->magic = ARENA_RUN_MAGIC;
#endif
@@ -1259,8 +1305,12 @@ static void *
arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
{
void *ret;
+ size_t binind;
+ arena_bin_info_t *bin_info;
arena_run_t *run;
+ binind = arena_bin_index(arena, bin);
+ bin_info = &arena_bin_info[binind];
bin->runcur = NULL;
run = arena_bin_nonfull_run_get(arena, bin);
if (bin->runcur != NULL && bin->runcur->nfree > 0) {
@@ -1268,22 +1318,22 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
* Another thread updated runcur while this one ran without the
* bin lock in arena_bin_nonfull_run_get().
*/
- assert(bin->runcur->magic == ARENA_RUN_MAGIC);
+ dassert(bin->runcur->magic == ARENA_RUN_MAGIC);
assert(bin->runcur->nfree > 0);
- ret = arena_run_reg_alloc(bin->runcur, bin);
+ ret = arena_run_reg_alloc(bin->runcur, bin_info);
if (run != NULL) {
arena_chunk_t *chunk;
/*
* arena_run_alloc() may have allocated run, or it may
- * have pulled it from the bin's run tree. Therefore
+ * have pulled run from the bin's run tree. Therefore
* it is unsafe to make any assumptions about how run
* has previously been used, and arena_bin_lower_run()
* must be called, as if a region were just deallocated
* from the run.
*/
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- if (run->nfree == bin->nregs)
+ if (run->nfree == bin_info->nregs)
arena_dalloc_bin_run(arena, chunk, run, bin);
else
arena_bin_lower_run(arena, chunk, run, bin);
@@ -1296,10 +1346,10 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
bin->runcur = run;
- assert(bin->runcur->magic == ARENA_RUN_MAGIC);
+ dassert(bin->runcur->magic == ARENA_RUN_MAGIC);
assert(bin->runcur->nfree > 0);
- return (arena_run_reg_alloc(bin->runcur, bin));
+ return (arena_run_reg_alloc(bin->runcur, bin_info));
}
#ifdef JEMALLOC_PROF
@@ -1339,18 +1389,19 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
#endif
bin = &arena->bins[binind];
malloc_mutex_lock(&bin->lock);
- for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) {
+ for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
+ tbin->lg_fill_div); i < nfill; i++) {
if ((run = bin->runcur) != NULL && run->nfree > 0)
- ptr = arena_run_reg_alloc(run, bin);
+ ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);
else
ptr = arena_bin_malloc_hard(arena, bin);
if (ptr == NULL)
break;
- *(void **)ptr = tbin->avail;
- tbin->avail = ptr;
+ /* Insert such that low regions get used first. */
+ tbin->avail[nfill - 1 - i] = ptr;
}
#ifdef JEMALLOC_STATS
- bin->stats.allocated += (i - tbin->ncached) * bin->reg_size;
+ bin->stats.allocated += i * arena_bin_info[binind].reg_size;
bin->stats.nmalloc += i;
bin->stats.nrequests += tbin->tstats.nrequests;
bin->stats.nfills++;
@@ -1358,119 +1409,9 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
#endif
malloc_mutex_unlock(&bin->lock);
tbin->ncached = i;
- if (tbin->ncached > tbin->high_water)
- tbin->high_water = tbin->ncached;
}
#endif
-/*
- * Calculate bin->run_size such that it meets the following constraints:
- *
- * *) bin->run_size >= min_run_size
- * *) bin->run_size <= arena_maxclass
- * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
- * *) run header size < PAGE_SIZE
- *
- * bin->nregs and bin->reg0_offset are also calculated here, since these
- * settings are all interdependent.
- */
-static size_t
-arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
-{
- size_t try_run_size, good_run_size;
- uint32_t try_nregs, good_nregs;
- uint32_t try_hdr_size, good_hdr_size;
-#ifdef JEMALLOC_PROF
- uint32_t try_ctx0_offset, good_ctx0_offset;
-#endif
- uint32_t try_reg0_offset, good_reg0_offset;
-
- assert(min_run_size >= PAGE_SIZE);
- assert(min_run_size <= arena_maxclass);
-
- /*
- * Calculate known-valid settings before entering the run_size
- * expansion loop, so that the first part of the loop always copies
- * valid settings.
- *
- * The do..while loop iteratively reduces the number of regions until
- * the run header and the regions no longer overlap. A closed formula
- * would be quite messy, since there is an interdependency between the
- * header's mask length and the number of regions.
- */
- try_run_size = min_run_size;
- try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
- + 1; /* Counter-act try_nregs-- in loop. */
- do {
- try_nregs--;
- try_hdr_size = sizeof(arena_run_t);
-#ifdef JEMALLOC_PROF
- if (opt_prof && prof_promote == false) {
- /* Pad to a quantum boundary. */
- try_hdr_size = QUANTUM_CEILING(try_hdr_size);
- try_ctx0_offset = try_hdr_size;
- /* Add space for one (prof_ctx_t *) per region. */
- try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
- } else
- try_ctx0_offset = 0;
-#endif
- try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
- } while (try_hdr_size > try_reg0_offset);
-
- /* run_size expansion loop. */
- do {
- /*
- * Copy valid settings before trying more aggressive settings.
- */
- good_run_size = try_run_size;
- good_nregs = try_nregs;
- good_hdr_size = try_hdr_size;
-#ifdef JEMALLOC_PROF
- good_ctx0_offset = try_ctx0_offset;
-#endif
- good_reg0_offset = try_reg0_offset;
-
- /* Try more aggressive settings. */
- try_run_size += PAGE_SIZE;
- try_nregs = ((try_run_size - sizeof(arena_run_t)) /
- bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
- do {
- try_nregs--;
- try_hdr_size = sizeof(arena_run_t);
-#ifdef JEMALLOC_PROF
- if (opt_prof && prof_promote == false) {
- /* Pad to a quantum boundary. */
- try_hdr_size = QUANTUM_CEILING(try_hdr_size);
- try_ctx0_offset = try_hdr_size;
- /*
- * Add space for one (prof_ctx_t *) per region.
- */
- try_hdr_size += try_nregs *
- sizeof(prof_ctx_t *);
- }
-#endif
- try_reg0_offset = try_run_size - (try_nregs *
- bin->reg_size);
- } while (try_hdr_size > try_reg0_offset);
- } while (try_run_size <= arena_maxclass
- && try_run_size <= arena_maxclass
- && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
- && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
- && try_hdr_size < PAGE_SIZE);
-
- assert(good_hdr_size <= good_reg0_offset);
-
- /* Copy final settings. */
- bin->run_size = good_run_size;
- bin->nregs = good_nregs;
-#ifdef JEMALLOC_PROF
- bin->ctx0_offset = good_ctx0_offset;
-#endif
- bin->reg0_offset = good_reg0_offset;
-
- return (good_run_size);
-}
-
void *
arena_malloc_small(arena_t *arena, size_t size, bool zero)
{
@@ -1479,14 +1420,14 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
arena_run_t *run;
size_t binind;
- binind = small_size2bin[size];
+ binind = SMALL_SIZE2BIN(size);
assert(binind < nbins);
bin = &arena->bins[binind];
- size = bin->reg_size;
+ size = arena_bin_info[binind].reg_size;
malloc_mutex_lock(&bin->lock);
if ((run = bin->runcur) != NULL && run->nfree > 0)
- ret = arena_run_reg_alloc(run, bin);
+ ret = arena_run_reg_alloc(run, &arena_bin_info[binind]);
else
ret = arena_bin_malloc_hard(arena, bin);
@@ -1690,11 +1631,13 @@ arena_salloc(const void *ptr)
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
- assert(run->magic == ARENA_RUN_MAGIC);
+ dassert(run->magic == ARENA_RUN_MAGIC);
+ size_t binind = arena_bin_index(chunk->arena, run->bin);
+ arena_bin_info_t *bin_info = &arena_bin_info[binind];
assert(((uintptr_t)ptr - ((uintptr_t)run +
- (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size ==
+ (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size ==
0);
- ret = run->bin->reg_size;
+ ret = bin_info->reg_size;
} else {
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
ret = mapbits & ~PAGE_MASK;
@@ -1714,10 +1657,11 @@ arena_prof_promoted(const void *ptr, size_t size)
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
assert(isalloc(ptr) == PAGE_SIZE);
+ assert(size <= small_maxclass);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
- binind = small_size2bin[size];
+ binind = SMALL_SIZE2BIN(size);
assert(binind < nbins);
chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits &
~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT);
@@ -1741,11 +1685,13 @@ arena_salloc_demote(const void *ptr)
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
- assert(run->magic == ARENA_RUN_MAGIC);
+ dassert(run->magic == ARENA_RUN_MAGIC);
+ size_t binind = arena_bin_index(chunk->arena, run->bin);
+ arena_bin_info_t *bin_info = &arena_bin_info[binind];
assert(((uintptr_t)ptr - ((uintptr_t)run +
- (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size ==
+ (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size ==
0);
- ret = run->bin->reg_size;
+ ret = bin_info->reg_size;
} else {
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
ret = mapbits & ~PAGE_MASK;
@@ -1754,7 +1700,7 @@ arena_salloc_demote(const void *ptr)
size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >>
CHUNK_MAP_CLASS_SHIFT) - 1;
assert(binind < nbins);
- ret = chunk->arena->bins[binind].reg_size;
+ ret = arena_bin_info[binind].reg_size;
}
assert(ret != 0);
}
@@ -1771,17 +1717,22 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
/* Dissociate run from bin. */
if (run == bin->runcur)
bin->runcur = NULL;
- else if (bin->nregs != 1) {
- size_t run_pageind = (((uintptr_t)run - (uintptr_t)chunk)) >>
- PAGE_SHIFT;
- arena_chunk_map_t *run_mapelm =
- &chunk->map[run_pageind-map_bias];
- /*
- * This block's conditional is necessary because if the run
- * only contains one region, then it never gets inserted into
- * the non-full runs tree.
- */
- arena_run_tree_remove(&bin->runs, run_mapelm);
+ else {
+ size_t binind = arena_bin_index(chunk->arena, bin);
+ arena_bin_info_t *bin_info = &arena_bin_info[binind];
+
+ if (bin_info->nregs != 1) {
+ size_t run_pageind = (((uintptr_t)run -
+ (uintptr_t)chunk)) >> PAGE_SHIFT;
+ arena_chunk_map_t *run_mapelm =
+ &chunk->map[run_pageind-map_bias];
+ /*
+ * This block's conditional is necessary because if the
+ * run only contains one region, then it never gets
+ * inserted into the non-full runs tree.
+ */
+ arena_run_tree_remove(&bin->runs, run_mapelm);
+ }
}
}
@@ -1789,18 +1740,24 @@ static void
arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
arena_bin_t *bin)
{
+ size_t binind;
+ arena_bin_info_t *bin_info;
size_t npages, run_ind, past;
assert(run != bin->runcur);
assert(arena_run_tree_search(&bin->runs, &chunk->map[
(((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL);
+ binind = arena_bin_index(chunk->arena, run->bin);
+ bin_info = &arena_bin_info[binind];
+
malloc_mutex_unlock(&bin->lock);
/******************************/
- npages = bin->run_size >> PAGE_SHIFT;
+ npages = bin_info->run_size >> PAGE_SHIFT;
run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
- past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk)
- >> PAGE_SHIFT);
+ past = (size_t)(PAGE_CEILING((uintptr_t)run +
+ (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind *
+ bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT);
malloc_mutex_lock(&arena->lock);
/*
@@ -1817,7 +1774,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE |
(chunk->map[run_ind+npages-1-map_bias].bits &
CHUNK_MAP_FLAGS_MASK);
- chunk->map[run_ind-map_bias].bits = bin->run_size |
+ chunk->map[run_ind-map_bias].bits = bin_info->run_size |
CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits &
CHUNK_MAP_FLAGS_MASK);
arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT),
@@ -1886,10 +1843,12 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
(mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
- assert(run->magic == ARENA_RUN_MAGIC);
+ dassert(run->magic == ARENA_RUN_MAGIC);
bin = run->bin;
+ size_t binind = arena_bin_index(arena, bin);
+ arena_bin_info_t *bin_info = &arena_bin_info[binind];
#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
- size = bin->reg_size;
+ size = bin_info->reg_size;
#endif
#ifdef JEMALLOC_FILL
@@ -1898,7 +1857,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
#endif
arena_run_reg_dalloc(run, ptr);
- if (run->nfree == bin->nregs) {
+ if (run->nfree == bin_info->nregs) {
arena_dissociate_bin_run(chunk, run, bin);
arena_dalloc_bin_run(arena, chunk, run, bin);
} else if (run->nfree == 1 && run != bin->runcur)
@@ -2132,7 +2091,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra,
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
arena = chunk->arena;
- assert(arena->magic == ARENA_MAGIC);
+ dassert(arena->magic == ARENA_MAGIC);
if (psize < oldsize) {
#ifdef JEMALLOC_FILL
@@ -2170,11 +2129,11 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
*/
if (oldsize <= arena_maxclass) {
if (oldsize <= small_maxclass) {
- assert(choose_arena()->bins[small_size2bin[
- oldsize]].reg_size == oldsize);
+ assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size
+ == oldsize);
if ((size + extra <= small_maxclass &&
- small_size2bin[size + extra] ==
- small_size2bin[oldsize]) || (size <= oldsize &&
+ SMALL_SIZE2BIN(size + extra) ==
+ SMALL_SIZE2BIN(oldsize)) || (size <= oldsize &&
size + extra >= oldsize)) {
#ifdef JEMALLOC_FILL
if (opt_junk && size < oldsize) {
@@ -2210,24 +2169,29 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
if (ret != NULL)
return (ret);
-
/*
* size and oldsize are different enough that we need to move the
* object. In that case, fall back to allocating new space and
* copying.
*/
- if (alignment != 0)
- ret = ipalloc(size + extra, alignment, zero);
- else
+ if (alignment != 0) {
+ size_t usize = sa2u(size + extra, alignment, NULL);
+ if (usize == 0)
+ return (NULL);
+ ret = ipalloc(usize, alignment, zero);
+ } else
ret = arena_malloc(size + extra, zero);
if (ret == NULL) {
if (extra == 0)
return (NULL);
/* Try again, this time without extra. */
- if (alignment != 0)
- ret = ipalloc(size, alignment, zero);
- else
+ if (alignment != 0) {
+ size_t usize = sa2u(size, alignment, NULL);
+ if (usize == 0)
+ return (NULL);
+ ret = ipalloc(usize, alignment, zero);
+ } else
ret = arena_malloc(size, zero);
if (ret == NULL)
@@ -2251,9 +2215,9 @@ arena_new(arena_t *arena, unsigned ind)
{
unsigned i;
arena_bin_t *bin;
- size_t prev_run_size;
arena->ind = ind;
+ arena->nthreads = 0;
if (malloc_mutex_init(&arena->lock))
return (true);
@@ -2287,8 +2251,6 @@ arena_new(arena_t *arena, unsigned ind)
arena_avail_tree_new(&arena->runs_avail_dirty);
/* Initialize bins. */
- prev_run_size = PAGE_SIZE;
-
i = 0;
#ifdef JEMALLOC_TINY
/* (2^n)-spaced tiny bins. */
@@ -2298,11 +2260,6 @@ arena_new(arena_t *arena, unsigned ind)
return (true);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
-
- bin->reg_size = (1U << (LG_TINY_MIN + i));
-
- prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
-
#ifdef JEMALLOC_STATS
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
@@ -2316,11 +2273,6 @@ arena_new(arena_t *arena, unsigned ind)
return (true);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
-
- bin->reg_size = (i - ntbins + 1) << LG_QUANTUM;
-
- prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
-
#ifdef JEMALLOC_STATS
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
@@ -2333,12 +2285,6 @@ arena_new(arena_t *arena, unsigned ind)
return (true);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
-
- bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
- LG_CACHELINE);
-
- prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
-
#ifdef JEMALLOC_STATS
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
@@ -2351,12 +2297,6 @@ arena_new(arena_t *arena, unsigned ind)
return (true);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
-
- bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
- << LG_SUBPAGE);
-
- prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
-
#ifdef JEMALLOC_STATS
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
@@ -2375,40 +2315,39 @@ small_size2bin_validate(void)
{
size_t i, size, binind;
- assert(small_size2bin[0] == 0xffU);
i = 1;
# ifdef JEMALLOC_TINY
/* Tiny. */
for (; i < (1U << LG_TINY_MIN); i++) {
size = pow2_ceil(1U << LG_TINY_MIN);
binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
- assert(small_size2bin[i] == binind);
+ assert(SMALL_SIZE2BIN(i) == binind);
}
for (; i < qspace_min; i++) {
size = pow2_ceil(i);
binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
- assert(small_size2bin[i] == binind);
+ assert(SMALL_SIZE2BIN(i) == binind);
}
# endif
/* Quantum-spaced. */
for (; i <= qspace_max; i++) {
size = QUANTUM_CEILING(i);
binind = ntbins + (size >> LG_QUANTUM) - 1;
- assert(small_size2bin[i] == binind);
+ assert(SMALL_SIZE2BIN(i) == binind);
}
/* Cacheline-spaced. */
for (; i <= cspace_max; i++) {
size = CACHELINE_CEILING(i);
binind = ntbins + nqbins + ((size - cspace_min) >>
LG_CACHELINE);
- assert(small_size2bin[i] == binind);
+ assert(SMALL_SIZE2BIN(i) == binind);
}
/* Sub-page. */
for (; i <= sspace_max; i++) {
size = SUBPAGE_CEILING(i);
binind = ntbins + nqbins + ncbins + ((size - sspace_min)
>> LG_SUBPAGE);
- assert(small_size2bin[i] == binind);
+ assert(SMALL_SIZE2BIN(i) == binind);
}
}
#endif
@@ -2419,12 +2358,12 @@ small_size2bin_init(void)
if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
|| opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
- || sizeof(const_small_size2bin) != small_maxclass + 1)
+ || (sizeof(const_small_size2bin) != ((small_maxclass-1) >>
+ LG_TINY_MIN) + 1))
return (small_size2bin_init_hard());
small_size2bin = const_small_size2bin;
#ifdef JEMALLOC_DEBUG
- assert(sizeof(const_small_size2bin) == small_maxclass + 1);
small_size2bin_validate();
#endif
return (false);
@@ -2435,49 +2374,52 @@ small_size2bin_init_hard(void)
{
size_t i, size, binind;
uint8_t *custom_small_size2bin;
+#define CUSTOM_SMALL_SIZE2BIN(s) \
+ custom_small_size2bin[(s-1) >> LG_TINY_MIN]
assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
|| opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
- || sizeof(const_small_size2bin) != small_maxclass + 1);
+ || (sizeof(const_small_size2bin) != ((small_maxclass-1) >>
+ LG_TINY_MIN) + 1));
- custom_small_size2bin = (uint8_t *)base_alloc(small_maxclass + 1);
+ custom_small_size2bin = (uint8_t *)
+ base_alloc(small_maxclass >> LG_TINY_MIN);
if (custom_small_size2bin == NULL)
return (true);
- custom_small_size2bin[0] = 0xffU;
i = 1;
#ifdef JEMALLOC_TINY
/* Tiny. */
- for (; i < (1U << LG_TINY_MIN); i++) {
+ for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) {
size = pow2_ceil(1U << LG_TINY_MIN);
binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
- custom_small_size2bin[i] = binind;
+ CUSTOM_SMALL_SIZE2BIN(i) = binind;
}
- for (; i < qspace_min; i++) {
+ for (; i < qspace_min; i += TINY_MIN) {
size = pow2_ceil(i);
binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
- custom_small_size2bin[i] = binind;
+ CUSTOM_SMALL_SIZE2BIN(i) = binind;
}
#endif
/* Quantum-spaced. */
- for (; i <= qspace_max; i++) {
+ for (; i <= qspace_max; i += TINY_MIN) {
size = QUANTUM_CEILING(i);
binind = ntbins + (size >> LG_QUANTUM) - 1;
- custom_small_size2bin[i] = binind;
+ CUSTOM_SMALL_SIZE2BIN(i) = binind;
}
/* Cacheline-spaced. */
- for (; i <= cspace_max; i++) {
+ for (; i <= cspace_max; i += TINY_MIN) {
size = CACHELINE_CEILING(i);
binind = ntbins + nqbins + ((size - cspace_min) >>
LG_CACHELINE);
- custom_small_size2bin[i] = binind;
+ CUSTOM_SMALL_SIZE2BIN(i) = binind;
}
/* Sub-page. */
- for (; i <= sspace_max; i++) {
+ for (; i <= sspace_max; i += TINY_MIN) {
size = SUBPAGE_CEILING(i);
binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
LG_SUBPAGE);
- custom_small_size2bin[i] = binind;
+ CUSTOM_SMALL_SIZE2BIN(i) = binind;
}
small_size2bin = custom_small_size2bin;
@@ -2485,6 +2427,190 @@ small_size2bin_init_hard(void)
small_size2bin_validate();
#endif
return (false);
+#undef CUSTOM_SMALL_SIZE2BIN
+}
+
+/*
+ * Calculate bin_info->run_size such that it meets the following constraints:
+ *
+ * *) bin_info->run_size >= min_run_size
+ * *) bin_info->run_size <= arena_maxclass
+ * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
+ * *) bin_info->nregs <= RUN_MAXREGS
+ *
+ * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also
+ * calculated here, since these settings are all interdependent.
+ */
+static size_t
+bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
+{
+ size_t try_run_size, good_run_size;
+ uint32_t try_nregs, good_nregs;
+ uint32_t try_hdr_size, good_hdr_size;
+ uint32_t try_bitmap_offset, good_bitmap_offset;
+#ifdef JEMALLOC_PROF
+ uint32_t try_ctx0_offset, good_ctx0_offset;
+#endif
+ uint32_t try_reg0_offset, good_reg0_offset;
+
+ assert(min_run_size >= PAGE_SIZE);
+ assert(min_run_size <= arena_maxclass);
+
+ /*
+ * Calculate known-valid settings before entering the run_size
+ * expansion loop, so that the first part of the loop always copies
+ * valid settings.
+ *
+ * The do..while loop iteratively reduces the number of regions until
+ * the run header and the regions no longer overlap. A closed formula
+ * would be quite messy, since there is an interdependency between the
+ * header's mask length and the number of regions.
+ */
+ try_run_size = min_run_size;
+ try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size)
+ + 1; /* Counter-act try_nregs-- in loop. */
+ if (try_nregs > RUN_MAXREGS) {
+ try_nregs = RUN_MAXREGS
+ + 1; /* Counter-act try_nregs-- in loop. */
+ }
+ do {
+ try_nregs--;
+ try_hdr_size = sizeof(arena_run_t);
+ /* Pad to a long boundary. */
+ try_hdr_size = LONG_CEILING(try_hdr_size);
+ try_bitmap_offset = try_hdr_size;
+ /* Add space for bitmap. */
+ try_hdr_size += bitmap_size(try_nregs);
+#ifdef JEMALLOC_PROF
+ if (opt_prof && prof_promote == false) {
+ /* Pad to a quantum boundary. */
+ try_hdr_size = QUANTUM_CEILING(try_hdr_size);
+ try_ctx0_offset = try_hdr_size;
+ /* Add space for one (prof_ctx_t *) per region. */
+ try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
+ } else
+ try_ctx0_offset = 0;
+#endif
+ try_reg0_offset = try_run_size - (try_nregs *
+ bin_info->reg_size);
+ } while (try_hdr_size > try_reg0_offset);
+
+ /* run_size expansion loop. */
+ do {
+ /*
+ * Copy valid settings before trying more aggressive settings.
+ */
+ good_run_size = try_run_size;
+ good_nregs = try_nregs;
+ good_hdr_size = try_hdr_size;
+ good_bitmap_offset = try_bitmap_offset;
+#ifdef JEMALLOC_PROF
+ good_ctx0_offset = try_ctx0_offset;
+#endif
+ good_reg0_offset = try_reg0_offset;
+
+ /* Try more aggressive settings. */
+ try_run_size += PAGE_SIZE;
+ try_nregs = ((try_run_size - sizeof(arena_run_t)) /
+ bin_info->reg_size)
+ + 1; /* Counter-act try_nregs-- in loop. */
+ if (try_nregs > RUN_MAXREGS) {
+ try_nregs = RUN_MAXREGS
+ + 1; /* Counter-act try_nregs-- in loop. */
+ }
+ do {
+ try_nregs--;
+ try_hdr_size = sizeof(arena_run_t);
+ /* Pad to a long boundary. */
+ try_hdr_size = LONG_CEILING(try_hdr_size);
+ try_bitmap_offset = try_hdr_size;
+ /* Add space for bitmap. */
+ try_hdr_size += bitmap_size(try_nregs);
+#ifdef JEMALLOC_PROF
+ if (opt_prof && prof_promote == false) {
+ /* Pad to a quantum boundary. */
+ try_hdr_size = QUANTUM_CEILING(try_hdr_size);
+ try_ctx0_offset = try_hdr_size;
+ /*
+ * Add space for one (prof_ctx_t *) per region.
+ */
+ try_hdr_size += try_nregs *
+ sizeof(prof_ctx_t *);
+ }
+#endif
+ try_reg0_offset = try_run_size - (try_nregs *
+ bin_info->reg_size);
+ } while (try_hdr_size > try_reg0_offset);
+ } while (try_run_size <= arena_maxclass
+ && try_run_size <= arena_maxclass
+ && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX
+ && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
+ && try_nregs < RUN_MAXREGS);
+
+ assert(good_hdr_size <= good_reg0_offset);
+
+ /* Copy final settings. */
+ bin_info->run_size = good_run_size;
+ bin_info->nregs = good_nregs;
+ bin_info->bitmap_offset = good_bitmap_offset;
+#ifdef JEMALLOC_PROF
+ bin_info->ctx0_offset = good_ctx0_offset;
+#endif
+ bin_info->reg0_offset = good_reg0_offset;
+
+ return (good_run_size);
+}
+
+static bool
+bin_info_init(void)
+{
+ arena_bin_info_t *bin_info;
+ unsigned i;
+ size_t prev_run_size;
+
+ arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins);
+ if (arena_bin_info == NULL)
+ return (true);
+
+ prev_run_size = PAGE_SIZE;
+ i = 0;
+#ifdef JEMALLOC_TINY
+ /* (2^n)-spaced tiny bins. */
+ for (; i < ntbins; i++) {
+ bin_info = &arena_bin_info[i];
+ bin_info->reg_size = (1U << (LG_TINY_MIN + i));
+ prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
+ bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
+ }
+#endif
+
+ /* Quantum-spaced bins. */
+ for (; i < ntbins + nqbins; i++) {
+ bin_info = &arena_bin_info[i];
+ bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM;
+ prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
+ bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
+ }
+
+ /* Cacheline-spaced bins. */
+ for (; i < ntbins + nqbins + ncbins; i++) {
+ bin_info = &arena_bin_info[i];
+ bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
+ LG_CACHELINE);
+ prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
+ bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
+ }
+
+ /* Subpage-spaced bins. */
+ for (; i < nbins; i++) {
+ bin_info = &arena_bin_info[i];
+ bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins +
+ ncbins)) << LG_SUBPAGE);
+ prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
+ bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
+ }
+
+ return (false);
}
bool
@@ -2545,9 +2671,6 @@ arena_boot(void)
abort();
}
- if (small_size2bin_init())
- return (true);
-
/*
* Compute the header size such that it is large enough to contain the
* page map. The page map is biased to omit entries for the header
@@ -2571,5 +2694,11 @@ arena_boot(void)
arena_maxclass = chunksize - (map_bias << PAGE_SHIFT);
+ if (small_size2bin_init())
+ return (true);
+
+ if (bin_info_init())
+ return (true);
+
return (false);
}
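The largest arena.c change replaces the intrusive per-run freelist (run->avail/run->next) with a per-run bitmap: arena_run_reg_alloc() now asks bitmap_sfu() for the index of the first free region and derives the region's address from the bin's reg0_offset and reg_size, exactly as in the hunk above. A standalone sketch of that address computation, with illustrative offsets:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static void *
region_addr(void *run, uint32_t reg0_offset, uint32_t reg_size,
    unsigned regind)
{

	return ((void *)((uintptr_t)run + (uintptr_t)reg0_offset +
	    (uintptr_t)(reg_size * regind)));
}

int
main(void)
{
	char run[4096];

	/* With reg0_offset = 64 and reg_size = 32, region 3 starts at
	 * byte 64 + 3*32 = 160 within the run. */
	printf("region 3 at offset %zu\n",
	    (size_t)((char *)region_addr(run, 64, 32, 3) - run));
	return (0);
}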
diff --git a/dep/jemalloc/src/atomic.c b/dep/jemalloc/src/atomic.c
new file mode 100644
index 00000000000..77ee313113b
--- /dev/null
+++ b/dep/jemalloc/src/atomic.c
@@ -0,0 +1,2 @@
+#define JEMALLOC_ATOMIC_C_
+#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/dep/jemalloc/src/bitmap.c b/dep/jemalloc/src/bitmap.c
new file mode 100644
index 00000000000..b47e2629093
--- /dev/null
+++ b/dep/jemalloc/src/bitmap.c
@@ -0,0 +1,90 @@
+#define JEMALLOC_BITMAP_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static size_t bits2groups(size_t nbits);
+
+/******************************************************************************/
+
+static size_t
+bits2groups(size_t nbits)
+{
+
+ return ((nbits >> LG_BITMAP_GROUP_NBITS) +
+ !!(nbits & BITMAP_GROUP_NBITS_MASK));
+}
+
+void
+bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
+{
+ unsigned i;
+ size_t group_count;
+
+ assert(nbits > 0);
+ assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
+
+ /*
+ * Compute the number of groups necessary to store nbits bits, and
+ * progressively work upward through the levels until reaching a level
+ * that requires only one group.
+ */
+ binfo->levels[0].group_offset = 0;
+ group_count = bits2groups(nbits);
+ for (i = 1; group_count > 1; i++) {
+ assert(i < BITMAP_MAX_LEVELS);
+ binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+ + group_count;
+ group_count = bits2groups(group_count);
+ }
+ binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+ + group_count;
+ binfo->nlevels = i;
+ binfo->nbits = nbits;
+}
+
+size_t
+bitmap_info_ngroups(const bitmap_info_t *binfo)
+{
+
+ return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP);
+}
+
+size_t
+bitmap_size(size_t nbits)
+{
+ bitmap_info_t binfo;
+
+ bitmap_info_init(&binfo, nbits);
+ return (bitmap_info_ngroups(&binfo));
+}
+
+void
+bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
+{
+ size_t extra;
+ unsigned i;
+
+ /*
+ * Bits are actually inverted with regard to the external bitmap
+ * interface, so the bitmap starts out with all 1 bits, except for
+ * trailing unused bits (if any). Note that each group uses bit 0 to
+ * correspond to the first logical bit in the group, so extra bits
+ * are the most significant bits of the last group.
+ */
+ memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset <<
+ LG_SIZEOF_BITMAP);
+ extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
+ & BITMAP_GROUP_NBITS_MASK;
+ if (extra != 0)
+ bitmap[binfo->levels[1].group_offset - 1] >>= extra;
+ for (i = 1; i < binfo->nlevels; i++) {
+ size_t group_count = binfo->levels[i].group_offset -
+ binfo->levels[i-1].group_offset;
+ extra = (BITMAP_GROUP_NBITS - (group_count &
+ BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK;
+ if (extra != 0)
+ bitmap[binfo->levels[i+1].group_offset - 1] >>= extra;
+ }
+}
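The new bitmap.c builds a small tree of bit groups so that the first unset bit can be found without scanning the whole map; bits2groups() is simply a ceiling division of the bit count by the group width. A toy equivalent using plain arithmetic, with the group width assumed to be one unsigned long:

#include <limits.h>
#include <stddef.h>
#include <stdio.h>

#define GROUP_NBITS	(sizeof(unsigned long) * CHAR_BIT)

static size_t
toy_bits2groups(size_t nbits)
{

	return ((nbits + GROUP_NBITS - 1) / GROUP_NBITS);
}

int
main(void)
{

	/* 100 bits need two 64-bit groups on an LP64 platform. */
	printf("%zu bits -> %zu groups\n", (size_t)100, toy_bits2groups(100));
	return (0);
}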
diff --git a/dep/jemalloc/src/chunk.c b/dep/jemalloc/src/chunk.c
index 301519e8042..d190c6f49b3 100644
--- a/dep/jemalloc/src/chunk.c
+++ b/dep/jemalloc/src/chunk.c
@@ -70,7 +70,7 @@ RETURN:
#ifdef JEMALLOC_IVSALLOC
if (base == false && ret != NULL) {
if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) {
- chunk_dealloc(ret, size);
+ chunk_dealloc(ret, size, true);
return (NULL);
}
}
@@ -108,7 +108,7 @@ RETURN:
}
void
-chunk_dealloc(void *chunk, size_t size)
+chunk_dealloc(void *chunk, size_t size, bool unmap)
{
assert(chunk != NULL);
@@ -125,15 +125,17 @@ chunk_dealloc(void *chunk, size_t size)
malloc_mutex_unlock(&chunks_mtx);
#endif
+ if (unmap) {
#ifdef JEMALLOC_SWAP
- if (swap_enabled && chunk_dealloc_swap(chunk, size) == false)
- return;
+ if (swap_enabled && chunk_dealloc_swap(chunk, size) == false)
+ return;
#endif
#ifdef JEMALLOC_DSS
- if (chunk_dealloc_dss(chunk, size) == false)
- return;
+ if (chunk_dealloc_dss(chunk, size) == false)
+ return;
#endif
- chunk_dealloc_mmap(chunk, size);
+ chunk_dealloc_mmap(chunk, size);
+ }
}
bool
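chunk_dealloc() now takes an unmap flag, so callers can retire a chunk from the allocator's bookkeeping without immediately returning its pages through the swap/DSS/mmap paths. A toy model of that contract (not the jemalloc implementation):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static void
toy_chunk_dealloc(void *chunk, size_t size, bool unmap)
{

	/* Bookkeeping happens unconditionally. */
	printf("retired chunk %p (%zu bytes)\n", chunk, size);
	if (unmap) {
		/* Only now hand the address range back to the system
		 * (munmap(2) in the mmap-backed case). */
		printf("unmapped %zu bytes\n", size);
	}
}

int
main(void)
{
	char chunk[16];

	toy_chunk_dealloc(chunk, sizeof(chunk), false);	/* Keep mapping. */
	toy_chunk_dealloc(chunk, sizeof(chunk), true);	/* Release pages. */
	return (0);
}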
diff --git a/dep/jemalloc/src/chunk_mmap.c b/dep/jemalloc/src/chunk_mmap.c
index bc367559774..164e86e7b38 100644
--- a/dep/jemalloc/src/chunk_mmap.c
+++ b/dep/jemalloc/src/chunk_mmap.c
@@ -206,13 +206,15 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve)
void *
chunk_alloc_mmap(size_t size)
{
- return chunk_alloc_mmap_internal(size, false);
+
+ return (chunk_alloc_mmap_internal(size, false));
}
void *
chunk_alloc_mmap_noreserve(size_t size)
{
- return chunk_alloc_mmap_internal(size, true);
+
+ return (chunk_alloc_mmap_internal(size, true));
}
void
diff --git a/dep/jemalloc/src/ckh.c b/dep/jemalloc/src/ckh.c
index 682a8db65bf..43fcc25239d 100644
--- a/dep/jemalloc/src/ckh.c
+++ b/dep/jemalloc/src/ckh.c
@@ -34,7 +34,7 @@
* respectively.
*
******************************************************************************/
-#define CKH_C_
+#define JEMALLOC_CKH_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
@@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key)
size_t hash1, hash2, bucket, cell;
assert(ckh != NULL);
- assert(ckh->magic = CKH_MAGIG);
+ dassert(ckh->magic == CKH_MAGIC);
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
@@ -262,9 +262,15 @@ ckh_grow(ckh_t *ckh)
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS;
while (true) {
+ size_t usize;
+
lg_curcells++;
- tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells,
- ZU(1) << LG_CACHELINE, true);
+ usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL);
+ if (usize == 0) {
+ ret = true;
+ goto RETURN;
+ }
+ tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
if (tab == NULL) {
ret = true;
goto RETURN;
@@ -295,7 +301,7 @@ static void
ckh_shrink(ckh_t *ckh)
{
ckhc_t *tab, *ttab;
- size_t lg_curcells;
+ size_t lg_curcells, usize;
unsigned lg_prevbuckets;
/*
@@ -304,8 +310,10 @@ ckh_shrink(ckh_t *ckh)
*/
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
- tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells,
- ZU(1) << LG_CACHELINE, true);
+ usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL);
+ if (usize == 0)
+ return;
+ tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
if (tab == NULL) {
/*
* An OOM error isn't worth propagating, since it doesn't
@@ -340,7 +348,7 @@ bool
ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
{
bool ret;
- size_t mincells;
+ size_t mincells, usize;
unsigned lg_mincells;
assert(minitems > 0);
@@ -375,15 +383,19 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
ckh->hash = hash;
ckh->keycomp = keycomp;
- ckh->tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_mincells,
- (ZU(1) << LG_CACHELINE), true);
+ usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL);
+ if (usize == 0) {
+ ret = true;
+ goto RETURN;
+ }
+ ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
if (ckh->tab == NULL) {
ret = true;
goto RETURN;
}
#ifdef JEMALLOC_DEBUG
- ckh->magic = CKH_MAGIG;
+ ckh->magic = CKH_MAGIC;
#endif
ret = false;
@@ -396,7 +408,7 @@ ckh_delete(ckh_t *ckh)
{
assert(ckh != NULL);
- assert(ckh->magic = CKH_MAGIG);
+ dassert(ckh->magic == CKH_MAGIC);
#ifdef CKH_VERBOSE
malloc_printf(
@@ -421,7 +433,7 @@ ckh_count(ckh_t *ckh)
{
assert(ckh != NULL);
- assert(ckh->magic = CKH_MAGIG);
+ dassert(ckh->magic == CKH_MAGIC);
return (ckh->count);
}
@@ -452,7 +464,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data)
bool ret;
assert(ckh != NULL);
- assert(ckh->magic = CKH_MAGIG);
+ dassert(ckh->magic == CKH_MAGIC);
assert(ckh_search(ckh, key, NULL, NULL));
#ifdef CKH_COUNT
@@ -477,7 +489,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
size_t cell;
assert(ckh != NULL);
- assert(ckh->magic = CKH_MAGIG);
+ dassert(ckh->magic == CKH_MAGIC);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
@@ -509,7 +521,7 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
size_t cell;
assert(ckh != NULL);
- assert(ckh->magic = CKH_MAGIG);
+ dassert(ckh->magic == CKH_MAGIC);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
@@ -544,7 +556,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
} else {
ret1 = h;
ret2 = hash(key, strlen((const char *)key),
- 0x8432a476666bbc13U);
+ 0x8432a476666bbc13LLU);
}
*hash1 = ret1;
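
The ckh changes stop passing the raw request to ipalloc() and instead compute the usable size up front with sa2u(), treating a zero result as overflow. A simplified sketch of that pattern; sa2u_sketch() only rounds up to the alignment and does not model jemalloc's size classes:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define CACHELINE ((size_t)64)

/* Round size up to a multiple of alignment; return 0 if the request wraps. */
static size_t
sa2u_sketch(size_t size, size_t alignment)
{
	size_t usize = (size + alignment - 1) & ~(alignment - 1);

	if (usize < size)
		return (0);     /* overflow: request too large */
	return (usize);
}

int
main(void)
{
	/* e.g. sa2u_sketch(SIZE_MAX - 8, CACHELINE) returns 0. */
	size_t usize = sa2u_sketch(sizeof(void *) << 10, CACHELINE);

	if (usize == 0) {
		/* Propagate the error instead of calling the allocator. */
		fprintf(stderr, "request overflows\n");
		return (1);
	}
	free(malloc(usize));    /* stand-in for ipalloc(usize, ...) */
	return (0);
}
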
diff --git a/dep/jemalloc/src/ctl.c b/dep/jemalloc/src/ctl.c
index 3c8adab90a3..e5336d36949 100644
--- a/dep/jemalloc/src/ctl.c
+++ b/dep/jemalloc/src/ctl.c
@@ -182,6 +182,7 @@ CTL_PROTO(stats_arenas_i_lruns_j_highruns)
CTL_PROTO(stats_arenas_i_lruns_j_curruns)
INDEX_PROTO(stats_arenas_i_lruns_j)
#endif
+CTL_PROTO(stats_arenas_i_nthreads)
CTL_PROTO(stats_arenas_i_pactive)
CTL_PROTO(stats_arenas_i_pdirty)
#ifdef JEMALLOC_STATS
@@ -192,6 +193,7 @@ CTL_PROTO(stats_arenas_i_purged)
#endif
INDEX_PROTO(stats_arenas_i)
#ifdef JEMALLOC_STATS
+CTL_PROTO(stats_cactive)
CTL_PROTO(stats_allocated)
CTL_PROTO(stats_active)
CTL_PROTO(stats_mapped)
@@ -434,6 +436,7 @@ static const ctl_node_t stats_arenas_i_lruns_node[] = {
#endif
static const ctl_node_t stats_arenas_i_node[] = {
+ {NAME("nthreads"), CTL(stats_arenas_i_nthreads)},
{NAME("pactive"), CTL(stats_arenas_i_pactive)},
{NAME("pdirty"), CTL(stats_arenas_i_pdirty)}
#ifdef JEMALLOC_STATS
@@ -458,6 +461,7 @@ static const ctl_node_t stats_arenas_node[] = {
static const ctl_node_t stats_node[] = {
#ifdef JEMALLOC_STATS
+ {NAME("cactive"), CTL(stats_cactive)},
{NAME("allocated"), CTL(stats_allocated)},
{NAME("active"), CTL(stats_active)},
{NAME("mapped"), CTL(stats_mapped)},
@@ -620,6 +624,7 @@ ctl_arena_refresh(arena_t *arena, unsigned i)
ctl_arena_clear(astats);
+ sstats->nthreads += astats->nthreads;
#ifdef JEMALLOC_STATS
ctl_arena_stats_amerge(astats, arena);
/* Merge into sum stats as well. */
@@ -657,10 +662,17 @@ ctl_refresh(void)
* Clear sum stats, since they will be merged into by
* ctl_arena_refresh().
*/
+ ctl_stats.arenas[narenas].nthreads = 0;
ctl_arena_clear(&ctl_stats.arenas[narenas]);
malloc_mutex_lock(&arenas_lock);
memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
+ for (i = 0; i < narenas; i++) {
+ if (arenas[i] != NULL)
+ ctl_stats.arenas[i].nthreads = arenas[i]->nthreads;
+ else
+ ctl_stats.arenas[i].nthreads = 0;
+ }
malloc_mutex_unlock(&arenas_lock);
for (i = 0; i < narenas; i++) {
bool initialized = (tarenas[i] != NULL);
@@ -1114,8 +1126,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
unsigned newind, oldind;
newind = oldind = choose_arena()->ind;
- WRITE(oldind, unsigned);
- READ(newind, unsigned);
+ WRITE(newind, unsigned);
+ READ(oldind, unsigned);
if (newind != oldind) {
arena_t *arena;
@@ -1129,6 +1141,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
malloc_mutex_lock(&arenas_lock);
if ((arena = arenas[newind]) == NULL)
arena = arenas_extend(newind);
+ arenas[oldind]->nthreads--;
+ arenas[newind]->nthreads++;
malloc_mutex_unlock(&arenas_lock);
if (arena == NULL) {
ret = EAGAIN;
@@ -1137,6 +1151,13 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
/* Set new arena association. */
ARENA_SET(arena);
+#ifdef JEMALLOC_TCACHE
+ {
+ tcache_t *tcache = TCACHE_GET();
+ if (tcache != NULL)
+ tcache->arena = arena;
+ }
+#endif
}
ret = 0;
@@ -1146,9 +1167,9 @@ RETURN:
#ifdef JEMALLOC_STATS
CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t);
-CTL_RO_NL_GEN(thread_allocatedp, &ALLOCATED_GET(), uint64_t *);
+CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *);
CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t);
-CTL_RO_NL_GEN(thread_deallocatedp, &DEALLOCATED_GET(), uint64_t *);
+CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *);
#endif
/******************************************************************************/
@@ -1284,9 +1305,9 @@ CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool)
/******************************************************************************/
-CTL_RO_NL_GEN(arenas_bin_i_size, arenas[0]->bins[mib[2]].reg_size, size_t)
-CTL_RO_NL_GEN(arenas_bin_i_nregs, arenas[0]->bins[mib[2]].nregs, uint32_t)
-CTL_RO_NL_GEN(arenas_bin_i_run_size, arenas[0]->bins[mib[2]].run_size, size_t)
+CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t)
+CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t)
+CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t)
const ctl_node_t *
arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i)
{
@@ -1531,6 +1552,7 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
}
#endif
+CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
#ifdef JEMALLOC_STATS
@@ -1562,6 +1584,7 @@ RETURN:
}
#ifdef JEMALLOC_STATS
+CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *)
CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t)
CTL_RO_GEN(stats_active, ctl_stats.active, size_t)
CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t)
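
ctl.c now exposes stats.arenas.<i>.nthreads and fixes thread.arena so the previous arena index is returned through oldp while the new index is read from newp. A hedged usage sketch, assuming a stats-enabled build whose public entry points are unprefixed (this tree normally wraps them in JEMALLOC_P()):

#include <stdio.h>
#include <stdint.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	unsigned old_arena, new_arena = 0;
	uint64_t allocated;
	size_t usz = sizeof(unsigned);
	size_t u64sz = sizeof(uint64_t);

	/* Read the current arena index and bind this thread to arena 0. */
	if (mallctl("thread.arena", &old_arena, &usz, &new_arena,
	    sizeof(new_arena)) != 0)
		return (1);
	printf("moved from arena %u to arena %u\n", old_arena, new_arena);

	/* Cumulative bytes allocated by this thread. */
	if (mallctl("thread.allocated", &allocated, &u64sz, NULL, 0) == 0)
		printf("thread.allocated = %llu\n",
		    (unsigned long long)allocated);
	return (0);
}
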
diff --git a/dep/jemalloc/src/hash.c b/dep/jemalloc/src/hash.c
index 6a13d7a03c0..cfa4da0275c 100644
--- a/dep/jemalloc/src/hash.c
+++ b/dep/jemalloc/src/hash.c
@@ -1,2 +1,2 @@
-#define HASH_C_
+#define JEMALLOC_HASH_C_
#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/dep/jemalloc/src/huge.c b/dep/jemalloc/src/huge.c
index 0aadc4339a9..a4f9b054ed5 100644
--- a/dep/jemalloc/src/huge.c
+++ b/dep/jemalloc/src/huge.c
@@ -50,6 +50,7 @@ huge_malloc(size_t size, bool zero)
malloc_mutex_lock(&huge_mtx);
extent_tree_ad_insert(&huge, node);
#ifdef JEMALLOC_STATS
+ stats_cactive_add(csize);
huge_nmalloc++;
huge_allocated += csize;
#endif
@@ -83,7 +84,7 @@ huge_palloc(size_t size, size_t alignment, bool zero)
* alignment, in order to assure the alignment can be achieved, then
* unmap leading and trailing chunks.
*/
- assert(alignment >= chunksize);
+ assert(alignment > chunksize);
chunk_size = CHUNK_CEILING(size);
@@ -109,12 +110,12 @@ huge_palloc(size_t size, size_t alignment, bool zero)
if (offset == 0) {
/* Trim trailing space. */
chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
- - chunk_size);
+ - chunk_size, true);
} else {
size_t trailsize;
/* Trim leading space. */
- chunk_dealloc(ret, alignment - offset);
+ chunk_dealloc(ret, alignment - offset, true);
ret = (void *)((uintptr_t)ret + (alignment - offset));
@@ -123,7 +124,7 @@ huge_palloc(size_t size, size_t alignment, bool zero)
/* Trim trailing space. */
assert(trailsize < alloc_size);
chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
- trailsize);
+ trailsize, true);
}
}
@@ -134,6 +135,7 @@ huge_palloc(size_t size, size_t alignment, bool zero)
malloc_mutex_lock(&huge_mtx);
extent_tree_ad_insert(&huge, node);
#ifdef JEMALLOC_STATS
+ stats_cactive_add(chunk_size);
huge_nmalloc++;
huge_allocated += chunk_size;
#endif
@@ -192,7 +194,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
* different size class. In that case, fall back to allocating new
* space and copying.
*/
- if (alignment != 0)
+ if (alignment > chunksize)
ret = huge_palloc(size + extra, alignment, zero);
else
ret = huge_malloc(size + extra, zero);
@@ -201,7 +203,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
if (extra == 0)
return (NULL);
/* Try again, this time without extra. */
- if (alignment != 0)
+ if (alignment > chunksize)
ret = huge_palloc(size, alignment, zero);
else
ret = huge_malloc(size, zero);
@@ -232,6 +234,13 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
) {
size_t newsize = huge_salloc(ret);
+ /*
+ * Remove ptr from the tree of huge allocations before
+ * performing the remap operation, in order to avoid the
+ * possibility of another thread acquiring that mapping before
+ * this one removes it from the tree.
+ */
+ huge_dalloc(ptr, false);
if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED,
ret) == MAP_FAILED) {
/*
@@ -251,9 +260,8 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
if (opt_abort)
abort();
memcpy(ret, ptr, copysize);
- idalloc(ptr);
- } else
- huge_dalloc(ptr, false);
+ chunk_dealloc_mmap(ptr, oldsize);
+ }
} else
#endif
{
@@ -278,6 +286,7 @@ huge_dalloc(void *ptr, bool unmap)
extent_tree_ad_remove(&huge, node);
#ifdef JEMALLOC_STATS
+ stats_cactive_sub(node->size);
huge_ndalloc++;
huge_allocated -= node->size;
#endif
@@ -292,9 +301,10 @@ huge_dalloc(void *ptr, bool unmap)
memset(node->addr, 0x5a, node->size);
#endif
#endif
- chunk_dealloc(node->addr, node->size);
}
+ chunk_dealloc(node->addr, node->size, unmap);
+
base_node_dealloc(node);
}
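
huge_ralloc()'s mremap path now removes the old extent from the huge tree before remapping, so no other thread can be handed the target mapping that MREMAP_FIXED is about to replace, and the old chunk is released with chunk_dealloc_mmap() rather than idalloc(). A Linux-only sketch of the underlying mremap() call, with illustrative sizes:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int
main(void)
{
	size_t oldsize = 1 << 20, newsize = 2 << 20;
	void *old, *target, *moved;

	old = mmap(NULL, oldsize, PROT_READ|PROT_WRITE,
	    MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	target = mmap(NULL, newsize, PROT_READ|PROT_WRITE,
	    MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (old == MAP_FAILED || target == MAP_FAILED)
		return (1);
	memcpy(old, "payload", 8);

	/* Move (and grow) the old mapping onto the target address. */
	moved = mremap(old, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED,
	    target);
	if (moved == MAP_FAILED)
		return (1);
	printf("%s now lives at %p\n", (char *)moved, moved);
	munmap(moved, newsize);
	return (0);
}
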
diff --git a/dep/jemalloc/src/jemalloc.c b/dep/jemalloc/src/jemalloc.c
index 2aebc51dd19..a161c2e26e1 100644
--- a/dep/jemalloc/src/jemalloc.c
+++ b/dep/jemalloc/src/jemalloc.c
@@ -7,12 +7,10 @@
malloc_mutex_t arenas_lock;
arena_t **arenas;
unsigned narenas;
-static unsigned next_arena;
+pthread_key_t arenas_tsd;
#ifndef NO_TLS
__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
-#else
-pthread_key_t arenas_tsd;
#endif
#ifdef JEMALLOC_STATS
@@ -30,7 +28,13 @@ static bool malloc_initialized = false;
static pthread_t malloc_initializer = (unsigned long)0;
/* Used to avoid initialization races. */
-static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
+static malloc_mutex_t init_lock =
+#ifdef JEMALLOC_OSSPIN
+ 0
+#else
+ MALLOC_MUTEX_INITIALIZER
+#endif
+ ;
#ifdef DYNAMIC_PAGE_SHIFT
size_t pagesize;
@@ -70,6 +74,7 @@ size_t opt_narenas = 0;
static void wrtmessage(void *cbopaque, const char *s);
static void stats_print_atexit(void);
static unsigned malloc_ncpus(void);
+static void arenas_cleanup(void *arg);
#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
static void thread_allocated_cleanup(void *arg);
#endif
@@ -79,6 +84,7 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen,
const char *v, size_t vlen);
static void malloc_conf_init(void);
static bool malloc_init_hard(void);
+static int imemalign(void **memptr, size_t alignment, size_t size);
/******************************************************************************/
/* malloc_message() setup. */
@@ -147,13 +153,53 @@ choose_arena_hard(void)
arena_t *ret;
if (narenas > 1) {
+ unsigned i, choose, first_null;
+
+ choose = 0;
+ first_null = narenas;
malloc_mutex_lock(&arenas_lock);
- if ((ret = arenas[next_arena]) == NULL)
- ret = arenas_extend(next_arena);
- next_arena = (next_arena + 1) % narenas;
+ assert(arenas[0] != NULL);
+ for (i = 1; i < narenas; i++) {
+ if (arenas[i] != NULL) {
+ /*
+ * Choose the first arena that has the lowest
+ * number of threads assigned to it.
+ */
+ if (arenas[i]->nthreads <
+ arenas[choose]->nthreads)
+ choose = i;
+ } else if (first_null == narenas) {
+ /*
+ * Record the index of the first uninitialized
+ * arena, in case all extant arenas are in use.
+ *
+ * NB: It is possible for there to be
+ * discontinuities in terms of initialized
+ * versus uninitialized arenas, due to the
+ * "thread.arena" mallctl.
+ */
+ first_null = i;
+ }
+ }
+
+ if (arenas[choose]->nthreads == 0 || first_null == narenas) {
+ /*
+ * Use an unloaded arena, or the least loaded arena if
+ * all arenas are already initialized.
+ */
+ ret = arenas[choose];
+ } else {
+ /* Initialize a new arena. */
+ ret = arenas_extend(first_null);
+ }
+ ret->nthreads++;
malloc_mutex_unlock(&arenas_lock);
- } else
+ } else {
ret = arenas[0];
+ malloc_mutex_lock(&arenas_lock);
+ ret->nthreads++;
+ malloc_mutex_unlock(&arenas_lock);
+ }
ARENA_SET(ret);
@@ -213,6 +259,28 @@ stats_print_atexit(void)
JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL);
}
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+thread_allocated_t *
+thread_allocated_get_hard(void)
+{
+ thread_allocated_t *thread_allocated = (thread_allocated_t *)
+ imalloc(sizeof(thread_allocated_t));
+ if (thread_allocated == NULL) {
+ static thread_allocated_t static_thread_allocated = {0, 0};
+ malloc_write("<jemalloc>: Error allocating TSD;"
+ " mallctl(\"thread.{de,}allocated[p]\", ...)"
+ " will be inaccurate\n");
+ if (opt_abort)
+ abort();
+ return (&static_thread_allocated);
+ }
+ pthread_setspecific(thread_allocated_tsd, thread_allocated);
+ thread_allocated->allocated = 0;
+ thread_allocated->deallocated = 0;
+ return (thread_allocated);
+}
+#endif
+
/*
* End miscellaneous support functions.
*/
@@ -237,6 +305,16 @@ malloc_ncpus(void)
return (ret);
}
+static void
+arenas_cleanup(void *arg)
+{
+ arena_t *arena = (arena_t *)arg;
+
+ malloc_mutex_lock(&arenas_lock);
+ arena->nthreads--;
+ malloc_mutex_unlock(&arenas_lock);
+}
+
#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
static void
thread_allocated_cleanup(void *arg)
@@ -421,8 +499,8 @@ malloc_conf_init(void)
if ((opts = getenv(envname)) != NULL) {
/*
* Do nothing; opts is already initialized to
- * the value of the JEMALLOC_OPTIONS
- * environment variable.
+ * the value of the MALLOC_CONF environment
+ * variable.
*/
} else {
/* No configuration specified. */
@@ -611,7 +689,7 @@ malloc_init_hard(void)
result = sysconf(_SC_PAGESIZE);
assert(result != -1);
- pagesize = (unsigned)result;
+ pagesize = (size_t)result;
/*
* We assume that pagesize is a power of 2 when calculating
@@ -671,7 +749,10 @@ malloc_init_hard(void)
}
#ifdef JEMALLOC_TCACHE
- tcache_boot();
+ if (tcache_boot()) {
+ malloc_mutex_unlock(&init_lock);
+ return (true);
+ }
#endif
if (huge_boot()) {
@@ -688,6 +769,14 @@ malloc_init_hard(void)
}
#endif
+ if (malloc_mutex_init(&arenas_lock))
+ return (true);
+
+ if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) {
+ malloc_mutex_unlock(&init_lock);
+ return (true);
+ }
+
/*
* Create enough scaffolding to allow recursive allocation in
* malloc_ncpus().
@@ -712,8 +801,7 @@ malloc_init_hard(void)
* threaded mode.
*/
ARENA_SET(arenas[0]);
-
- malloc_mutex_init(&arenas_lock);
+ arenas[0]->nthreads++;
#ifdef JEMALLOC_PROF
if (prof_boot2()) {
@@ -753,15 +841,6 @@ malloc_init_hard(void)
malloc_write(")\n");
}
- next_arena = (narenas > 0) ? 1 : 0;
-
-#ifdef NO_TLS
- if (pthread_key_create(&arenas_tsd, NULL) != 0) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
-#endif
-
/* Allocate and initialize arenas. */
arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
if (arenas == NULL) {
@@ -793,7 +872,6 @@ malloc_init_hard(void)
return (false);
}
-
#ifdef JEMALLOC_ZONE
JEMALLOC_ATTR(constructor)
void
@@ -862,7 +940,8 @@ JEMALLOC_P(malloc)(size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
usize = s2u(size);
- if ((cnt = prof_alloc_prep(usize)) == NULL) {
+ PROF_ALLOC_PREP(1, usize, cnt);
+ if (cnt == NULL) {
ret = NULL;
goto OOM;
}
@@ -911,19 +990,23 @@ RETURN:
}
JEMALLOC_ATTR(nonnull(1))
-JEMALLOC_ATTR(visibility("default"))
-int
-JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
+#ifdef JEMALLOC_PROF
+/*
+ * Avoid any uncertainty as to how many backtrace frames to ignore in
+ * PROF_ALLOC_PREP().
+ */
+JEMALLOC_ATTR(noinline)
+#endif
+static int
+imemalign(void **memptr, size_t alignment, size_t size)
{
int ret;
- void *result;
-#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
size_t usize
-# ifdef JEMALLOC_CC_SILENCE
+#ifdef JEMALLOC_CC_SILENCE
= 0
-# endif
- ;
#endif
+ ;
+ void *result;
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt
# ifdef JEMALLOC_CC_SILENCE
@@ -973,34 +1056,38 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
goto RETURN;
}
+ usize = sa2u(size, alignment, NULL);
+ if (usize == 0) {
+ result = NULL;
+ ret = ENOMEM;
+ goto RETURN;
+ }
+
#ifdef JEMALLOC_PROF
if (opt_prof) {
- usize = sa2u(size, alignment, NULL);
- if ((cnt = prof_alloc_prep(usize)) == NULL) {
+ PROF_ALLOC_PREP(2, usize, cnt);
+ if (cnt == NULL) {
result = NULL;
ret = EINVAL;
} else {
if (prof_promote && (uintptr_t)cnt !=
(uintptr_t)1U && usize <= small_maxclass) {
- result = ipalloc(small_maxclass+1,
- alignment, false);
+ assert(sa2u(small_maxclass+1,
+ alignment, NULL) != 0);
+ result = ipalloc(sa2u(small_maxclass+1,
+ alignment, NULL), alignment, false);
if (result != NULL) {
arena_prof_promoted(result,
usize);
}
} else {
- result = ipalloc(size, alignment,
+ result = ipalloc(usize, alignment,
false);
}
}
} else
#endif
- {
-#ifdef JEMALLOC_STATS
- usize = sa2u(size, alignment, NULL);
-#endif
- result = ipalloc(size, alignment, false);
- }
+ result = ipalloc(usize, alignment, false);
}
if (result == NULL) {
@@ -1032,6 +1119,15 @@ RETURN:
return (ret);
}
+JEMALLOC_ATTR(nonnull(1))
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
+{
+
+ return imemalign(memptr, alignment, size);
+}
+
JEMALLOC_ATTR(malloc)
JEMALLOC_ATTR(visibility("default"))
void *
@@ -1087,7 +1183,8 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
usize = s2u(num_size);
- if ((cnt = prof_alloc_prep(usize)) == NULL) {
+ PROF_ALLOC_PREP(1, usize, cnt);
+ if (cnt == NULL) {
ret = NULL;
goto RETURN;
}
@@ -1200,7 +1297,9 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
if (opt_prof) {
usize = s2u(size);
old_ctx = prof_ctx_get(ptr);
- if ((cnt = prof_alloc_prep(usize)) == NULL) {
+ PROF_ALLOC_PREP(1, usize, cnt);
+ if (cnt == NULL) {
+ old_ctx = NULL;
ret = NULL;
goto OOM;
}
@@ -1210,8 +1309,13 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
false, false);
if (ret != NULL)
arena_prof_promoted(ret, usize);
- } else
+ else
+ old_ctx = NULL;
+ } else {
ret = iralloc(ptr, size, 0, 0, false, false);
+ if (ret == NULL)
+ old_ctx = NULL;
+ }
} else
#endif
{
@@ -1249,7 +1353,8 @@ OOM:
#ifdef JEMALLOC_PROF
if (opt_prof) {
usize = s2u(size);
- if ((cnt = prof_alloc_prep(usize)) == NULL)
+ PROF_ALLOC_PREP(1, usize, cnt);
+ if (cnt == NULL)
ret = NULL;
else {
if (prof_promote && (uintptr_t)cnt !=
@@ -1354,7 +1459,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size)
#ifdef JEMALLOC_CC_SILENCE
int result =
#endif
- JEMALLOC_P(posix_memalign)(&ret, alignment, size);
+ imemalign(&ret, alignment, size);
#ifdef JEMALLOC_CC_SILENCE
if (result != 0)
return (NULL);
@@ -1373,7 +1478,7 @@ JEMALLOC_P(valloc)(size_t size)
#ifdef JEMALLOC_CC_SILENCE
int result =
#endif
- JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
+ imemalign(&ret, PAGE_SIZE, size);
#ifdef JEMALLOC_CC_SILENCE
if (result != 0)
return (NULL);
@@ -1454,15 +1559,18 @@ JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
}
JEMALLOC_INLINE void *
-iallocm(size_t size, size_t alignment, bool zero)
+iallocm(size_t usize, size_t alignment, bool zero)
{
+ assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment,
+ NULL)));
+
if (alignment != 0)
- return (ipalloc(size, alignment, zero));
+ return (ipalloc(usize, alignment, zero));
else if (zero)
- return (icalloc(size));
+ return (icalloc(usize));
else
- return (imalloc(size));
+ return (imalloc(usize));
}
JEMALLOC_ATTR(nonnull(1))
@@ -1485,38 +1593,43 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
if (malloc_init())
goto OOM;
+ usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL);
+ if (usize == 0)
+ goto OOM;
+
#ifdef JEMALLOC_PROF
if (opt_prof) {
- usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment,
- NULL);
- if ((cnt = prof_alloc_prep(usize)) == NULL)
+ PROF_ALLOC_PREP(1, usize, cnt);
+ if (cnt == NULL)
goto OOM;
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
small_maxclass) {
- p = iallocm(small_maxclass+1, alignment, zero);
+ size_t usize_promoted = (alignment == 0) ?
+ s2u(small_maxclass+1) : sa2u(small_maxclass+1,
+ alignment, NULL);
+ assert(usize_promoted != 0);
+ p = iallocm(usize_promoted, alignment, zero);
if (p == NULL)
goto OOM;
arena_prof_promoted(p, usize);
} else {
- p = iallocm(size, alignment, zero);
+ p = iallocm(usize, alignment, zero);
if (p == NULL)
goto OOM;
}
-
+ prof_malloc(p, usize, cnt);
if (rsize != NULL)
*rsize = usize;
} else
#endif
{
- p = iallocm(size, alignment, zero);
+ p = iallocm(usize, alignment, zero);
if (p == NULL)
goto OOM;
#ifndef JEMALLOC_STATS
if (rsize != NULL)
#endif
{
- usize = (alignment == 0) ? s2u(size) : sa2u(size,
- alignment, NULL);
#ifdef JEMALLOC_STATS
if (rsize != NULL)
#endif
@@ -1559,7 +1672,6 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
bool no_move = flags & ALLOCM_NO_MOVE;
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt;
- prof_ctx_t *old_ctx;
#endif
assert(ptr != NULL);
@@ -1574,25 +1686,33 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
/*
* usize isn't knowable before iralloc() returns when extra is
* non-zero. Therefore, compute its maximum possible value and
- * use that in prof_alloc_prep() to decide whether to capture a
+ * use that in PROF_ALLOC_PREP() to decide whether to capture a
* backtrace. prof_realloc() will use the actual usize to
* decide whether to sample.
*/
size_t max_usize = (alignment == 0) ? s2u(size+extra) :
sa2u(size+extra, alignment, NULL);
+ prof_ctx_t *old_ctx = prof_ctx_get(p);
old_size = isalloc(p);
- old_ctx = prof_ctx_get(p);
- if ((cnt = prof_alloc_prep(max_usize)) == NULL)
+ PROF_ALLOC_PREP(1, max_usize, cnt);
+ if (cnt == NULL)
goto OOM;
- if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize
- <= small_maxclass) {
+ /*
+ * Use minimum usize to determine whether promotion may happen.
+ */
+ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U
+ && ((alignment == 0) ? s2u(size) : sa2u(size,
+ alignment, NULL)) <= small_maxclass) {
q = iralloc(p, small_maxclass+1, (small_maxclass+1 >=
size+extra) ? 0 : size+extra - (small_maxclass+1),
alignment, zero, no_move);
if (q == NULL)
goto ERR;
- usize = isalloc(q);
- arena_prof_promoted(q, usize);
+ if (max_usize < PAGE_SIZE) {
+ usize = max_usize;
+ arena_prof_promoted(q, usize);
+ } else
+ usize = isalloc(q);
} else {
q = iralloc(p, size, extra, alignment, zero, no_move);
if (q == NULL)
@@ -1600,6 +1720,8 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
usize = isalloc(q);
}
prof_realloc(q, usize, cnt, old_size, old_ctx);
+ if (rsize != NULL)
+ *rsize = usize;
} else
#endif
{
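
choose_arena_hard() no longer hands out arenas round-robin; it picks the initialized arena with the fewest assigned threads, falling back to initializing a spare slot while any remain. A standalone sketch of that policy (arena_sketch_t is illustrative, not jemalloc's arena_t):

#include <stdio.h>

typedef struct {
	unsigned nthreads;
} arena_sketch_t;

/* Returns the slot to use; *init is set if that slot must be created. */
static unsigned
choose_slot(arena_sketch_t **arenas, unsigned narenas, int *init)
{
	unsigned i, choose = 0, first_null = narenas;

	for (i = 1; i < narenas; i++) {
		if (arenas[i] != NULL) {
			if (arenas[i]->nthreads < arenas[choose]->nthreads)
				choose = i;
		} else if (first_null == narenas)
			first_null = i;
	}
	if (arenas[choose]->nthreads == 0 || first_null == narenas) {
		*init = 0;
		return (choose);        /* reuse an idle/least-loaded arena */
	}
	*init = 1;
	return (first_null);            /* initialize a fresh slot */
}

int
main(void)
{
	arena_sketch_t a0 = { 3 }, a1 = { 1 };
	arena_sketch_t *arenas[4] = { &a0, &a1, NULL, NULL };
	int init;
	unsigned slot = choose_slot(arenas, 4, &init);

	printf("slot %u, needs init: %d\n", slot, init);   /* slot 2, 1 */
	return (0);
}
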
diff --git a/dep/jemalloc/src/mb.c b/dep/jemalloc/src/mb.c
index 30a1a2e997a..dc2c0a256fd 100644
--- a/dep/jemalloc/src/mb.c
+++ b/dep/jemalloc/src/mb.c
@@ -1,2 +1,2 @@
-#define MB_C_
+#define JEMALLOC_MB_C_
#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/dep/jemalloc/src/mutex.c b/dep/jemalloc/src/mutex.c
index 3ecb18a340e..ca89ef1c962 100644
--- a/dep/jemalloc/src/mutex.c
+++ b/dep/jemalloc/src/mutex.c
@@ -55,6 +55,9 @@ pthread_create(pthread_t *__restrict thread,
bool
malloc_mutex_init(malloc_mutex_t *mutex)
{
+#ifdef JEMALLOC_OSSPIN
+ *mutex = 0;
+#else
pthread_mutexattr_t attr;
if (pthread_mutexattr_init(&attr) != 0)
@@ -70,6 +73,7 @@ malloc_mutex_init(malloc_mutex_t *mutex)
}
pthread_mutexattr_destroy(&attr);
+#endif
return (false);
}
@@ -77,8 +81,10 @@ void
malloc_mutex_destroy(malloc_mutex_t *mutex)
{
+#ifndef JEMALLOC_OSSPIN
if (pthread_mutex_destroy(mutex) != 0) {
malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n");
abort();
}
+#endif
}
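
With JEMALLOC_OSSPIN the lock is an OSSpinLock whose initializer is simply 0, so malloc_mutex_init() stores 0 and destroy becomes a no-op; otherwise the pthread path is unchanged. A minimal sketch of that split, where USE_OSSPIN is a hypothetical stand-in for JEMALLOC_OSSPIN:

#include <stdbool.h>
#ifdef USE_OSSPIN
#include <libkern/OSAtomic.h>
typedef OSSpinLock malloc_mutex_sketch_t;
#else
#include <pthread.h>
typedef pthread_mutex_t malloc_mutex_sketch_t;
#endif

static bool
mutex_sketch_init(malloc_mutex_sketch_t *mutex)
{
#ifdef USE_OSSPIN
	*mutex = 0;             /* OS_SPINLOCK_INIT */
	return (false);
#else
	return (pthread_mutex_init(mutex, NULL) != 0);
#endif
}

int
main(void)
{
	malloc_mutex_sketch_t m;

	return (mutex_sketch_init(&m) ? 1 : 0);
}
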
diff --git a/dep/jemalloc/src/prof.c b/dep/jemalloc/src/prof.c
index 636cccef52a..8a144b4e46c 100644
--- a/dep/jemalloc/src/prof.c
+++ b/dep/jemalloc/src/prof.c
@@ -3,15 +3,15 @@
#ifdef JEMALLOC_PROF
/******************************************************************************/
-#ifdef JEMALLOC_PROF_LIBGCC
-#include <unwind.h>
-#endif
-
#ifdef JEMALLOC_PROF_LIBUNWIND
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#endif
+#ifdef JEMALLOC_PROF_LIBGCC
+#include <unwind.h>
+#endif
+
/******************************************************************************/
/* Data. */
@@ -169,39 +169,7 @@ prof_leave(void)
prof_gdump();
}
-#ifdef JEMALLOC_PROF_LIBGCC
-static _Unwind_Reason_Code
-prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
-{
-
- return (_URC_NO_REASON);
-}
-
-static _Unwind_Reason_Code
-prof_unwind_callback(struct _Unwind_Context *context, void *arg)
-{
- prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
-
- if (data->nignore > 0)
- data->nignore--;
- else {
- data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
- data->bt->len++;
- if (data->bt->len == data->max)
- return (_URC_END_OF_STACK);
- }
-
- return (_URC_NO_REASON);
-}
-
-void
-prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
-{
- prof_unwind_data_t data = {bt, nignore, max};
-
- _Unwind_Backtrace(prof_unwind_callback, &data);
-}
-#elif defined(JEMALLOC_PROF_LIBUNWIND)
+#ifdef JEMALLOC_PROF_LIBUNWIND
void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
@@ -236,7 +204,41 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
break;
}
}
-#else
+#endif
+#ifdef JEMALLOC_PROF_LIBGCC
+static _Unwind_Reason_Code
+prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
+{
+
+ return (_URC_NO_REASON);
+}
+
+static _Unwind_Reason_Code
+prof_unwind_callback(struct _Unwind_Context *context, void *arg)
+{
+ prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
+
+ if (data->nignore > 0)
+ data->nignore--;
+ else {
+ data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
+ data->bt->len++;
+ if (data->bt->len == data->max)
+ return (_URC_END_OF_STACK);
+ }
+
+ return (_URC_NO_REASON);
+}
+
+void
+prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+{
+ prof_unwind_data_t data = {bt, nignore, max};
+
+ _Unwind_Backtrace(prof_unwind_callback, &data);
+}
+#endif
+#ifdef JEMALLOC_PROF_GCC
void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
@@ -432,6 +434,7 @@ prof_lookup(prof_bt_t *bt)
prof_ctx_t *p;
void *v;
} ctx;
+ bool new_ctx;
/*
* This thread's cache lacks bt. Look for it in the global
@@ -468,12 +471,26 @@ prof_lookup(prof_bt_t *bt)
idalloc(ctx.v);
return (NULL);
}
+ /*
+ * Artificially raise curobjs, in order to avoid a race
+ * condition with prof_ctx_merge()/prof_ctx_destroy().
+ *
+ * No locking is necessary for ctx here because no other
+ * threads have had the opportunity to fetch it from
+ * bt2ctx yet.
+ */
+ ctx.p->cnt_merged.curobjs++;
+ new_ctx = true;
+ } else {
+ /*
+ * Artificially raise curobjs, in order to avoid a race
+ * condition with prof_ctx_merge()/prof_ctx_destroy().
+ */
+ malloc_mutex_lock(&ctx.p->lock);
+ ctx.p->cnt_merged.curobjs++;
+ malloc_mutex_unlock(&ctx.p->lock);
+ new_ctx = false;
}
- /*
- * Acquire ctx's lock before releasing bt2ctx_mtx, in order to
- * avoid a race condition with prof_ctx_destroy().
- */
- malloc_mutex_lock(&ctx.p->lock);
prof_leave();
/* Link a prof_thd_cnt_t into ctx for this thread. */
@@ -486,8 +503,9 @@ prof_lookup(prof_bt_t *bt)
*/
ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
assert(ret.v != NULL);
- ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL,
- NULL);
+ if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
+ NULL, NULL))
+ assert(false);
ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
prof_ctx_merge(ret.p->ctx, ret.p);
/* ret can now be re-used. */
@@ -498,7 +516,8 @@ prof_lookup(prof_bt_t *bt)
/* Allocate and partially initialize a new cnt. */
ret.v = imalloc(sizeof(prof_thr_cnt_t));
if (ret.p == NULL) {
- malloc_mutex_unlock(&ctx.p->lock);
+ if (new_ctx)
+ prof_ctx_destroy(ctx.p);
return (NULL);
}
ql_elm_new(ret.p, cnts_link);
@@ -509,12 +528,15 @@ prof_lookup(prof_bt_t *bt)
ret.p->epoch = 0;
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
- malloc_mutex_unlock(&ctx.p->lock);
+ if (new_ctx)
+ prof_ctx_destroy(ctx.p);
idalloc(ret.v);
return (NULL);
}
ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
+ malloc_mutex_lock(&ctx.p->lock);
ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
+ ctx.p->cnt_merged.curobjs--;
malloc_mutex_unlock(&ctx.p->lock);
} else {
/* Move ret to the front of the LRU. */
@@ -628,11 +650,10 @@ prof_ctx_destroy(prof_ctx_t *ctx)
/*
* Check that ctx is still unused by any thread cache before destroying
- * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
- * avoid a race condition with this function, and prof_ctx_merge()
- * artificially raises ctx->cnt_merged.curobjs in order to avoid a race
- * between the main body of prof_ctx_merge() and entry into this
- * function.
+ * it. prof_lookup() artificially raises ctx->cnt_merged.curobjs in
+ * order to avoid a race condition with this function, as does
+ * prof_ctx_merge() in order to avoid a race between the main body of
+ * prof_ctx_merge() and entry into this function.
*/
prof_enter();
malloc_mutex_lock(&ctx->lock);
@@ -641,7 +662,8 @@ prof_ctx_destroy(prof_ctx_t *ctx)
assert(ctx->cnt_merged.accumobjs == 0);
assert(ctx->cnt_merged.accumbytes == 0);
/* Remove ctx from bt2ctx. */
- ckh_remove(&bt2ctx, ctx->bt, NULL, NULL);
+ if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
+ assert(false);
prof_leave();
/* Destroy ctx. */
malloc_mutex_unlock(&ctx->lock);
@@ -649,7 +671,10 @@ prof_ctx_destroy(prof_ctx_t *ctx)
malloc_mutex_destroy(&ctx->lock);
idalloc(ctx);
} else {
- /* Compensate for increment in prof_ctx_merge(). */
+ /*
+ * Compensate for increment in prof_ctx_merge() or
+ * prof_lookup().
+ */
ctx->cnt_merged.curobjs--;
malloc_mutex_unlock(&ctx->lock);
prof_leave();
@@ -1056,7 +1081,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
} else {
ret1 = h;
ret2 = hash(bt->vec, bt->len * sizeof(void *),
- 0x8432a476666bbc13U);
+ 0x8432a476666bbc13LLU);
}
*hash1 = ret1;
@@ -1093,7 +1118,6 @@ prof_tdata_init(void)
prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
if (prof_tdata->vec == NULL) {
-
ckh_delete(&prof_tdata->bt2cnt);
idalloc(prof_tdata);
return (NULL);
@@ -1111,33 +1135,26 @@ prof_tdata_init(void)
static void
prof_tdata_cleanup(void *arg)
{
- prof_tdata_t *prof_tdata;
+ prof_thr_cnt_t *cnt;
+ prof_tdata_t *prof_tdata = (prof_tdata_t *)arg;
- prof_tdata = PROF_TCACHE_GET();
- if (prof_tdata != NULL) {
- prof_thr_cnt_t *cnt;
-
- /*
- * Delete the hash table. All of its contents can still be
- * iterated over via the LRU.
- */
- ckh_delete(&prof_tdata->bt2cnt);
+ /*
+ * Delete the hash table. All of its contents can still be iterated
+ * over via the LRU.
+ */
+ ckh_delete(&prof_tdata->bt2cnt);
- /*
- * Iteratively merge cnt's into the global stats and delete
- * them.
- */
- while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
- prof_ctx_merge(cnt->ctx, cnt);
- ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
- idalloc(cnt);
- }
+ /* Iteratively merge cnt's into the global stats and delete them. */
+ while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
+ ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
+ prof_ctx_merge(cnt->ctx, cnt);
+ idalloc(cnt);
+ }
- idalloc(prof_tdata->vec);
+ idalloc(prof_tdata->vec);
- idalloc(prof_tdata);
- PROF_TCACHE_SET(NULL);
- }
+ idalloc(prof_tdata);
+ PROF_TCACHE_SET(NULL);
}
void
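
prof_lookup() now pins a ctx by bumping cnt_merged.curobjs while bt2ctx is still locked, instead of holding ctx->lock across prof_leave(); prof_ctx_destroy() treats the artificial count as "still in use". A generic sketch of that pinning idiom with pthread mutexes (names are illustrative, not jemalloc's):

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

typedef struct {
	pthread_mutex_t lock;
	long pinned;            /* plays the role of cnt_merged.curobjs */
} obj_t;

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

static void
obj_pin(obj_t *obj)
{
	/* Caller holds registry_lock, so obj cannot vanish underneath us. */
	pthread_mutex_lock(&obj->lock);
	obj->pinned++;
	pthread_mutex_unlock(&obj->lock);
}

static void
obj_unpin(obj_t *obj)
{
	bool destroy;

	pthread_mutex_lock(&registry_lock);
	pthread_mutex_lock(&obj->lock);
	obj->pinned--;
	destroy = (obj->pinned == 0);   /* nobody else holds a reference */
	pthread_mutex_unlock(&obj->lock);
	pthread_mutex_unlock(&registry_lock);
	if (destroy) {
		/* Registry removal would happen above, under registry_lock. */
		pthread_mutex_destroy(&obj->lock);
		free(obj);
	}
}

int
main(void)
{
	obj_t *obj = calloc(1, sizeof(*obj));

	if (obj == NULL)
		return (1);
	pthread_mutex_init(&obj->lock, NULL);

	pthread_mutex_lock(&registry_lock);     /* like prof_enter() */
	obj_pin(obj);                           /* like prof_lookup() */
	pthread_mutex_unlock(&registry_lock);   /* like prof_leave() */

	obj_unpin(obj);                         /* like prof_ctx_destroy() */
	return (0);
}
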
diff --git a/dep/jemalloc/src/rtree.c b/dep/jemalloc/src/rtree.c
index 7753743c5e6..eb0ff1e24af 100644
--- a/dep/jemalloc/src/rtree.c
+++ b/dep/jemalloc/src/rtree.c
@@ -1,4 +1,4 @@
-#define RTREE_C_
+#define JEMALLOC_RTREE_C_
#include "jemalloc/internal/jemalloc_internal.h"
rtree_t *
@@ -20,7 +20,10 @@ rtree_new(unsigned bits)
memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) *
height));
- malloc_mutex_init(&ret->mutex);
+ if (malloc_mutex_init(&ret->mutex)) {
+ /* Leak the rtree. */
+ return (NULL);
+ }
ret->height = height;
if (bits_per_level * height > bits)
ret->level2bits[0] = bits % bits_per_level;
diff --git a/dep/jemalloc/src/stats.c b/dep/jemalloc/src/stats.c
index 3dfe0d232a6..dc172e425c0 100644
--- a/dep/jemalloc/src/stats.c
+++ b/dep/jemalloc/src/stats.c
@@ -39,6 +39,10 @@
bool opt_stats_print = false;
+#ifdef JEMALLOC_STATS
+size_t stats_cactive = 0;
+#endif
+
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
@@ -319,6 +323,7 @@ static void
stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
unsigned i)
{
+ unsigned nthreads;
size_t pagesize, pactive, pdirty, mapped;
uint64_t npurge, nmadvise, purged;
size_t small_allocated;
@@ -328,6 +333,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
CTL_GET("arenas.pagesize", &pagesize, size_t);
+ CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
+ malloc_cprintf(write_cb, cbopaque,
+ "assigned threads: %u\n", nthreads);
CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);
@@ -669,21 +677,26 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
#ifdef JEMALLOC_STATS
{
int err;
- size_t ssz;
+ size_t sszp, ssz;
+ size_t *cactive;
size_t allocated, active, mapped;
size_t chunks_current, chunks_high, swap_avail;
uint64_t chunks_total;
size_t huge_allocated;
uint64_t huge_nmalloc, huge_ndalloc;
+ sszp = sizeof(size_t *);
ssz = sizeof(size_t);
+ CTL_GET("stats.cactive", &cactive, size_t *);
CTL_GET("stats.allocated", &allocated, size_t);
CTL_GET("stats.active", &active, size_t);
CTL_GET("stats.mapped", &mapped, size_t);
malloc_cprintf(write_cb, cbopaque,
- "Allocated: %zu, active: %zu, mapped: %zu\n", allocated,
- active, mapped);
+ "Allocated: %zu, active: %zu, mapped: %zu\n",
+ allocated, active, mapped);
+ malloc_cprintf(write_cb, cbopaque,
+ "Current active ceiling: %zu\n", atomic_read_z(cactive));
/* Print chunk stats. */
CTL_GET("stats.chunks.total", &chunks_total, uint64_t);
@@ -735,7 +748,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
ninitialized++;
}
- if (ninitialized > 1) {
+ if (ninitialized > 1 || unmerged == false) {
/* Print merged arena stats. */
malloc_cprintf(write_cb, cbopaque,
"\nMerged arenas stats:\n");
diff --git a/dep/jemalloc/src/tcache.c b/dep/jemalloc/src/tcache.c
index cbbe7a113a9..31c329e1613 100644
--- a/dep/jemalloc/src/tcache.c
+++ b/dep/jemalloc/src/tcache.c
@@ -8,6 +8,9 @@ bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
+tcache_bin_info_t *tcache_bin_info;
+static unsigned stack_nelms; /* Total stack elms per tcache. */
+
/* Map of thread-specific caches. */
#ifndef NO_TLS
__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
@@ -55,18 +58,19 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
#endif
)
{
- void *flush, *deferred, *ptr;
+ void *ptr;
unsigned i, nflush, ndeferred;
- bool first_pass;
+#ifdef JEMALLOC_STATS
+ bool merged_stats = false;
+#endif
assert(binind < nbins);
assert(rem <= tbin->ncached);
- assert(tbin->ncached > 0 || tbin->avail == NULL);
- for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass =
- true; flush != NULL; flush = deferred, nflush = ndeferred) {
+ for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
/* Lock the arena bin associated with the first object. */
- arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
+ tbin->avail[0]);
arena_t *arena = chunk->arena;
arena_bin_t *bin = &arena->bins[binind];
@@ -82,17 +86,17 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
malloc_mutex_lock(&bin->lock);
#ifdef JEMALLOC_STATS
if (arena == tcache->arena) {
+ assert(merged_stats == false);
+ merged_stats = true;
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
}
#endif
- deferred = NULL;
ndeferred = 0;
for (i = 0; i < nflush; i++) {
- ptr = flush;
+ ptr = tbin->avail[i];
assert(ptr != NULL);
- flush = *(void **)ptr;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk->arena == arena) {
size_t pageind = ((uintptr_t)ptr -
@@ -107,21 +111,31 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
* locked. Stash the object, so that it can be
* handled in a future pass.
*/
- *(void **)ptr = deferred;
- deferred = ptr;
+ tbin->avail[ndeferred] = ptr;
ndeferred++;
}
}
malloc_mutex_unlock(&bin->lock);
-
- if (first_pass) {
- tbin->avail = flush;
- first_pass = false;
- }
}
+#ifdef JEMALLOC_STATS
+ if (merged_stats == false) {
+ /*
+ * The flush loop didn't happen to flush to this thread's
+ * arena, so the stats didn't get merged. Manually do so now.
+ */
+ arena_bin_t *bin = &tcache->arena->bins[binind];
+ malloc_mutex_lock(&bin->lock);
+ bin->stats.nflushes++;
+ bin->stats.nrequests += tbin->tstats.nrequests;
+ tbin->tstats.nrequests = 0;
+ malloc_mutex_unlock(&bin->lock);
+ }
+#endif
+ memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
+ rem * sizeof(void *));
tbin->ncached = rem;
- if (tbin->ncached < tbin->low_water)
+ if ((int)tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
}
@@ -132,18 +146,19 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#endif
)
{
- void *flush, *deferred, *ptr;
+ void *ptr;
unsigned i, nflush, ndeferred;
- bool first_pass;
+#ifdef JEMALLOC_STATS
+ bool merged_stats = false;
+#endif
assert(binind < nhbins);
assert(rem <= tbin->ncached);
- assert(tbin->ncached > 0 || tbin->avail == NULL);
- for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass =
- true; flush != NULL; flush = deferred, nflush = ndeferred) {
+ for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
/* Lock the arena associated with the first object. */
- arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
+ tbin->avail[0]);
arena_t *arena = chunk->arena;
malloc_mutex_lock(&arena->lock);
@@ -155,6 +170,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
tcache->prof_accumbytes = 0;
#endif
#ifdef JEMALLOC_STATS
+ merged_stats = true;
arena->stats.nrequests_large += tbin->tstats.nrequests;
arena->stats.lstats[binind - nbins].nrequests +=
tbin->tstats.nrequests;
@@ -163,12 +179,10 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
}
#endif
- deferred = NULL;
ndeferred = 0;
for (i = 0; i < nflush; i++) {
- ptr = flush;
+ ptr = tbin->avail[i];
assert(ptr != NULL);
- flush = *(void **)ptr;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk->arena == arena)
arena_dalloc_large(arena, chunk, ptr);
@@ -179,21 +193,32 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
* Stash the object, so that it can be handled
* in a future pass.
*/
- *(void **)ptr = deferred;
- deferred = ptr;
+ tbin->avail[ndeferred] = ptr;
ndeferred++;
}
}
malloc_mutex_unlock(&arena->lock);
-
- if (first_pass) {
- tbin->avail = flush;
- first_pass = false;
- }
}
+#ifdef JEMALLOC_STATS
+ if (merged_stats == false) {
+ /*
+ * The flush loop didn't happen to flush to this thread's
+ * arena, so the stats didn't get merged. Manually do so now.
+ */
+ arena_t *arena = tcache->arena;
+ malloc_mutex_lock(&arena->lock);
+ arena->stats.nrequests_large += tbin->tstats.nrequests;
+ arena->stats.lstats[binind - nbins].nrequests +=
+ tbin->tstats.nrequests;
+ tbin->tstats.nrequests = 0;
+ malloc_mutex_unlock(&arena->lock);
+ }
+#endif
+ memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
+ rem * sizeof(void *));
tbin->ncached = rem;
- if (tbin->ncached < tbin->low_water)
+ if ((int)tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
}
@@ -201,10 +226,14 @@ tcache_t *
tcache_create(arena_t *arena)
{
tcache_t *tcache;
- size_t size;
+ size_t size, stack_offset;
unsigned i;
size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
+ /* Naturally align the pointer stacks. */
+ size = PTR_CEILING(size);
+ stack_offset = size;
+ size += stack_nelms * sizeof(void *);
/*
* Round up to the nearest multiple of the cacheline size, in order to
* avoid the possibility of false cacheline sharing.
@@ -217,6 +246,8 @@ tcache_create(arena_t *arena)
if (size <= small_maxclass)
tcache = (tcache_t *)arena_malloc_small(arena, size, true);
+ else if (size <= tcache_maxclass)
+ tcache = (tcache_t *)arena_malloc_large(arena, size, true);
else
tcache = (tcache_t *)icalloc(size);
@@ -233,15 +264,12 @@ tcache_create(arena_t *arena)
tcache->arena = arena;
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
- for (i = 0; i < nbins; i++) {
- if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
- tcache->tbins[i].ncached_max = (arena->bins[i].nregs <<
- 1);
- } else
- tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX;
+ for (i = 0; i < nhbins; i++) {
+ tcache->tbins[i].lg_fill_div = 1;
+ tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
+ (uintptr_t)stack_offset);
+ stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
}
- for (; i < nhbins; i++)
- tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE;
TCACHE_SET(tcache);
@@ -252,6 +280,7 @@ void
tcache_destroy(tcache_t *tcache)
{
unsigned i;
+ size_t tcache_size;
#ifdef JEMALLOC_STATS
/* Unlink from list of extant tcaches. */
@@ -308,7 +337,8 @@ tcache_destroy(tcache_t *tcache)
}
#endif
- if (arena_salloc(tcache) <= small_maxclass) {
+ tcache_size = arena_salloc(tcache);
+ if (tcache_size <= small_maxclass) {
arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
arena_t *arena = chunk->arena;
size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
@@ -322,6 +352,13 @@ tcache_destroy(tcache_t *tcache)
malloc_mutex_lock(&bin->lock);
arena_dalloc_bin(arena, chunk, tcache, mapelm);
malloc_mutex_unlock(&bin->lock);
+ } else if (tcache_size <= tcache_maxclass) {
+ arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
+ arena_t *arena = chunk->arena;
+
+ malloc_mutex_lock(&arena->lock);
+ arena_dalloc_large(arena, chunk, tcache);
+ malloc_mutex_unlock(&arena->lock);
} else
idalloc(tcache);
}
@@ -378,11 +415,13 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena)
}
#endif
-void
+bool
tcache_boot(void)
{
if (opt_tcache) {
+ unsigned i;
+
/*
* If necessary, clamp opt_lg_tcache_max, now that
* small_maxclass and arena_maxclass are known.
@@ -397,6 +436,28 @@ tcache_boot(void)
nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);
+ /* Initialize tcache_bin_info. */
+ tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
+ sizeof(tcache_bin_info_t));
+ if (tcache_bin_info == NULL)
+ return (true);
+ stack_nelms = 0;
+ for (i = 0; i < nbins; i++) {
+ if ((arena_bin_info[i].nregs << 1) <=
+ TCACHE_NSLOTS_SMALL_MAX) {
+ tcache_bin_info[i].ncached_max =
+ (arena_bin_info[i].nregs << 1);
+ } else {
+ tcache_bin_info[i].ncached_max =
+ TCACHE_NSLOTS_SMALL_MAX;
+ }
+ stack_nelms += tcache_bin_info[i].ncached_max;
+ }
+ for (; i < nhbins; i++) {
+ tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
+ stack_nelms += tcache_bin_info[i].ncached_max;
+ }
+
/* Compute incremental GC event threshold. */
if (opt_lg_tcache_gc_sweep >= 0) {
tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
@@ -412,6 +473,8 @@ tcache_boot(void)
abort();
}
}
+
+ return (false);
}
/******************************************************************************/
#endif /* JEMALLOC_TCACHE */
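
The tcache now keeps cached pointers in a contiguous avail[] stack sized from tcache_bin_info, so a flush walks avail[0..nflush) and slides the kept tail to the front with memmove() instead of chasing an intrusive free list. A standalone sketch of that flush-and-keep pattern (types and sizes are illustrative):

#include <stdio.h>
#include <string.h>

#define NCACHED_MAX 8

typedef struct {
	int ncached;
	int low_water;
	void *avail[NCACHED_MAX];
} tbin_sketch_t;

static void
tbin_flush(tbin_sketch_t *tbin, int rem)
{
	int i;

	/* "Return" the oldest ncached - rem objects to their arena. */
	for (i = 0; i < tbin->ncached - rem; i++)
		printf("flush %p\n", tbin->avail[i]);

	/* Keep the newest rem objects, now slid to the front of the stack. */
	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
	    (size_t)rem * sizeof(void *));
	tbin->ncached = rem;
	if (tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

int
main(void)
{
	static int objs[4];
	tbin_sketch_t tbin = { 4, 4, { &objs[0], &objs[1], &objs[2],
	    &objs[3] } };

	tbin_flush(&tbin, 2);
	printf("kept %d, first kept %p\n", tbin.ncached, tbin.avail[0]);
	return (0);
}
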